illumos-gate Old usr/src/uts/common/fs/udfs/udf

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2015, Joyent, Inc.
  28  */
  29 
  30 #include <sys/types.h>
  31 #include <sys/t_lock.h>
  32 #include <sys/param.h>
  33 #include <sys/time.h>
  34 #include <sys/systm.h>
  35 #include <sys/sysmacros.h>
  36 #include <sys/resource.h>
  37 #include <sys/signal.h>
  38 #include <sys/cred.h>
  39 #include <sys/user.h>
  40 #include <sys/buf.h>
  41 #include <sys/vfs.h>
  42 #include <sys/vfs_opreg.h>
  43 #include <sys/stat.h>
  44 #include <sys/vnode.h>
  45 #include <sys/mode.h>
  46 #include <sys/proc.h>
  47 #include <sys/disp.h>
  48 #include <sys/file.h>
  49 #include <sys/fcntl.h>
  50 #include <sys/flock.h>
  51 #include <sys/kmem.h>
  52 #include <sys/uio.h>
  53 #include <sys/dnlc.h>
  54 #include <sys/conf.h>
  55 #include <sys/errno.h>
  56 #include <sys/mman.h>
  57 #include <sys/fbuf.h>
  58 #include <sys/pathname.h>
  59 #include <sys/debug.h>
  60 #include <sys/vmsystm.h>
  61 #include <sys/cmn_err.h>
  62 #include <sys/dirent.h>
  63 #include <sys/errno.h>
  64 #include <sys/modctl.h>
  65 #include <sys/statvfs.h>
  66 #include <sys/mount.h>
  67 #include <sys/sunddi.h>
  68 #include <sys/bootconf.h>
  69 #include <sys/policy.h>
  70 
  71 #include <vm/hat.h>
  72 #include <vm/page.h>
  73 #include <vm/pvn.h>
  74 #include <vm/as.h>
  75 #include <vm/seg.h>
  76 #include <vm/seg_map.h>
  77 #include <vm/seg_kmem.h>
  78 #include <vm/seg_vn.h>
  79 #include <vm/rm.h>
  80 #include <vm/page.h>
  81 #include <sys/swap.h>
  82 
  83 #include <fs/fs_subr.h>
  84 
  85 #include <sys/fs/udf_volume.h>
  86 #include <sys/fs/udf_inode.h>
  87 
  88 static int32_t udf_open(struct vnode **,
  89         int32_t, struct cred *, caller_context_t *);
  90 static int32_t udf_close(struct vnode *,
  91         int32_t, int32_t, offset_t, struct cred *, caller_context_t *);
  92 static int32_t udf_read(struct vnode *,
  93         struct uio *, int32_t, struct cred *, caller_context_t *);
  94 static int32_t udf_write(struct vnode *,
  95         struct uio *, int32_t, struct cred *, caller_context_t *);
  96 static int32_t udf_ioctl(struct vnode *,
  97         int32_t, intptr_t, int32_t, struct cred *, int32_t *,
  98         caller_context_t *);
  99 static int32_t udf_getattr(struct vnode *,
 100         struct vattr *, int32_t, struct cred *, caller_context_t *);
 101 static int32_t udf_setattr(struct vnode *,
 102         struct vattr *, int32_t, struct cred *, caller_context_t *);
 103 static int32_t udf_access(struct vnode *,
 104         int32_t, int32_t, struct cred *, caller_context_t *);
 105 static int32_t udf_lookup(struct vnode *,
 106         char *, struct vnode **, struct pathname *,
 107         int32_t, struct vnode *, struct cred *,
 108         caller_context_t *, int *, pathname_t *);
 109 static int32_t udf_create(struct vnode *,
 110         char *, struct vattr *, enum vcexcl,
 111         int32_t, struct vnode **, struct cred *, int32_t,
 112         caller_context_t *, vsecattr_t *);
 113 static int32_t udf_remove(struct vnode *,
 114         char *, struct cred *, caller_context_t *, int);
 115 static int32_t udf_link(struct vnode *,
 116         struct vnode *, char *, struct cred *, caller_context_t *, int);
 117 static int32_t udf_rename(struct vnode *,
 118         char *, struct vnode *, char *, struct cred *, caller_context_t *, int);
 119 static int32_t udf_mkdir(struct vnode *,
 120         char *, struct vattr *, struct vnode **, struct cred *,
 121         caller_context_t *, int, vsecattr_t *);
 122 static int32_t udf_rmdir(struct vnode *,
 123         char *, struct vnode *, struct cred *, caller_context_t *, int);
 124 static int32_t udf_readdir(struct vnode *,
 125         struct uio *, struct cred *, int32_t *, caller_context_t *, int);
 126 static int32_t udf_symlink(struct vnode *,
 127         char *, struct vattr *, char *, struct cred *, caller_context_t *, int);
 128 static int32_t udf_readlink(struct vnode *,
 129         struct uio *, struct cred *, caller_context_t *);
 130 static int32_t udf_fsync(struct vnode *,
 131         int32_t, struct cred *, caller_context_t *);
 132 static void udf_inactive(struct vnode *,
 133         struct cred *, caller_context_t *);
 134 static int32_t udf_fid(struct vnode *, struct fid *, caller_context_t *);
 135 static int udf_rwlock(struct vnode *, int32_t, caller_context_t *);
 136 static void udf_rwunlock(struct vnode *, int32_t, caller_context_t *);
 137 static int32_t udf_seek(struct vnode *, offset_t, offset_t *,
 138         caller_context_t *);
 139 static int32_t udf_frlock(struct vnode *, int32_t,
 140         struct flock64 *, int32_t, offset_t, struct flk_callback *, cred_t *,
 141         caller_context_t *);
 142 static int32_t udf_space(struct vnode *, int32_t,
 143         struct flock64 *, int32_t, offset_t, cred_t *, caller_context_t *);
 144 static int32_t udf_getpage(struct vnode *, offset_t,
 145         size_t, uint32_t *, struct page **, size_t,
 146         struct seg *, caddr_t, enum seg_rw, struct cred *, caller_context_t *);
 147 static int32_t udf_putpage(struct vnode *, offset_t,
 148         size_t, int32_t, struct cred *, caller_context_t *);
 149 static int32_t udf_map(struct vnode *, offset_t, struct as *,
 150         caddr_t *, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
 151         caller_context_t *);
 152 static int32_t udf_addmap(struct vnode *, offset_t, struct as *,
 153         caddr_t, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
 154         caller_context_t *);
 155 static int32_t udf_delmap(struct vnode *, offset_t, struct as *,
 156         caddr_t, size_t, uint32_t, uint32_t, uint32_t, struct cred *,
 157         caller_context_t *);
 158 static int32_t udf_l_pathconf(struct vnode *, int32_t,
 159         ulong_t *, struct cred *, caller_context_t *);
 160 static int32_t udf_pageio(struct vnode *, struct page *,
 161         u_offset_t, size_t, int32_t, struct cred *, caller_context_t *);
 162 
 163 int32_t ud_getpage_miss(struct vnode *, u_offset_t,
 164         size_t, struct seg *, caddr_t, page_t *pl[],
 165         size_t, enum seg_rw, int32_t);
 166 void ud_getpage_ra(struct vnode *, u_offset_t, struct seg *, caddr_t);
 167 int32_t ud_putpages(struct vnode *, offset_t, size_t, int32_t, struct cred *);
 168 int32_t ud_page_fill(struct ud_inode *, page_t *,
 169         u_offset_t, uint32_t, u_offset_t *);
 170 int32_t ud_iodone(struct buf *);
 171 int32_t ud_rdip(struct ud_inode *, struct uio *, int32_t, cred_t *);
 172 int32_t ud_wrip(struct ud_inode *, struct uio *, int32_t, cred_t *);
 173 int32_t ud_multi_strat(struct ud_inode *, page_t *, struct buf *, u_offset_t);
 174 int32_t ud_slave_done(struct buf *);
 175 
 176 /*
 177  * Structures to control multiple IO operations to get or put pages
 178  * that are backed by discontiguous blocks. The master struct is
 179  * a dummy that holds the original bp from pageio_setup. The
 180  * slave struct holds the working bp's to do the actual IO. Once
 181  * all the slave IOs complete. The master is processed as if a single
 182  * IO op has completed.
 183  */
 184 uint32_t master_index = 0;
 185 typedef struct mio_master {
 186         kmutex_t        mm_mutex;       /* protect the fields below */
 187         int32_t         mm_size;
 188         buf_t           *mm_bp;         /* original bp */
 189         int32_t         mm_resid;       /* bytes remaining to transfer */
 190         int32_t         mm_error;       /* accumulated error from slaves */
 191         int32_t         mm_index;       /* XXX debugging */
 192 } mio_master_t;
 193 
 194 typedef struct mio_slave {
 195         buf_t           ms_buf;         /* working buffer for this IO chunk */
 196         mio_master_t    *ms_ptr;        /* pointer to master */
 197 } mio_slave_t;
 198 
 199 struct vnodeops *udf_vnodeops;
 200 
 201 const fs_operation_def_t udf_vnodeops_template[] = {
 202         VOPNAME_OPEN,           { .vop_open = udf_open },
 203         VOPNAME_CLOSE,          { .vop_close = udf_close },
 204         VOPNAME_READ,           { .vop_read = udf_read },
 205         VOPNAME_WRITE,          { .vop_write = udf_write },
 206         VOPNAME_IOCTL,          { .vop_ioctl = udf_ioctl },
 207         VOPNAME_GETATTR,        { .vop_getattr = udf_getattr },
 208         VOPNAME_SETATTR,        { .vop_setattr = udf_setattr },
 209         VOPNAME_ACCESS,         { .vop_access = udf_access },
 210         VOPNAME_LOOKUP,         { .vop_lookup = udf_lookup },
 211         VOPNAME_CREATE,         { .vop_create = udf_create },
 212         VOPNAME_REMOVE,         { .vop_remove = udf_remove },
 213         VOPNAME_LINK,           { .vop_link = udf_link },
 214         VOPNAME_RENAME,         { .vop_rename = udf_rename },
 215         VOPNAME_MKDIR,          { .vop_mkdir = udf_mkdir },
 216         VOPNAME_RMDIR,          { .vop_rmdir = udf_rmdir },
 217         VOPNAME_READDIR,        { .vop_readdir = udf_readdir },
 218         VOPNAME_SYMLINK,        { .vop_symlink = udf_symlink },
 219         VOPNAME_READLINK,       { .vop_readlink = udf_readlink },
 220         VOPNAME_FSYNC,          { .vop_fsync = udf_fsync },
 221         VOPNAME_INACTIVE,       { .vop_inactive = udf_inactive },
 222         VOPNAME_FID,            { .vop_fid = udf_fid },
 223         VOPNAME_RWLOCK,         { .vop_rwlock = udf_rwlock },
 224         VOPNAME_RWUNLOCK,       { .vop_rwunlock = udf_rwunlock },
 225         VOPNAME_SEEK,           { .vop_seek = udf_seek },
 226         VOPNAME_FRLOCK,         { .vop_frlock = udf_frlock },
 227         VOPNAME_SPACE,          { .vop_space = udf_space },
 228         VOPNAME_GETPAGE,        { .vop_getpage = udf_getpage },
 229         VOPNAME_PUTPAGE,        { .vop_putpage = udf_putpage },
 230         VOPNAME_MAP,            { .vop_map = udf_map },
 231         VOPNAME_ADDMAP,         { .vop_addmap = udf_addmap },
 232         VOPNAME_DELMAP,         { .vop_delmap = udf_delmap },
 233         VOPNAME_PATHCONF,       { .vop_pathconf = udf_l_pathconf },
 234         VOPNAME_PAGEIO,         { .vop_pageio = udf_pageio },
 235         VOPNAME_VNEVENT,        { .vop_vnevent = fs_vnevent_support },
 236         NULL,                   NULL
 237 };
 238 
 239 /* ARGSUSED */
 240 static int32_t
 241 udf_open(
 242         struct vnode **vpp,
 243         int32_t flag,
 244         struct cred *cr,
 245         caller_context_t *ct)
 246 {
 247         ud_printf("udf_open\n");
 248 
 249         return (0);
 250 }
 251 
 252 /* ARGSUSED */
 253 static int32_t
 254 udf_close(
 255         struct vnode *vp,
 256         int32_t flag,
 257         int32_t count,
 258         offset_t offset,
 259         struct cred *cr,
 260         caller_context_t *ct)
 261 {
 262         struct ud_inode *ip = VTOI(vp);
 263 
 264         ud_printf("udf_close\n");
 265 
 266         ITIMES(ip);
 267 
 268         cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
 269         cleanshares(vp, ttoproc(curthread)->p_pid);
 270 
 271         /*
 272          * Push partially filled cluster at last close.
 273          * ``last close'' is approximated because the dnlc
 274          * may have a hold on the vnode.
 275          */
 276         if (vp->v_count <= 2 && vp->v_type != VBAD) {
 277                 struct ud_inode *ip = VTOI(vp);
 278                 if (ip->i_delaylen) {
 279                         (void) ud_putpages(vp, ip->i_delayoff, ip->i_delaylen,
 280                             B_ASYNC | B_FREE, cr);
 281                         ip->i_delaylen = 0;
 282                 }
 283         }
 284 
 285         return (0);
 286 }
 287 
 288 /* ARGSUSED */
 289 static int32_t
 290 udf_read(
 291         struct vnode *vp,
 292         struct uio *uiop,
 293         int32_t ioflag,
 294         struct cred *cr,
 295         caller_context_t *ct)
 296 {
 297         struct ud_inode *ip = VTOI(vp);
 298         int32_t error;
 299 
 300         ud_printf("udf_read\n");
 301 
 302 #ifdef  __lock_lint
 303         rw_enter(&ip->i_rwlock, RW_READER);
 304 #endif
 305 
 306         ASSERT(RW_READ_HELD(&ip->i_rwlock));
 307 
 308         if (MANDLOCK(vp, ip->i_char)) {
 309                 /*
 310                  * udf_getattr ends up being called by chklock
 311                  */
 312                 error = chklock(vp, FREAD, uiop->uio_loffset,
 313                     uiop->uio_resid, uiop->uio_fmode, ct);
 314                 if (error) {
 315                         goto end;
 316                 }
 317         }
 318 
 319         rw_enter(&ip->i_contents, RW_READER);
 320         error = ud_rdip(ip, uiop, ioflag, cr);
 321         rw_exit(&ip->i_contents);
 322 
 323 end:
 324 #ifdef  __lock_lint
 325         rw_exit(&ip->i_rwlock);
 326 #endif
 327 
 328         return (error);
 329 }
 330 
 331 
 332 int32_t ud_WRITES = 1;
 333 int32_t ud_HW = 96 * 1024;
 334 int32_t ud_LW = 64 * 1024;
 335 int32_t ud_throttles = 0;
 336 
 337 /* ARGSUSED */
 338 static int32_t
 339 udf_write(
 340         struct vnode *vp,
 341         struct uio *uiop,
 342         int32_t ioflag,
 343         struct cred *cr,
 344         caller_context_t *ct)
 345 {
 346         struct ud_inode *ip = VTOI(vp);
 347         int32_t error = 0;
 348 
 349         ud_printf("udf_write\n");
 350 
 351 #ifdef  __lock_lint
 352         rw_enter(&ip->i_rwlock, RW_WRITER);
 353 #endif
 354 
 355         ASSERT(RW_WRITE_HELD(&ip->i_rwlock));
 356 
 357         if (MANDLOCK(vp, ip->i_char)) {
 358                 /*
 359                  * ud_getattr ends up being called by chklock
 360                  */
 361                 error = chklock(vp, FWRITE, uiop->uio_loffset,
 362                     uiop->uio_resid, uiop->uio_fmode, ct);
 363                 if (error) {
 364                         goto end;
 365                 }
 366         }
 367         /*
 368          * Throttle writes.
 369          */
 370         mutex_enter(&ip->i_tlock);
 371         if (ud_WRITES && (ip->i_writes > ud_HW)) {
 372                 while (ip->i_writes > ud_HW) {
 373                         ud_throttles++;
 374                         cv_wait(&ip->i_wrcv, &ip->i_tlock);
 375                 }
 376         }
 377         mutex_exit(&ip->i_tlock);
 378 
 379         /*
 380          * Write to the file
 381          */
 382         rw_enter(&ip->i_contents, RW_WRITER);
 383         if ((ioflag & FAPPEND) != 0 && (ip->i_type == VREG)) {
 384                 /*
 385                  * In append mode start at end of file.
 386                  */
 387                 uiop->uio_loffset = ip->i_size;
 388         }
 389         error = ud_wrip(ip, uiop, ioflag, cr);
 390         rw_exit(&ip->i_contents);
 391 
 392 end:
 393 #ifdef  __lock_lint
 394         rw_exit(&ip->i_rwlock);
 395 #endif
 396 
 397         return (error);
 398 }
 399 
 400 /* ARGSUSED */
 401 static int32_t
 402 udf_ioctl(
 403         struct vnode *vp,
 404         int32_t cmd,
 405         intptr_t arg,
 406         int32_t flag,
 407         struct cred *cr,
 408         int32_t *rvalp,
 409         caller_context_t *ct)
 410 {
 411         return (ENOTTY);
 412 }
 413 
 414 /* ARGSUSED */
 415 static int32_t
 416 udf_getattr(
 417         struct vnode *vp,
 418         struct vattr *vap,
 419         int32_t flags,
 420         struct cred *cr,
 421         caller_context_t *ct)
 422 {
 423         struct ud_inode *ip = VTOI(vp);
 424 
 425         ud_printf("udf_getattr\n");
 426 
 427         if (vap->va_mask == AT_SIZE) {
 428                 /*
 429                  * for performance, if only the size is requested don't bother
 430                  * with anything else.
 431                  */
 432                 vap->va_size = ip->i_size;
 433                 return (0);
 434         }
 435 
 436         rw_enter(&ip->i_contents, RW_READER);
 437 
 438         vap->va_type = vp->v_type;
 439         vap->va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
 440 
 441         vap->va_uid = ip->i_uid;
 442         vap->va_gid = ip->i_gid;
 443         vap->va_fsid = ip->i_dev;
 444         vap->va_nodeid = ip->i_icb_lbano;
 445         vap->va_nlink = ip->i_nlink;
 446         vap->va_size = ip->i_size;
 447         vap->va_seq = ip->i_seq;
 448         if (vp->v_type == VCHR || vp->v_type == VBLK) {
 449                 vap->va_rdev = ip->i_rdev;
 450         } else {
 451                 vap->va_rdev = 0;
 452         }
 453 
 454         mutex_enter(&ip->i_tlock);
 455         ITIMES_NOLOCK(ip);      /* mark correct time in inode */
 456         vap->va_atime.tv_sec = (time_t)ip->i_atime.tv_sec;
 457         vap->va_atime.tv_nsec = ip->i_atime.tv_nsec;
 458         vap->va_mtime.tv_sec = (time_t)ip->i_mtime.tv_sec;
 459         vap->va_mtime.tv_nsec = ip->i_mtime.tv_nsec;
 460         vap->va_ctime.tv_sec = (time_t)ip->i_ctime.tv_sec;
 461         vap->va_ctime.tv_nsec = ip->i_ctime.tv_nsec;
 462         mutex_exit(&ip->i_tlock);
 463 
 464         switch (ip->i_type) {
 465                 case VBLK:
 466                         vap->va_blksize = MAXBSIZE;
 467                         break;
 468                 case VCHR:
 469                         vap->va_blksize = MAXBSIZE;
 470                         break;
 471                 default:
 472                         vap->va_blksize = ip->i_udf->udf_lbsize;
 473                         break;
 474         }
 475         vap->va_nblocks = ip->i_lbr << ip->i_udf->udf_l2d_shift;
 476 
 477         rw_exit(&ip->i_contents);
 478 
 479         return (0);
 480 }
 481 
 482 static int
 483 ud_iaccess_vmode(void *ip, int mode, struct cred *cr)
 484 {
 485         return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 0));
 486 }
 487 
 488 /*ARGSUSED4*/
 489 static int32_t
 490 udf_setattr(
 491         struct vnode *vp,
 492         struct vattr *vap,
 493         int32_t flags,
 494         struct cred *cr,
 495         caller_context_t *ct)
 496 {
 497         int32_t error = 0;
 498         uint32_t mask = vap->va_mask;
 499         struct ud_inode *ip;
 500         timestruc_t now;
 501         struct vattr ovap;
 502 
 503         ud_printf("udf_setattr\n");
 504 
 505         ip = VTOI(vp);
 506 
 507         /*
 508          * not updates allowed to 4096 files
 509          */
 510         if (ip->i_astrat == STRAT_TYPE4096) {
 511                 return (EINVAL);
 512         }
 513 
 514         /*
 515          * Cannot set these attributes
 516          */
 517         if (mask & AT_NOSET) {
 518                 return (EINVAL);
 519         }
 520 
 521         rw_enter(&ip->i_rwlock, RW_WRITER);
 522         rw_enter(&ip->i_contents, RW_WRITER);
 523 
 524         ovap.va_uid = ip->i_uid;
 525         ovap.va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
 526         error = secpolicy_vnode_setattr(cr, vp, vap, &ovap, flags,
 527             ud_iaccess_vmode, ip);
 528         if (error)
 529                 goto update_inode;
 530 
 531         mask = vap->va_mask;
 532         /*
 533          * Change file access modes.
 534          */
 535         if (mask & AT_MODE) {
 536                 ip->i_perm = VA2UD_PERM(vap->va_mode);
 537                 ip->i_char = vap->va_mode & (VSUID | VSGID | VSVTX);
 538                 mutex_enter(&ip->i_tlock);
 539                 ip->i_flag |= ICHG;
 540                 mutex_exit(&ip->i_tlock);
 541         }
 542         if (mask & (AT_UID|AT_GID)) {
 543                 if (mask & AT_UID) {
 544                         ip->i_uid = vap->va_uid;
 545                 }
 546                 if (mask & AT_GID) {
 547                         ip->i_gid = vap->va_gid;
 548                 }
 549                 mutex_enter(&ip->i_tlock);
 550                 ip->i_flag |= ICHG;
 551                 mutex_exit(&ip->i_tlock);
 552         }
 553         /*
 554          * Truncate file.  Must have write permission and not be a directory.
 555          */
 556         if (mask & AT_SIZE) {
 557                 if (vp->v_type == VDIR) {
 558                         error = EISDIR;
 559                         goto update_inode;
 560                 }
 561                 if (error = ud_iaccess(ip, IWRITE, cr, 0)) {
 562                         goto update_inode;
 563                 }
 564                 if (vap->va_size > MAXOFFSET_T) {
 565                         error = EFBIG;
 566                         goto update_inode;
 567                 }
 568                 if (error = ud_itrunc(ip, vap->va_size, 0, cr)) {
 569                         goto update_inode;
 570                 }
 571 
 572                 if (vap->va_size == 0)
 573                         vnevent_truncate(vp, ct);
 574         }
 575         /*
 576          * Change file access or modified times.
 577          */
 578         if (mask & (AT_ATIME|AT_MTIME)) {
 579                 mutex_enter(&ip->i_tlock);
 580                 if (mask & AT_ATIME) {
 581                         ip->i_atime.tv_sec = vap->va_atime.tv_sec;
 582                         ip->i_atime.tv_nsec = vap->va_atime.tv_nsec;
 583                         ip->i_flag &= ~IACC;
 584                 }
 585                 if (mask & AT_MTIME) {
 586                         ip->i_mtime.tv_sec = vap->va_mtime.tv_sec;
 587                         ip->i_mtime.tv_nsec = vap->va_mtime.tv_nsec;
 588                         gethrestime(&now);
 589                         ip->i_ctime.tv_sec = now.tv_sec;
 590                         ip->i_ctime.tv_nsec = now.tv_nsec;
 591                         ip->i_flag &= ~(IUPD|ICHG);
 592                         ip->i_flag |= IMODTIME;
 593                 }
 594                 ip->i_flag |= IMOD;
 595                 mutex_exit(&ip->i_tlock);
 596         }
 597 
 598 update_inode:
 599         if (curthread->t_flag & T_DONTPEND) {
 600                 ud_iupdat(ip, 1);
 601         } else {
 602                 ITIMES_NOLOCK(ip);
 603         }
 604         rw_exit(&ip->i_contents);
 605         rw_exit(&ip->i_rwlock);
 606 
 607         return (error);
 608 }
 609 
 610 /* ARGSUSED */
 611 static int32_t
 612 udf_access(
 613         struct vnode *vp,
 614         int32_t mode,
 615         int32_t flags,
 616         struct cred *cr,
 617         caller_context_t *ct)
 618 {
 619         struct ud_inode *ip = VTOI(vp);
 620 
 621         ud_printf("udf_access\n");
 622 
 623         if (ip->i_udf == NULL) {
 624                 return (EIO);
 625         }
 626 
 627         return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 1));
 628 }
 629 
 630 int32_t udfs_stickyhack = 1;
 631 
 632 /* ARGSUSED */
 633 static int32_t
 634 udf_lookup(
 635         struct vnode *dvp,
 636         char *nm,
 637         struct vnode **vpp,
 638         struct pathname *pnp,
 639         int32_t flags,
 640         struct vnode *rdir,
 641         struct cred *cr,
 642         caller_context_t *ct,
 643         int *direntflags,
 644         pathname_t *realpnp)
 645 {
 646         int32_t error;
 647         struct vnode *vp;
 648         struct ud_inode *ip, *xip;
 649 
 650         ud_printf("udf_lookup\n");
 651         /*
 652          * Null component name is a synonym for directory being searched.
 653          */
 654         if (*nm == '\0') {
 655                 VN_HOLD(dvp);
 656                 *vpp = dvp;
 657                 error = 0;
 658                 goto out;
 659         }
 660 
 661         /*
 662          * Fast path: Check the directory name lookup cache.
 663          */
 664         ip = VTOI(dvp);
 665         if (vp = dnlc_lookup(dvp, nm)) {
 666                 /*
 667                  * Check accessibility of directory.
 668                  */
 669                 if ((error = ud_iaccess(ip, IEXEC, cr, 1)) != 0) {
 670                         VN_RELE(vp);
 671                 }
 672                 xip = VTOI(vp);
 673         } else {
 674                 error = ud_dirlook(ip, nm, &xip, cr, 1);
 675                 ITIMES(ip);
 676         }
 677 
 678         if (error == 0) {
 679                 ip = xip;
 680                 *vpp = ITOV(ip);
 681                 if ((ip->i_type != VDIR) &&
 682                     (ip->i_char & ISVTX) &&
 683                     ((ip->i_perm & IEXEC) == 0) &&
 684                     udfs_stickyhack) {
 685                         mutex_enter(&(*vpp)->v_lock);
 686                         (*vpp)->v_flag |= VISSWAP;
 687                         mutex_exit(&(*vpp)->v_lock);
 688                 }
 689                 ITIMES(ip);
 690                 /*
 691                  * If vnode is a device return special vnode instead.
 692                  */
 693                 if (IS_DEVVP(*vpp)) {
 694                         struct vnode *newvp;
 695                         newvp = specvp(*vpp, (*vpp)->v_rdev,
 696                             (*vpp)->v_type, cr);
 697                         VN_RELE(*vpp);
 698                         if (newvp == NULL) {
 699                                 error = ENOSYS;
 700                         } else {
 701                                 *vpp = newvp;
 702                         }
 703                 }
 704         }
 705 out:
 706         return (error);
 707 }
 708 
 709 /* ARGSUSED */
 710 static int32_t
 711 udf_create(
 712         struct vnode *dvp,
 713         char *name,
 714         struct vattr *vap,
 715         enum vcexcl excl,
 716         int32_t mode,
 717         struct vnode **vpp,
 718         struct cred *cr,
 719         int32_t flag,
 720         caller_context_t *ct,
 721         vsecattr_t *vsecp)
 722 {
 723         int32_t error;
 724         struct ud_inode *ip = VTOI(dvp), *xip;
 725 
 726         ud_printf("udf_create\n");
 727 
 728         if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
 729                 vap->va_mode &= ~VSVTX;
 730 
 731         if (*name == '\0') {
 732                 /*
 733                  * Null component name refers to the directory itself.
 734                  */
 735                 VN_HOLD(dvp);
 736                 ITIMES(ip);
 737                 error = EEXIST;
 738         } else {
 739                 xip = NULL;
 740                 rw_enter(&ip->i_rwlock, RW_WRITER);
 741                 error = ud_direnter(ip, name, DE_CREATE,
 742                     (struct ud_inode *)0, (struct ud_inode *)0,
 743                     vap, &xip, cr, ct);
 744                 rw_exit(&ip->i_rwlock);
 745                 ITIMES(ip);
 746                 ip = xip;
 747         }
 748 #ifdef  __lock_lint
 749         rw_enter(&ip->i_contents, RW_WRITER);
 750 #else
 751         if (ip != NULL) {
 752                 rw_enter(&ip->i_contents, RW_WRITER);
 753         }
 754 #endif
 755 
 756         /*
 757          * If the file already exists and this is a non-exclusive create,
 758          * check permissions and allow access for non-directories.
 759          * Read-only create of an existing directory is also allowed.
 760          * We fail an exclusive create of anything which already exists.
 761          */
 762         if (error == EEXIST) {
 763                 if (excl == NONEXCL) {
 764                         if ((ip->i_type == VDIR) && (mode & VWRITE)) {
 765                                 error = EISDIR;
 766                         } else if (mode) {
 767                                 error = ud_iaccess(ip,
 768                                     UD_UPERM2DPERM(mode), cr, 0);
 769                         } else {
 770                                 error = 0;
 771                         }
 772                 }
 773                 if (error) {
 774                         rw_exit(&ip->i_contents);
 775                         VN_RELE(ITOV(ip));
 776                         goto out;
 777                 } else if ((ip->i_type == VREG) &&
 778                     (vap->va_mask & AT_SIZE) && vap->va_size == 0) {
 779                         /*
 780                          * Truncate regular files, if requested by caller.
 781                          * Grab i_rwlock to make sure no one else is
 782                          * currently writing to the file (we promised
 783                          * bmap we would do this).
 784                          * Must get the locks in the correct order.
 785                          */
 786                         if (ip->i_size == 0) {
 787                                 ip->i_flag |= ICHG | IUPD;
 788                         } else {
 789                                 rw_exit(&ip->i_contents);
 790                                 rw_enter(&ip->i_rwlock, RW_WRITER);
 791                                 rw_enter(&ip->i_contents, RW_WRITER);
 792                                 (void) ud_itrunc(ip, 0, 0, cr);
 793                                 rw_exit(&ip->i_rwlock);
 794                         }
 795                         vnevent_create(ITOV(ip), ct);
 796                 }
 797         }
 798 
 799         if (error == 0) {
 800                 *vpp = ITOV(ip);
 801                 ITIMES(ip);
 802         }
 803 #ifdef  __lock_lint
 804         rw_exit(&ip->i_contents);
 805 #else
 806         if (ip != NULL) {
 807                 rw_exit(&ip->i_contents);
 808         }
 809 #endif
 810         if (error) {
 811                 goto out;
 812         }
 813 
 814         /*
 815          * If vnode is a device return special vnode instead.
 816          */
 817         if (!error && IS_DEVVP(*vpp)) {
 818                 struct vnode *newvp;
 819 
 820                 newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
 821                 VN_RELE(*vpp);
 822                 if (newvp == NULL) {
 823                         error = ENOSYS;
 824                         goto out;
 825                 }
 826                 *vpp = newvp;
 827         }
 828 out:
 829         return (error);
 830 }
 831 
 832 /* ARGSUSED */
 833 static int32_t
 834 udf_remove(
 835         struct vnode *vp,
 836         char *nm,
 837         struct cred *cr,
 838         caller_context_t *ct,
 839         int flags)
 840 {
 841         int32_t error;
 842         struct ud_inode *ip = VTOI(vp);
 843 
 844         ud_printf("udf_remove\n");
 845 
 846         rw_enter(&ip->i_rwlock, RW_WRITER);
 847         error = ud_dirremove(ip, nm,
 848             (struct ud_inode *)0, (struct vnode *)0, DR_REMOVE, cr, ct);
 849         rw_exit(&ip->i_rwlock);
 850         ITIMES(ip);
 851 
 852         return (error);
 853 }
 854 
 855 /* ARGSUSED */
 856 static int32_t
 857 udf_link(
 858         struct vnode *tdvp,
 859         struct vnode *svp,
 860         char *tnm,
 861         struct cred *cr,
 862         caller_context_t *ct,
 863         int flags)
 864 {
 865         int32_t error;
 866         struct vnode *realvp;
 867         struct ud_inode *sip;
 868         struct ud_inode *tdp;
 869 
 870         ud_printf("udf_link\n");
 871         if (VOP_REALVP(svp, &realvp, ct) == 0) {
 872                 svp = realvp;
 873         }
 874 
 875         /*
 876          * Do not allow links to directories
 877          */
 878         if (svp->v_type == VDIR) {
 879                 return (EPERM);
 880         }
 881 
 882         sip = VTOI(svp);
 883 
 884         if (sip->i_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)
 885                 return (EPERM);
 886 
 887         tdp = VTOI(tdvp);
 888 
 889         rw_enter(&tdp->i_rwlock, RW_WRITER);
 890         error = ud_direnter(tdp, tnm, DE_LINK, (struct ud_inode *)0,
 891             sip, (struct vattr *)0, (struct ud_inode **)0, cr, ct);
 892         rw_exit(&tdp->i_rwlock);
 893         ITIMES(sip);
 894         ITIMES(tdp);
 895 
 896         if (error == 0) {
 897                 vnevent_link(svp, ct);
 898         }
 899 
 900         return (error);
 901 }
 902 
 903 /* ARGSUSED */
 904 static int32_t
 905 udf_rename(
 906         struct vnode *sdvp,
 907         char *snm,
 908         struct vnode *tdvp,
 909         char *tnm,
 910         struct cred *cr,
 911         caller_context_t *ct,
 912         int flags)
 913 {
 914         int32_t error = 0;
 915         struct udf_vfs *udf_vfsp;
 916         struct ud_inode *sip;           /* source inode */
 917         struct ud_inode *tip;           /* target inode */
 918         struct ud_inode *sdp, *tdp;     /* source and target parent inode */
 919         struct vnode *realvp;
 920 
 921         ud_printf("udf_rename\n");
 922 
 923         if (VOP_REALVP(tdvp, &realvp, ct) == 0) {
 924                 tdvp = realvp;
 925         }
 926 
 927         sdp = VTOI(sdvp);
 928         tdp = VTOI(tdvp);
 929 
 930         udf_vfsp = sdp->i_udf;
 931 
 932         mutex_enter(&udf_vfsp->udf_rename_lck);
 933         /*
 934          * Look up inode of file we're supposed to rename.
 935          */
 936         if (error = ud_dirlook(sdp, snm, &sip, cr, 0)) {
 937                 mutex_exit(&udf_vfsp->udf_rename_lck);
 938                 return (error);
 939         }
 940         /*
 941          * be sure this is not a directory with another file system mounted
 942          * over it.  If it is just give up the locks, and return with
 943          * EBUSY
 944          */
 945         if (vn_mountedvfs(ITOV(sip)) != NULL) {
 946                 error = EBUSY;
 947                 goto errout;
 948         }
 949         /*
 950          * Make sure we can delete the source entry.  This requires
 951          * write permission on the containing directory.  If that
 952          * directory is "sticky" it further requires (except for
 953          * privileged users) that the user own the directory or the
 954          * source entry, or else have permission to write the source
 955          * entry.
 956          */
 957         rw_enter(&sdp->i_contents, RW_READER);
 958         rw_enter(&sip->i_contents, RW_READER);
 959         if ((error = ud_iaccess(sdp, IWRITE, cr, 0)) != 0 ||
 960             (error = ud_sticky_remove_access(sdp, sip, cr)) != 0) {
 961                 rw_exit(&sip->i_contents);
 962                 rw_exit(&sdp->i_contents);
 963                 ITIMES(sip);
 964                 goto errout;
 965         }
 966 
 967         /*
 968          * Check for renaming '.' or '..' or alias of '.'
 969          */
 970         if ((strcmp(snm, ".") == 0) ||
 971             (strcmp(snm, "..") == 0) ||
 972             (sdp == sip)) {
 973                 error = EINVAL;
 974                 rw_exit(&sip->i_contents);
 975                 rw_exit(&sdp->i_contents);
 976                 goto errout;
 977         }
 978 
 979         rw_exit(&sip->i_contents);
 980         rw_exit(&sdp->i_contents);
 981 
 982         if (ud_dirlook(tdp, tnm, &tip, cr, 0) == 0) {
 983                 vnevent_pre_rename_dest(ITOV(tip), tdvp, tnm, ct);
 984                 VN_RELE(ITOV(tip));
 985         }
 986 
 987         /* Notify the target dir. if not the same as the source dir. */
 988         if (sdvp != tdvp)
 989                 vnevent_pre_rename_dest_dir(tdvp, ITOV(sip), tnm, ct);
 990 
 991         vnevent_pre_rename_src(ITOV(sip), sdvp, snm, ct);
 992 
 993         /*
 994          * Link source to the target.
 995          */
 996         rw_enter(&tdp->i_rwlock, RW_WRITER);
 997         if (error = ud_direnter(tdp, tnm, DE_RENAME, sdp, sip,
 998             (struct vattr *)0, (struct ud_inode **)0, cr, ct)) {
 999                 /*
1000                  * ESAME isn't really an error; it indicates that the
1001                  * operation should not be done because the source and target
1002                  * are the same file, but that no error should be reported.
1003                  */
1004                 if (error == ESAME) {
1005                         error = 0;
1006                 }
1007                 rw_exit(&tdp->i_rwlock);
1008                 goto errout;
1009         }
1010         rw_exit(&tdp->i_rwlock);
1011 
1012         rw_enter(&sdp->i_rwlock, RW_WRITER);
1013         /*
1014          * Unlink the source.
1015          * Remove the source entry.  ud_dirremove() checks that the entry
1016          * still reflects sip, and returns an error if it doesn't.
1017          * If the entry has changed just forget about it.  Release
1018          * the source inode.
1019          */
1020         if ((error = ud_dirremove(sdp, snm, sip, (struct vnode *)0,
1021             DR_RENAME, cr, ct)) == ENOENT) {
1022                 error = 0;
1023         }
1024         rw_exit(&sdp->i_rwlock);
1025 
1026         if (error == 0) {
1027                 vnevent_rename_src(ITOV(sip), sdvp, snm, ct);
1028                 /*
1029                  * vnevent_rename_dest and vnevent_rename_dest_dir are called
1030                  * in ud_direnter().
1031                  */
1032         }
1033 
1034 errout:
1035         ITIMES(sdp);
1036         ITIMES(tdp);
1037         VN_RELE(ITOV(sip));
1038         mutex_exit(&udf_vfsp->udf_rename_lck);
1039 
1040         return (error);
1041 }
1042 
1043 /* ARGSUSED */
1044 static int32_t
1045 udf_mkdir(
1046         struct vnode *dvp,
1047         char *dirname,
1048         struct vattr *vap,
1049         struct vnode **vpp,
1050         struct cred *cr,
1051         caller_context_t *ct,
1052         int flags,
1053         vsecattr_t *vsecp)
1054 {
1055         int32_t error;
1056         struct ud_inode *ip;
1057         struct ud_inode *xip;
1058 
1059         ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
1060 
1061         ud_printf("udf_mkdir\n");
1062 
1063         ip = VTOI(dvp);
1064         rw_enter(&ip->i_rwlock, RW_WRITER);
1065         error = ud_direnter(ip, dirname, DE_MKDIR,
1066             (struct ud_inode *)0, (struct ud_inode *)0, vap, &xip, cr, ct);
1067         rw_exit(&ip->i_rwlock);
1068         ITIMES(ip);
1069         if (error == 0) {
1070                 ip = xip;
1071                 *vpp = ITOV(ip);
1072                 ITIMES(ip);
1073         } else if (error == EEXIST) {
1074                 ITIMES(xip);
1075                 VN_RELE(ITOV(xip));
1076         }
1077 
1078         return (error);
1079 }
1080 
1081 /* ARGSUSED */
1082 static int32_t
1083 udf_rmdir(
1084         struct vnode *vp,
1085         char *nm,
1086         struct vnode *cdir,
1087         struct cred *cr,
1088         caller_context_t *ct,
1089         int flags)
1090 {
1091         int32_t error;
1092         struct ud_inode *ip = VTOI(vp);
1093 
1094         ud_printf("udf_rmdir\n");
1095 
1096         rw_enter(&ip->i_rwlock, RW_WRITER);
1097         error = ud_dirremove(ip, nm, (struct ud_inode *)0, cdir, DR_RMDIR,
1098             cr, ct);
1099         rw_exit(&ip->i_rwlock);
1100         ITIMES(ip);
1101 
1102         return (error);
1103 }
1104 
1105 /* ARGSUSED */
1106 static int32_t
1107 udf_readdir(
1108         struct vnode *vp,
1109         struct uio *uiop,
1110         struct cred *cr,
1111         int32_t *eofp,
1112         caller_context_t *ct,
1113         int flags)
1114 {
1115         struct ud_inode *ip;
1116         struct dirent64 *nd;
1117         struct udf_vfs *udf_vfsp;
1118         int32_t error = 0, len, outcount = 0;
1119         uint32_t dirsiz, offset;
1120         uint32_t bufsize, ndlen, dummy;
1121         caddr_t outbuf;
1122         caddr_t outb, end_outb;
1123         struct iovec *iovp;
1124 
1125         uint8_t *dname;
1126         int32_t length;
1127 
1128         uint8_t *buf = NULL;
1129 
1130         struct fbuf *fbp = NULL;
1131         struct file_id *fid;
1132         uint8_t *name;
1133 
1134 
1135         ud_printf("udf_readdir\n");
1136 
1137         ip = VTOI(vp);
1138         udf_vfsp = ip->i_udf;
1139 
1140         dirsiz = ip->i_size;
1141         if ((uiop->uio_offset >= dirsiz) ||
1142             (ip->i_nlink <= 0)) {
1143                 if (eofp) {
1144                         *eofp = 1;
1145                 }
1146                 return (0);
1147         }
1148 
1149         offset = uiop->uio_offset;
1150         iovp = uiop->uio_iov;
1151         bufsize = iovp->iov_len;
1152 
1153         outb = outbuf = (char *)kmem_alloc((uint32_t)bufsize, KM_SLEEP);
1154         end_outb = outb + bufsize;
1155         nd = (struct dirent64 *)outbuf;
1156 
1157         dname = (uint8_t *)kmem_zalloc(1024, KM_SLEEP);
1158         buf = (uint8_t *)kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP);
1159 
1160         if (offset == 0) {
1161                 len = DIRENT64_RECLEN(1);
1162                 if (((caddr_t)nd + len) >= end_outb) {
1163                         error = EINVAL;
1164                         goto end;
1165                 }
1166                 nd->d_ino = ip->i_icb_lbano;
1167                 nd->d_reclen = (uint16_t)len;
1168                 nd->d_off = 0x10;
1169                 nd->d_name[0] = '.';
1170                 bzero(&nd->d_name[1], DIRENT64_NAMELEN(len) - 1);
1171                 nd = (struct dirent64 *)((char *)nd + nd->d_reclen);
1172                 outcount++;
1173         } else if (offset == 0x10) {
1174                 offset = 0;
1175         }
1176 
1177         while (offset < dirsiz) {
1178                 error = ud_get_next_fid(ip, &fbp,
1179                     offset, &fid, &name, buf);
1180                 if (error != 0) {
1181                         break;
1182                 }
1183 
1184                 if ((fid->fid_flags & FID_DELETED) == 0) {
1185                         if (fid->fid_flags & FID_PARENT) {
1186 
1187                                 len = DIRENT64_RECLEN(2);
1188                                 if (((caddr_t)nd + len) >= end_outb) {
1189                                         error = EINVAL;
1190                                         break;
1191                                 }
1192 
1193                                 nd->d_ino = ip->i_icb_lbano;
1194                                 nd->d_reclen = (uint16_t)len;
1195                                 nd->d_off = offset + FID_LEN(fid);
1196                                 nd->d_name[0] = '.';
1197                                 nd->d_name[1] = '.';
1198                                 bzero(&nd->d_name[2],
1199                                     DIRENT64_NAMELEN(len) - 2);
1200                                 nd = (struct dirent64 *)
1201                                     ((char *)nd + nd->d_reclen);
1202                         } else {
1203                                 if ((error = ud_uncompress(fid->fid_idlen,
1204                                     &length, name, dname)) != 0) {
1205                                         break;
1206                                 }
1207                                 if (length == 0) {
1208                                         offset += FID_LEN(fid);
1209                                         continue;
1210                                 }
1211                                 len = DIRENT64_RECLEN(length);
1212                                 if (((caddr_t)nd + len) >= end_outb) {
1213                                         if (!outcount) {
1214                                                 error = EINVAL;
1215                                         }
1216                                         break;
1217                                 }
1218                                 (void) strncpy(nd->d_name,
1219                                     (caddr_t)dname, length);
1220                                 bzero(&nd->d_name[length],
1221                                     DIRENT64_NAMELEN(len) - length);
1222                                 nd->d_ino = ud_xlate_to_daddr(udf_vfsp,
1223                                     SWAP_16(fid->fid_icb.lad_ext_prn),
1224                                     SWAP_32(fid->fid_icb.lad_ext_loc), 1,
1225                                     &dummy);
1226                                 nd->d_reclen = (uint16_t)len;
1227                                 nd->d_off = offset + FID_LEN(fid);
1228                                 nd = (struct dirent64 *)
1229                                     ((char *)nd + nd->d_reclen);
1230                         }
1231                         outcount++;
1232                 }
1233 
1234                 offset += FID_LEN(fid);
1235         }
1236 
1237 end:
1238         if (fbp != NULL) {
1239                 fbrelse(fbp, S_OTHER);
1240         }
1241         ndlen = ((char *)nd - outbuf);
1242         /*
1243          * In case of error do not call uiomove.
1244          * Return the error to the caller.
1245          */
1246         if ((error == 0) && (ndlen != 0)) {
1247                 error = uiomove(outbuf, (long)ndlen, UIO_READ, uiop);
1248                 uiop->uio_offset = offset;
1249         }
1250         kmem_free((caddr_t)buf, udf_vfsp->udf_lbsize);
1251         kmem_free((caddr_t)dname, 1024);
1252         kmem_free(outbuf, (uint32_t)bufsize);
1253         if (eofp && error == 0) {
1254                 *eofp = (uiop->uio_offset >= dirsiz);
1255         }
1256         return (error);
1257 }
1258 
1259 /* ARGSUSED */
1260 static int32_t
1261 udf_symlink(
1262         struct vnode *dvp,
1263         char *linkname,
1264         struct vattr *vap,
1265         char *target,
1266         struct cred *cr,
1267         caller_context_t *ct,
1268         int flags)
1269 {
1270         int32_t error = 0, outlen;
1271         uint32_t ioflag = 0;
1272         struct ud_inode *ip, *dip = VTOI(dvp);
1273 
1274         struct path_comp *pc;
1275         int8_t *dname = NULL, *uname = NULL, *sp;
1276 
1277         ud_printf("udf_symlink\n");
1278 
1279         ip = (struct ud_inode *)0;
1280         vap->va_type = VLNK;
1281         vap->va_rdev = 0;
1282 
1283         rw_enter(&dip->i_rwlock, RW_WRITER);
1284         error = ud_direnter(dip, linkname, DE_CREATE,
1285             (struct ud_inode *)0, (struct ud_inode *)0, vap, &ip, cr, ct);
1286         rw_exit(&dip->i_rwlock);
1287         if (error == 0) {
1288                 dname = kmem_zalloc(1024, KM_SLEEP);
1289                 uname = kmem_zalloc(PAGESIZE, KM_SLEEP);
1290 
1291                 pc = (struct path_comp *)uname;
1292                 /*
1293                  * If the first character in target is "/"
1294                  * then skip it and create entry for it
1295                  */
1296                 if (*target == '/') {
1297                         pc->pc_type = 2;
1298                         pc->pc_len = 0;
1299                         pc = (struct path_comp *)(((char *)pc) + 4);
1300                         while (*target == '/') {
1301                                 target++;
1302                         }
1303                 }
1304 
1305                 while (*target != NULL) {
1306                         sp = target;
1307                         while ((*target != '/') && (*target != '\0')) {
1308                                 target ++;
1309                         }
1310                         /*
1311                          * We got the next component of the
1312                          * path name. Create path_comp of
1313                          * appropriate type
1314                          */
1315                         if (((target - sp) == 1) && (*sp == '.')) {
1316                                 /*
1317                                  * Dot entry.
1318                                  */
1319                                 pc->pc_type = 4;
1320                                 pc = (struct path_comp *)(((char *)pc) + 4);
1321                         } else if (((target - sp) == 2) &&
1322                             (*sp == '.') && ((*(sp + 1)) == '.')) {
1323                                 /*
1324                                  * DotDot entry.
1325                                  */
1326                                 pc->pc_type = 3;
1327                                 pc = (struct path_comp *)(((char *)pc) + 4);
1328                         } else {
1329                                 /*
1330                                  * convert the user given name
1331                                  * into appropriate form to be put
1332                                  * on the media
1333                                  */
1334                                 outlen = 1024;  /* set to size of dname */
1335                                 if (error = ud_compress(target - sp, &outlen,
1336                                     (uint8_t *)sp, (uint8_t *)dname)) {
1337                                         break;
1338                                 }
1339                                 pc->pc_type = 5;
1340                                 /* LINTED */
1341                                 pc->pc_len = outlen;
1342                                 dname[outlen] = '\0';
1343                                 (void) strcpy((char *)pc->pc_id, dname);
1344                                 pc = (struct path_comp *)
1345                                     (((char *)pc) + 4 + outlen);
1346                         }
1347                         while (*target == '/') {
1348                                 target++;
1349                         }
1350                         if (*target == NULL) {
1351                                 break;
1352                         }
1353                 }
1354 
1355                 rw_enter(&ip->i_contents, RW_WRITER);
1356                 if (error == 0) {
1357                         ioflag = FWRITE;
1358                         if (curthread->t_flag & T_DONTPEND) {
1359                                 ioflag |= FDSYNC;
1360                         }
1361                         error = ud_rdwri(UIO_WRITE, ioflag, ip,
1362                             uname, ((int8_t *)pc) - uname,
1363                             (offset_t)0, UIO_SYSSPACE, (int32_t *)0, cr);
1364                 }
1365                 if (error) {
1366                         ud_idrop(ip);
1367                         rw_exit(&ip->i_contents);
1368                         rw_enter(&dip->i_rwlock, RW_WRITER);
1369                         (void) ud_dirremove(dip, linkname, (struct ud_inode *)0,
1370                             (struct vnode *)0, DR_REMOVE, cr, ct);
1371                         rw_exit(&dip->i_rwlock);
1372                         goto update_inode;
1373                 }
1374                 rw_exit(&ip->i_contents);
1375         }
1376 
1377         if ((error == 0) || (error == EEXIST)) {
1378                 VN_RELE(ITOV(ip));
1379         }
1380 
1381 update_inode:
1382         ITIMES(VTOI(dvp));
1383         if (uname != NULL) {
1384                 kmem_free(uname, PAGESIZE);
1385         }
1386         if (dname != NULL) {
1387                 kmem_free(dname, 1024);
1388         }
1389 
1390         return (error);
1391 }
1392 
1393 /* ARGSUSED */
1394 static int32_t
1395 udf_readlink(
1396         struct vnode *vp,
1397         struct uio *uiop,
1398         struct cred *cr,
1399         caller_context_t *ct)
1400 {
1401         int32_t error = 0, off, id_len, size, len;
1402         int8_t *dname = NULL, *uname = NULL;
1403         struct ud_inode *ip;
1404         struct fbuf *fbp = NULL;
1405         struct path_comp *pc;
1406 
1407         ud_printf("udf_readlink\n");
1408 
1409         if (vp->v_type != VLNK) {
1410                 return (EINVAL);
1411         }
1412 
1413         ip = VTOI(vp);
1414         size = ip->i_size;
1415         if (size > PAGESIZE) {
1416                 return (EIO);
1417         }
1418 
1419         if (size == 0) {
1420                 return (0);
1421         }
1422 
1423         dname = kmem_zalloc(1024, KM_SLEEP);
1424         uname = kmem_zalloc(PAGESIZE, KM_SLEEP);
1425 
1426         rw_enter(&ip->i_contents, RW_READER);
1427 
1428         if ((error = fbread(vp, 0, size, S_READ, &fbp)) != 0) {
1429                 goto end;
1430         }
1431 
1432         off = 0;
1433 
1434         while (off < size) {
1435                 pc = (struct path_comp *)(fbp->fb_addr + off);
1436                 switch (pc->pc_type) {
1437                         case 1 :
1438                                 (void) strcpy(uname, ip->i_udf->udf_fsmnt);
1439                                 (void) strcat(uname, "/");
1440                                 break;
1441                         case 2 :
1442                                 if (pc->pc_len != 0) {
1443                                         goto end;
1444                                 }
1445                                 uname[0] = '/';
1446                                 uname[1] = '\0';
1447                                 break;
1448                         case 3 :
1449                                 (void) strcat(uname, "../");
1450                                 break;
1451                         case 4 :
1452                                 (void) strcat(uname, "./");
1453                                 break;
1454                         case 5 :
1455                                 if ((error = ud_uncompress(pc->pc_len, &id_len,
1456                                     pc->pc_id, (uint8_t *)dname)) != 0) {
1457                                         break;
1458                                 }
1459                                 dname[id_len] = '\0';
1460                                 (void) strcat(uname, dname);
1461                                 (void) strcat(uname, "/");
1462                                 break;
1463                         default :
1464                                 error = EINVAL;
1465                                 goto end;
1466                 }
1467                 off += 4 + pc->pc_len;
1468         }
1469         len = strlen(uname) - 1;
1470         if (uname[len] == '/') {
1471                 if (len == 0) {
1472                         /*
1473                          * special case link to /
1474                          */
1475                         len = 1;
1476                 } else {
1477                         uname[len] = '\0';
1478                 }
1479         }
1480 
1481         error = uiomove(uname, len, UIO_READ, uiop);
1482 
1483         ITIMES(ip);
1484 
1485 end:
1486         if (fbp != NULL) {
1487                 fbrelse(fbp, S_OTHER);
1488         }
1489         rw_exit(&ip->i_contents);
1490         if (uname != NULL) {
1491                 kmem_free(uname, PAGESIZE);
1492         }
1493         if (dname != NULL) {
1494                 kmem_free(dname, 1024);
1495         }
1496         return (error);
1497 }
1498 
1499 /* ARGSUSED */
1500 static int32_t
1501 udf_fsync(
1502         struct vnode *vp,
1503         int32_t syncflag,
1504         struct cred *cr,
1505         caller_context_t *ct)
1506 {
1507         int32_t error = 0;
1508         struct ud_inode *ip = VTOI(vp);
1509 
1510         ud_printf("udf_fsync\n");
1511 
1512         rw_enter(&ip->i_contents, RW_WRITER);
1513         if (!(IS_SWAPVP(vp))) {
1514                 error = ud_syncip(ip, 0, I_SYNC); /* Do synchronous writes */
1515         }
1516         if (error == 0) {
1517                 error = ud_sync_indir(ip);
1518         }
1519         ITIMES(ip);             /* XXX: is this necessary ??? */
1520         rw_exit(&ip->i_contents);
1521 
1522         return (error);
1523 }
1524 
1525 /* ARGSUSED */
1526 static void
1527 udf_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
1528 {
1529         ud_printf("udf_iinactive\n");
1530 
1531         ud_iinactive(VTOI(vp), cr);
1532 }
1533 
1534 /* ARGSUSED */
1535 static int32_t
1536 udf_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
1537 {
1538         struct udf_fid *udfidp;
1539         struct ud_inode *ip = VTOI(vp);
1540 
1541         ud_printf("udf_fid\n");
1542 
1543         if (fidp->fid_len < (sizeof (struct udf_fid) - sizeof (uint16_t))) {
1544                 fidp->fid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
1545                 return (ENOSPC);
1546         }
1547 
1548         udfidp = (struct udf_fid *)fidp;
1549         bzero((char *)udfidp, sizeof (struct udf_fid));
1550         rw_enter(&ip->i_contents, RW_READER);
1551         udfidp->udfid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
1552         udfidp->udfid_uinq_lo = ip->i_uniqid & 0xffffffff;
1553         udfidp->udfid_prn = ip->i_icb_prn;
1554         udfidp->udfid_icb_lbn = ip->i_icb_block;
1555         rw_exit(&ip->i_contents);
1556 
1557         return (0);
1558 }
1559 
1560 /* ARGSUSED2 */
1561 static int
1562 udf_rwlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
1563 {
1564         struct ud_inode *ip = VTOI(vp);
1565 
1566         ud_printf("udf_rwlock\n");
1567 
1568         if (write_lock) {
1569                 rw_enter(&ip->i_rwlock, RW_WRITER);
1570         } else {
1571                 rw_enter(&ip->i_rwlock, RW_READER);
1572         }
1573 #ifdef  __lock_lint
1574         rw_exit(&ip->i_rwlock);
1575 #endif
1576         return (write_lock);
1577 }
1578 
1579 /* ARGSUSED */
1580 static void
1581 udf_rwunlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
1582 {
1583         struct ud_inode *ip = VTOI(vp);
1584 
1585         ud_printf("udf_rwunlock\n");
1586 
1587 #ifdef  __lock_lint
1588         rw_enter(&ip->i_rwlock, RW_WRITER);
1589 #endif
1590 
1591         rw_exit(&ip->i_rwlock);
1592 
1593 }
1594 
1595 /* ARGSUSED */
1596 static int32_t
1597 udf_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
1598 {
1599         return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
1600 }
1601 
1602 static int32_t
1603 udf_frlock(
1604         struct vnode *vp,
1605         int32_t cmd,
1606         struct flock64 *bfp,
1607         int32_t flag,
1608         offset_t offset,
1609         struct flk_callback *flk_cbp,
1610         cred_t *cr,
1611         caller_context_t *ct)
1612 {
1613         struct ud_inode *ip = VTOI(vp);
1614 
1615         ud_printf("udf_frlock\n");
1616 
1617         /*
1618          * If file is being mapped, disallow frlock.
1619          * XXX I am not holding tlock while checking i_mapcnt because the
1620          * current locking strategy drops all locks before calling fs_frlock.
1621          * So, mapcnt could change before we enter fs_frlock making is
1622          * meaningless to have held tlock in the first place.
1623          */
1624         if ((ip->i_mapcnt > 0) &&
1625             (MANDLOCK(vp, ip->i_char))) {
1626                 return (EAGAIN);
1627         }
1628 
1629         return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
1630 }
1631 
1632 /*ARGSUSED6*/
1633 static int32_t
1634 udf_space(
1635         struct vnode *vp,
1636         int32_t cmd,
1637         struct flock64 *bfp,
1638         int32_t flag,
1639         offset_t offset,
1640         cred_t *cr,
1641         caller_context_t *ct)
1642 {
1643         int32_t error = 0;
1644 
1645         ud_printf("udf_space\n");
1646 
1647         if (cmd != F_FREESP) {
1648                 error =  EINVAL;
1649         } else if ((error = convoff(vp, bfp, 0, offset)) == 0) {
1650                 error = ud_freesp(vp, bfp, flag, cr);
1651 
1652                 if (error == 0 && bfp->l_start == 0)
1653                         vnevent_truncate(vp, ct);
1654         }
1655 
1656         return (error);
1657 }
1658 
1659 /* ARGSUSED */
1660 static int32_t
1661 udf_getpage(
1662         struct vnode *vp,
1663         offset_t off,
1664         size_t len,
1665         uint32_t *protp,
1666         struct page **plarr,
1667         size_t plsz,
1668         struct seg *seg,
1669         caddr_t addr,
1670         enum seg_rw rw,
1671         struct cred *cr,
1672         caller_context_t *ct)
1673 {
1674         struct ud_inode *ip = VTOI(vp);
1675         int32_t error, has_holes, beyond_eof, seqmode, dolock;
1676         int32_t pgsize = PAGESIZE;
1677         struct udf_vfs *udf_vfsp = ip->i_udf;
1678         page_t **pl;
1679         u_offset_t pgoff, eoff, uoff;
1680         krw_t rwtype;
1681         caddr_t pgaddr;
1682 
1683         ud_printf("udf_getpage\n");
1684 
1685         uoff = (u_offset_t)off; /* type conversion */
1686         if (protp) {
1687                 *protp = PROT_ALL;
1688         }
1689         if (vp->v_flag & VNOMAP) {
1690                 return (ENOSYS);
1691         }
1692         seqmode = ip->i_nextr == uoff && rw != S_CREATE;
1693 
1694         rwtype = RW_READER;
1695         dolock = (rw_owner(&ip->i_contents) != curthread);
1696 retrylock:
1697 #ifdef  __lock_lint
1698         rw_enter(&ip->i_contents, rwtype);
1699 #else
1700         if (dolock) {
1701                 rw_enter(&ip->i_contents, rwtype);
1702         }
1703 #endif
1704 
1705         /*
1706          * We may be getting called as a side effect of a bmap using
1707          * fbread() when the blocks might be being allocated and the
1708          * size has not yet been up'ed.  In this case we want to be
1709          * able to return zero pages if we get back UDF_HOLE from
1710          * calling bmap for a non write case here.  We also might have
1711          * to read some frags from the disk into a page if we are
1712          * extending the number of frags for a given lbn in bmap().
1713          */
1714         beyond_eof = uoff + len > ip->i_size + PAGEOFFSET;
1715         if (beyond_eof && seg != segkmap) {
1716 #ifdef  __lock_lint
1717                 rw_exit(&ip->i_contents);
1718 #else
1719                 if (dolock) {
1720                         rw_exit(&ip->i_contents);
1721                 }
1722 #endif
1723                 return (EFAULT);
1724         }
1725 
1726         /*
1727          * Must hold i_contents lock throughout the call to pvn_getpages
1728          * since locked pages are returned from each call to ud_getapage.
1729          * Must *not* return locked pages and then try for contents lock
1730          * due to lock ordering requirements (inode > page)
1731          */
1732 
1733         has_holes = ud_bmap_has_holes(ip);
1734 
1735         if ((rw == S_WRITE || rw == S_CREATE) && (has_holes || beyond_eof)) {
1736                 int32_t blk_size, count;
1737                 u_offset_t offset;
1738 
1739                 /*
1740                  * We must acquire the RW_WRITER lock in order to
1741                  * call bmap_write().
1742                  */
1743                 if (dolock && rwtype == RW_READER) {
1744                         rwtype = RW_WRITER;
1745 
1746                         if (!rw_tryupgrade(&ip->i_contents)) {
1747 
1748                                 rw_exit(&ip->i_contents);
1749 
1750                                 goto retrylock;
1751                         }
1752                 }
1753 
1754                 /*
1755                  * May be allocating disk blocks for holes here as
1756                  * a result of mmap faults. write(2) does the bmap_write
1757                  * in rdip/wrip, not here. We are not dealing with frags
1758                  * in this case.
1759                  */
1760                 offset = uoff;
1761                 while ((offset < uoff + len) &&
1762                     (offset < ip->i_size)) {
1763                         /*
1764                          * the variable "bnp" is to simplify the expression for
1765                          * the compiler; * just passing in &bn to bmap_write
1766                          * causes a compiler "loop"
1767                          */
1768 
1769                         blk_size = udf_vfsp->udf_lbsize;
1770                         if ((offset + blk_size) > ip->i_size) {
1771                                 count = ip->i_size - offset;
1772                         } else {
1773                                 count = blk_size;
1774                         }
1775                         error = ud_bmap_write(ip, offset, count, 0, cr);
1776                         if (error) {
1777                                 goto update_inode;
1778                         }
1779                         offset += count; /* XXX - make this contig */
1780                 }
1781         }
1782 
1783         /*
1784          * Can be a reader from now on.
1785          */
1786 #ifdef  __lock_lint
1787         if (rwtype == RW_WRITER) {
1788                 rw_downgrade(&ip->i_contents);
1789         }
1790 #else
1791         if (dolock && rwtype == RW_WRITER) {
1792                 rw_downgrade(&ip->i_contents);
1793         }
1794 #endif
1795 
1796         /*
1797          * We remove PROT_WRITE in cases when the file has UDF holes
1798          * because we don't  want to call bmap_read() to check each
1799          * page if it is backed with a disk block.
1800          */
1801         if (protp && has_holes && rw != S_WRITE && rw != S_CREATE) {
1802                 *protp &= ~PROT_WRITE;
1803         }
1804 
1805         error = 0;
1806 
1807         /*
1808          * The loop looks up pages in the range <off, off + len).
1809          * For each page, we first check if we should initiate an asynchronous
1810          * read ahead before we call page_lookup (we may sleep in page_lookup
1811          * for a previously initiated disk read).
1812          */
1813         eoff = (uoff + len);
1814         for (pgoff = uoff, pgaddr = addr, pl = plarr;
1815             pgoff < eoff; /* empty */) {
1816                 page_t  *pp;
1817                 u_offset_t      nextrio;
1818                 se_t    se;
1819 
1820                 se = ((rw == S_CREATE) ? SE_EXCL : SE_SHARED);
1821 
1822                 /*
1823                  * Handle async getpage (faultahead)
1824                  */
1825                 if (plarr == NULL) {
1826                         ip->i_nextrio = pgoff;
1827                         ud_getpage_ra(vp, pgoff, seg, pgaddr);
1828                         pgoff += pgsize;
1829                         pgaddr += pgsize;
1830                         continue;
1831                 }
1832 
1833                 /*
1834                  * Check if we should initiate read ahead of next cluster.
1835                  * We call page_exists only when we need to confirm that
1836                  * we have the current page before we initiate the read ahead.
1837                  */
1838                 nextrio = ip->i_nextrio;
1839                 if (seqmode &&
1840                     pgoff + RD_CLUSTSZ(ip) >= nextrio && pgoff <= nextrio &&
1841                     nextrio < ip->i_size && page_exists(vp, pgoff))
1842                         ud_getpage_ra(vp, pgoff, seg, pgaddr);
1843 
1844                 if ((pp = page_lookup(vp, pgoff, se)) != NULL) {
1845 
1846                         /*
1847                          * We found the page in the page cache.
1848                          */
1849                         *pl++ = pp;
1850                         pgoff += pgsize;
1851                         pgaddr += pgsize;
1852                         len -= pgsize;
1853                         plsz -= pgsize;
1854                 } else  {
1855 
1856                         /*
1857                          * We have to create the page, or read it from disk.
1858                          */
1859                         if (error = ud_getpage_miss(vp, pgoff, len,
1860                             seg, pgaddr, pl, plsz, rw, seqmode)) {
1861                                 goto error_out;
1862                         }
1863 
1864                         while (*pl != NULL) {
1865                                 pl++;
1866                                 pgoff += pgsize;
1867                                 pgaddr += pgsize;
1868                                 len -= pgsize;
1869                                 plsz -= pgsize;
1870                         }
1871                 }
1872         }
1873 
1874         /*
1875          * Return pages up to plsz if they are in the page cache.
1876          * We cannot return pages if there is a chance that they are
1877          * backed with a UDF hole and rw is S_WRITE or S_CREATE.
1878          */
1879         if (plarr && !(has_holes && (rw == S_WRITE || rw == S_CREATE))) {
1880 
1881                 ASSERT((protp == NULL) ||
1882                     !(has_holes && (*protp & PROT_WRITE)));
1883 
1884                 eoff = pgoff + plsz;
1885                 while (pgoff < eoff) {
1886                         page_t          *pp;
1887 
1888                         if ((pp = page_lookup_nowait(vp, pgoff,
1889                             SE_SHARED)) == NULL)
1890                                 break;
1891 
1892                         *pl++ = pp;
1893                         pgoff += pgsize;
1894                         plsz -= pgsize;
1895                 }
1896         }
1897 
1898         if (plarr)
1899                 *pl = NULL;                     /* Terminate page list */
1900         ip->i_nextr = pgoff;
1901 
1902 error_out:
1903         if (error && plarr) {
1904                 /*
1905                  * Release any pages we have locked.
1906                  */
1907                 while (pl > &plarr[0])
1908                         page_unlock(*--pl);
1909 
1910                 plarr[0] = NULL;
1911         }
1912 
1913 update_inode:
1914 #ifdef  __lock_lint
1915         rw_exit(&ip->i_contents);
1916 #else
1917         if (dolock) {
1918                 rw_exit(&ip->i_contents);
1919         }
1920 #endif
1921 
1922         /*
1923          * If the inode is not already marked for IACC (in rwip() for read)
1924          * and the inode is not marked for no access time update (in rwip()
1925          * for write) then update the inode access time and mod time now.
1926          */
1927         mutex_enter(&ip->i_tlock);
1928         if ((ip->i_flag & (IACC | INOACC)) == 0) {
1929                 if ((rw != S_OTHER) && (ip->i_type != VDIR)) {
1930                         ip->i_flag |= IACC;
1931                 }
1932                 if (rw == S_WRITE) {
1933                         ip->i_flag |= IUPD;
1934                 }
1935                 ITIMES_NOLOCK(ip);
1936         }
1937         mutex_exit(&ip->i_tlock);
1938 
1939         return (error);
1940 }
1941 
1942 int32_t ud_delay = 1;
1943 
1944 /* ARGSUSED */
1945 static int32_t
1946 udf_putpage(
1947         struct vnode *vp,
1948         offset_t off,
1949         size_t len,
1950         int32_t flags,
1951         struct cred *cr,
1952         caller_context_t *ct)
1953 {
1954         struct ud_inode *ip;
1955         int32_t error = 0;
1956 
1957         ud_printf("udf_putpage\n");
1958 
1959         ip = VTOI(vp);
1960 #ifdef  __lock_lint
1961         rw_enter(&ip->i_contents, RW_WRITER);
1962 #endif
1963 
1964         if (vp->v_count == 0) {
1965                 cmn_err(CE_WARN, "ud_putpage : bad v_count");
1966                 error = EINVAL;
1967                 goto out;
1968         }
1969 
1970         if (vp->v_flag & VNOMAP) {
1971                 error = ENOSYS;
1972                 goto out;
1973         }
1974 
1975         if (flags & B_ASYNC) {
1976                 if (ud_delay && len &&
1977                     (flags & ~(B_ASYNC|B_DONTNEED|B_FREE)) == 0) {
1978                         mutex_enter(&ip->i_tlock);
1979 
1980                         /*
1981                          * If nobody stalled, start a new cluster.
1982                          */
1983                         if (ip->i_delaylen == 0) {
1984                                 ip->i_delayoff = off;
1985                                 ip->i_delaylen = len;
1986                                 mutex_exit(&ip->i_tlock);
1987                                 goto out;
1988                         }
1989 
1990                         /*
1991                          * If we have a full cluster or they are not contig,
1992                          * then push last cluster and start over.
1993                          */
1994                         if (ip->i_delaylen >= WR_CLUSTSZ(ip) ||
1995                             ip->i_delayoff + ip->i_delaylen != off) {
1996                                 u_offset_t doff;
1997                                 size_t dlen;
1998 
1999                                 doff = ip->i_delayoff;
2000                                 dlen = ip->i_delaylen;
2001                                 ip->i_delayoff = off;
2002                                 ip->i_delaylen = len;
2003                                 mutex_exit(&ip->i_tlock);
2004                                 error = ud_putpages(vp, doff, dlen, flags, cr);
2005                                 /* LMXXX - flags are new val, not old */
2006                                 goto out;
2007                         }
2008 
2009                         /*
2010                          * There is something there, it's not full, and
2011                          * it is contig.
2012                          */
2013                         ip->i_delaylen += len;
2014                         mutex_exit(&ip->i_tlock);
2015                         goto out;
2016                 }
2017 
2018                 /*
2019                  * Must have weird flags or we are not clustering.
2020                  */
2021         }
2022 
2023         error = ud_putpages(vp, off, len, flags, cr);
2024 
2025 out:
2026 #ifdef  __lock_lint
2027         rw_exit(&ip->i_contents);
2028 #endif
2029         return (error);
2030 }
2031 
2032 /* ARGSUSED */
2033 static int32_t
2034 udf_map(
2035         struct vnode *vp,
2036         offset_t off,
2037         struct as *as,
2038         caddr_t *addrp,
2039         size_t len,
2040         uint8_t prot,
2041         uint8_t maxprot,
2042         uint32_t flags,
2043         struct cred *cr,
2044         caller_context_t *ct)
2045 {
2046         struct segvn_crargs vn_a;
2047         int32_t error = 0;
2048 
2049         ud_printf("udf_map\n");
2050 
2051         if (vp->v_flag & VNOMAP) {
2052                 error = ENOSYS;
2053                 goto end;
2054         }
2055 
2056         if ((off < (offset_t)0) ||
2057             ((off + len) < (offset_t)0)) {
2058                 error = EINVAL;
2059                 goto end;
2060         }
2061 
2062         if (vp->v_type != VREG) {
2063                 error = ENODEV;
2064                 goto end;
2065         }
2066 
2067         /*
2068          * If file is being locked, disallow mapping.
2069          */
2070         if (vn_has_mandatory_locks(vp, VTOI(vp)->i_char)) {
2071                 error = EAGAIN;
2072                 goto end;
2073         }
2074 
2075         as_rangelock(as);
2076         error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
2077         if (error != 0) {
2078                 as_rangeunlock(as);
2079                 goto end;
2080         }
2081 
2082         vn_a.vp = vp;
2083         vn_a.offset = off;
2084         vn_a.type = flags & MAP_TYPE;
2085         vn_a.prot = prot;
2086         vn_a.maxprot = maxprot;
2087         vn_a.cred = cr;
2088         vn_a.amp = NULL;
2089         vn_a.flags = flags & ~MAP_TYPE;
2090         vn_a.szc = 0;
2091         vn_a.lgrp_mem_policy_flags = 0;
2092 
2093         error = as_map(as, *addrp, len, segvn_create, (caddr_t)&vn_a);
2094         as_rangeunlock(as);
2095 
2096 end:
2097         return (error);
2098 }
2099 
2100 /* ARGSUSED */
2101 static int32_t
2102 udf_addmap(struct vnode *vp,
2103         offset_t off,
2104         struct as *as,
2105         caddr_t addr,
2106         size_t len,
2107         uint8_t prot,
2108         uint8_t maxprot,
2109         uint32_t flags,
2110         struct cred *cr,
2111         caller_context_t *ct)
2112 {
2113         struct ud_inode *ip = VTOI(vp);
2114 
2115         ud_printf("udf_addmap\n");
2116 
2117         if (vp->v_flag & VNOMAP) {
2118                 return (ENOSYS);
2119         }
2120 
2121         mutex_enter(&ip->i_tlock);
2122         ip->i_mapcnt += btopr(len);
2123         mutex_exit(&ip->i_tlock);
2124 
2125         return (0);
2126 }
2127 
2128 /* ARGSUSED */
2129 static int32_t
2130 udf_delmap(
2131         struct vnode *vp, offset_t off,
2132         struct as *as,
2133         caddr_t addr,
2134         size_t len,
2135         uint32_t prot,
2136         uint32_t maxprot,
2137         uint32_t flags,
2138         struct cred *cr,
2139         caller_context_t *ct)
2140 {
2141         struct ud_inode *ip = VTOI(vp);
2142 
2143         ud_printf("udf_delmap\n");
2144 
2145         if (vp->v_flag & VNOMAP) {
2146                 return (ENOSYS);
2147         }
2148 
2149         mutex_enter(&ip->i_tlock);
2150         ip->i_mapcnt -= btopr(len);  /* Count released mappings */
2151         ASSERT(ip->i_mapcnt >= 0);
2152         mutex_exit(&ip->i_tlock);
2153 
2154         return (0);
2155 }
2156 
2157 /* ARGSUSED */
2158 static int32_t
2159 udf_l_pathconf(
2160         struct vnode *vp,
2161         int32_t cmd,
2162         ulong_t *valp,
2163         struct cred *cr,
2164         caller_context_t *ct)
2165 {
2166         int32_t error = 0;
2167 
2168         ud_printf("udf_l_pathconf\n");
2169 
2170         if (cmd == _PC_FILESIZEBITS) {
2171                 /*
2172                  * udf supports 64 bits as file size
2173                  * but there are several other restrictions
2174                  * it only supports 32-bit block numbers and
2175                  * daddr32_t is only and int32_t so taking these
2176                  * into account we can stay just as where ufs is
2177                  */
2178                 *valp = 41;
2179         } else if (cmd == _PC_TIMESTAMP_RESOLUTION) {
2180                 /* nanosecond timestamp resolution */
2181                 *valp = 1L;
2182         } else {
2183                 error = fs_pathconf(vp, cmd, valp, cr, ct);
2184         }
2185 
2186         return (error);
2187 }
2188 
2189 uint32_t ud_pageio_reads = 0, ud_pageio_writes = 0;
2190 #ifndef __lint
2191 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_reads))
2192 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_writes))
2193 #endif
2194 /*
2195  * Assumption is that there will not be a pageio request
2196  * to a enbedded file
2197  */
2198 /* ARGSUSED */
2199 static int32_t
2200 udf_pageio(
2201         struct vnode *vp,
2202         struct page *pp,
2203         u_offset_t io_off,
2204         size_t io_len,
2205         int32_t flags,
2206         struct cred *cr,
2207         caller_context_t *ct)
2208 {
2209         daddr_t bn;
2210         struct buf *bp;
2211         struct ud_inode *ip = VTOI(vp);
2212         int32_t dolock, error = 0, contig, multi_io;
2213         size_t done_len = 0, cur_len = 0;
2214         page_t *npp = NULL, *opp = NULL, *cpp = pp;
2215 
2216         if (pp == NULL) {
2217                 return (EINVAL);
2218         }
2219 
2220         dolock = (rw_owner(&ip->i_contents) != curthread);
2221 
2222         /*
2223          * We need a better check.  Ideally, we would use another
2224          * vnodeops so that hlocked and forcibly unmounted file
2225          * systems would return EIO where appropriate and w/o the
2226          * need for these checks.
2227          */
2228         if (ip->i_udf == NULL) {
2229                 return (EIO);
2230         }
2231 
2232 #ifdef  __lock_lint
2233         rw_enter(&ip->i_contents, RW_READER);
2234 #else
2235         if (dolock) {
2236                 rw_enter(&ip->i_contents, RW_READER);
2237         }
2238 #endif
2239 
2240         /*
2241          * Break the io request into chunks, one for each contiguous
2242          * stretch of disk blocks in the target file.
2243          */
2244         while (done_len < io_len) {
2245                 ASSERT(cpp);
2246                 bp = NULL;
2247                 contig = 0;
2248                 if (error = ud_bmap_read(ip, (u_offset_t)(io_off + done_len),
2249                     &bn, &contig)) {
2250                         break;
2251                 }
2252 
2253                 if (bn == UDF_HOLE) {   /* No holey swapfiles */
2254                         cmn_err(CE_WARN, "SWAP file has HOLES");
2255                         error = EINVAL;
2256                         break;
2257                 }
2258 
2259                 cur_len = MIN(io_len - done_len, contig);
2260 
2261                 /*
2262                  * Check if more than one I/O is
2263                  * required to complete the given
2264                  * I/O operation
2265                  */
2266                 if (ip->i_udf->udf_lbsize < PAGESIZE) {
2267                         if (cur_len >= PAGESIZE) {
2268                                 multi_io = 0;
2269                                 cur_len &= PAGEMASK;
2270                         } else {
2271                                 multi_io = 1;
2272                                 cur_len = MIN(io_len - done_len, PAGESIZE);
2273                         }
2274                 }
2275                 page_list_break(&cpp, &npp, btop(cur_len));
2276 
2277                 bp = pageio_setup(cpp, cur_len, ip->i_devvp, flags);
2278                 ASSERT(bp != NULL);
2279 
2280                 bp->b_edev = ip->i_dev;
2281                 bp->b_dev = cmpdev(ip->i_dev);
2282                 bp->b_blkno = bn;
2283                 bp->b_un.b_addr = (caddr_t)0;
2284                 bp->b_file = vp;
2285                 bp->b_offset = (offset_t)(io_off + done_len);
2286 
2287 /*
2288  *              ub.ub_pageios.value.ul++;
2289  */
2290                 if (multi_io == 0) {
2291                         (void) bdev_strategy(bp);
2292                 } else {
2293                         error = ud_multi_strat(ip, cpp, bp,
2294                             (u_offset_t)(io_off + done_len));
2295                         if (error != 0) {
2296                                 pageio_done(bp);
2297                                 break;
2298                         }
2299                 }
2300                 if (flags & B_READ) {
2301                         ud_pageio_reads++;
2302                 } else {
2303                         ud_pageio_writes++;
2304                 }
2305 
2306                 /*
2307                  * If the request is not B_ASYNC, wait for i/o to complete
2308                  * and re-assemble the page list to return to the caller.
2309                  * If it is B_ASYNC we leave the page list in pieces and
2310                  * cleanup() will dispose of them.
2311                  */
2312                 if ((flags & B_ASYNC) == 0) {
2313                         error = biowait(bp);
2314                         pageio_done(bp);
2315                         if (error) {
2316                                 break;
2317                         }
2318                         page_list_concat(&opp, &cpp);
2319                 }
2320                 cpp = npp;
2321                 npp = NULL;
2322                 done_len += cur_len;
2323         }
2324 
2325         ASSERT(error || (cpp == NULL && npp == NULL && done_len == io_len));
2326         if (error) {
2327                 if (flags & B_ASYNC) {
2328                         /* Cleanup unprocessed parts of list */
2329                         page_list_concat(&cpp, &npp);
2330                         if (flags & B_READ) {
2331                                 pvn_read_done(cpp, B_ERROR);
2332                         } else {
2333                                 pvn_write_done(cpp, B_ERROR);
2334                         }
2335                 } else {
2336                         /* Re-assemble list and let caller clean up */
2337                         page_list_concat(&opp, &cpp);
2338                         page_list_concat(&opp, &npp);
2339                 }
2340         }
2341 
2342 #ifdef  __lock_lint
2343         rw_exit(&ip->i_contents);
2344 #else
2345         if (dolock) {
2346                 rw_exit(&ip->i_contents);
2347         }
2348 #endif
2349         return (error);
2350 }
2351 
2352 
2353 
2354 
2355 /* -------------------- local functions --------------------------- */
2356 
2357 
2358 
2359 int32_t
2360 ud_rdwri(enum uio_rw rw, int32_t ioflag,
2361         struct ud_inode *ip, caddr_t base, int32_t len,
2362         offset_t offset, enum uio_seg seg, int32_t *aresid, struct cred *cr)
2363 {
2364         int32_t error;
2365         struct uio auio;
2366         struct iovec aiov;
2367 
2368         ud_printf("ud_rdwri\n");
2369 
2370         bzero((caddr_t)&auio, sizeof (uio_t));
2371         bzero((caddr_t)&aiov, sizeof (iovec_t));
2372 
2373         aiov.iov_base = base;
2374         aiov.iov_len = len;
2375         auio.uio_iov = &aiov;
2376         auio.uio_iovcnt = 1;
2377         auio.uio_loffset = offset;
2378         auio.uio_segflg = (int16_t)seg;
2379         auio.uio_resid = len;
2380 
2381         if (rw == UIO_WRITE) {
2382                 auio.uio_fmode = FWRITE;
2383                 auio.uio_extflg = UIO_COPY_DEFAULT;
2384                 auio.uio_llimit = curproc->p_fsz_ctl;
2385                 error = ud_wrip(ip, &auio, ioflag, cr);
2386         } else {
2387                 auio.uio_fmode = FREAD;
2388                 auio.uio_extflg = UIO_COPY_CACHED;
2389                 auio.uio_llimit = MAXOFFSET_T;
2390                 error = ud_rdip(ip, &auio, ioflag, cr);
2391         }
2392 
2393         if (aresid) {
2394                 *aresid = auio.uio_resid;
2395         } else if (auio.uio_resid) {
2396                 error = EIO;
2397         }
2398         return (error);
2399 }
2400 
2401 /*
2402  * Free behind hacks.  The pager is busted.
2403  * XXX - need to pass the information down to writedone() in a flag like B_SEQ
2404  * or B_FREE_IF_TIGHT_ON_MEMORY.
2405  */
2406 int32_t ud_freebehind = 1;
2407 int32_t ud_smallfile = 32 * 1024;
2408 
2409 /* ARGSUSED */
2410 int32_t
2411 ud_getpage_miss(struct vnode *vp, u_offset_t off,
2412         size_t len, struct seg *seg, caddr_t addr, page_t *pl[],
2413         size_t plsz, enum seg_rw rw, int32_t seq)
2414 {
2415         struct ud_inode *ip = VTOI(vp);
2416         int32_t err = 0;
2417         size_t io_len;
2418         u_offset_t io_off;
2419         u_offset_t pgoff;
2420         page_t *pp;
2421 
2422         pl[0] = NULL;
2423 
2424         /*
2425          * Figure out whether the page can be created, or must be
2426          * read from the disk
2427          */
2428         if (rw == S_CREATE) {
2429                 if ((pp = page_create_va(vp, off,
2430                     PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
2431                         cmn_err(CE_WARN, "ud_getpage_miss: page_create");
2432                         return (EINVAL);
2433                 }
2434                 io_len = PAGESIZE;
2435         } else {
2436                 pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
2437                     &io_len, off, PAGESIZE, 0);
2438 
2439                 /*
2440                  * Some other thread has entered the page.
2441                  * ud_getpage will retry page_lookup.
2442                  */
2443                 if (pp == NULL) {
2444                         return (0);
2445                 }
2446 
2447                 /*
2448                  * Fill the page with as much data as we can from the file.
2449                  */
2450                 err = ud_page_fill(ip, pp, off, B_READ, &pgoff);
2451                 if (err) {
2452                         pvn_read_done(pp, B_ERROR);
2453                         return (err);
2454                 }
2455 
2456                 /*
2457                  * XXX ??? ufs has io_len instead of pgoff below
2458                  */
2459                 ip->i_nextrio = off + ((pgoff + PAGESIZE - 1) & PAGEMASK);
2460 
2461                 /*
2462                  * If the file access is sequential, initiate read ahead
2463                  * of the next cluster.
2464                  */
2465                 if (seq && ip->i_nextrio < ip->i_size) {
2466                         ud_getpage_ra(vp, off, seg, addr);
2467                 }
2468         }
2469 
2470 outmiss:
2471         pvn_plist_init(pp, pl, plsz, (offset_t)off, io_len, rw);
2472         return (err);
2473 }
2474 
2475 /* ARGSUSED */
2476 void
2477 ud_getpage_ra(struct vnode *vp,
2478         u_offset_t off, struct seg *seg, caddr_t addr)
2479 {
2480         page_t *pp;
2481         size_t io_len;
2482         struct ud_inode *ip = VTOI(vp);
2483         u_offset_t io_off = ip->i_nextrio, pgoff;
2484         caddr_t addr2 = addr + (io_off - off);
2485         daddr_t bn;
2486         int32_t contig = 0;
2487 
2488         /*
2489          * Is this test needed?
2490          */
2491 
2492         if (addr2 >= seg->s_base + seg->s_size) {
2493                 return;
2494         }
2495 
2496         contig = 0;
2497         if (ud_bmap_read(ip, io_off, &bn, &contig) != 0 || bn == UDF_HOLE) {
2498                 return;
2499         }
2500 
2501         pp = pvn_read_kluster(vp, io_off, seg, addr2,
2502             &io_off, &io_len, io_off, PAGESIZE, 1);
2503 
2504         /*
2505          * Some other thread has entered the page.
2506          * So no read head done here (ie we will have to and wait
2507          * for the read when needed).
2508          */
2509 
2510         if (pp == NULL) {
2511                 return;
2512         }
2513 
2514         (void) ud_page_fill(ip, pp, io_off, (B_READ|B_ASYNC), &pgoff);
2515         ip->i_nextrio =  io_off + ((pgoff + PAGESIZE - 1) & PAGEMASK);
2516 }
2517 
2518 int
2519 ud_page_fill(struct ud_inode *ip, page_t *pp, u_offset_t off,
2520         uint32_t bflgs, u_offset_t *pg_off)
2521 {
2522         daddr_t bn;
2523         struct buf *bp;
2524         caddr_t kaddr, caddr;
2525         int32_t error = 0, contig = 0, multi_io = 0;
2526         int32_t lbsize = ip->i_udf->udf_lbsize;
2527         int32_t lbmask = ip->i_udf->udf_lbmask;
2528         uint64_t isize;
2529 
2530         isize = (ip->i_size + lbmask) & (~lbmask);
2531         if (ip->i_desc_type == ICB_FLAG_ONE_AD) {
2532 
2533                 /*
2534                  * Embedded file read file_entry
2535                  * from buffer cache and copy the required
2536                  * portions
2537                  */
2538                 bp = ud_bread(ip->i_dev,
2539                     ip->i_icb_lbano << ip->i_udf->udf_l2d_shift, lbsize);
2540                 if ((bp->b_error == 0) &&
2541                     (bp->b_resid == 0)) {
2542 
2543                         caddr = bp->b_un.b_addr + ip->i_data_off;
2544 
2545                         /*
2546                          * mapin to kvm
2547                          */
2548                         kaddr = (caddr_t)ppmapin(pp,
2549                             PROT_READ | PROT_WRITE, (caddr_t)-1);
2550                         (void) kcopy(caddr, kaddr, ip->i_size);
2551 
2552                         /*
2553                          * mapout of kvm
2554                          */
2555                         ppmapout(kaddr);
2556                 }
2557                 brelse(bp);
2558                 contig = ip->i_size;
2559         } else {
2560 
2561                 /*
2562                  * Get the continuous size and block number
2563                  * at offset "off"
2564                  */
2565                 if (error = ud_bmap_read(ip, off, &bn, &contig))
2566                         goto out;
2567                 contig = MIN(contig, PAGESIZE);
2568                 contig = (contig + lbmask) & (~lbmask);
2569 
2570                 /*
2571                  * Zero part of the page which we are not
2572                  * going to read from the disk.
2573                  */
2574 
2575                 if (bn == UDF_HOLE) {
2576 
2577                         /*
2578                          * This is a HOLE. Just zero out
2579                          * the page
2580                          */
2581                         if (((off + contig) == isize) ||
2582                             (contig == PAGESIZE)) {
2583                                 pagezero(pp->p_prev, 0, PAGESIZE);
2584                                 goto out;
2585                         }
2586                 }
2587 
2588                 if (contig < PAGESIZE) {
2589                         uint64_t count;
2590 
2591                         count = isize - off;
2592                         if (contig != count) {
2593                                 multi_io = 1;
2594                                 contig = (int32_t)(MIN(count, PAGESIZE));
2595                         } else {
2596                                 pagezero(pp->p_prev, contig, PAGESIZE - contig);
2597                         }
2598                 }
2599 
2600                 /*
2601                  * Get a bp and initialize it
2602                  */
2603                 bp = pageio_setup(pp, contig, ip->i_devvp, bflgs);
2604                 ASSERT(bp != NULL);
2605 
2606                 bp->b_edev = ip->i_dev;
2607                 bp->b_dev = cmpdev(ip->i_dev);
2608                 bp->b_blkno = bn;
2609                 bp->b_un.b_addr = 0;
2610                 bp->b_file = ip->i_vnode;
2611 
2612                 /*
2613                  * Start I/O
2614                  */
2615                 if (multi_io == 0) {
2616 
2617                         /*
2618                          * Single I/O is sufficient for this page
2619                          */
2620                         (void) bdev_strategy(bp);
2621                 } else {
2622 
2623                         /*
2624                          * We need to do the I/O in
2625                          * piece's
2626                          */
2627                         error = ud_multi_strat(ip, pp, bp, off);
2628                         if (error != 0) {
2629                                 goto out;
2630                         }
2631                 }
2632                 if ((bflgs & B_ASYNC) == 0) {
2633 
2634                         /*
2635                          * Wait for i/o to complete.
2636                          */
2637 
2638                         error = biowait(bp);
2639                         pageio_done(bp);
2640                         if (error) {
2641                                 goto out;
2642                         }
2643                 }
2644         }
2645         if ((off + contig) >= ip->i_size) {
2646                 contig = ip->i_size - off;
2647         }
2648 
2649 out:
2650         *pg_off = contig;
2651         return (error);
2652 }
2653 
2654 int32_t
2655 ud_putpages(struct vnode *vp, offset_t off,
2656         size_t len, int32_t flags, struct cred *cr)
2657 {
2658         struct ud_inode *ip;
2659         page_t *pp;
2660         u_offset_t io_off;
2661         size_t io_len;
2662         u_offset_t eoff;
2663         int32_t err = 0;
2664         int32_t dolock;
2665 
2666         ud_printf("ud_putpages\n");
2667 
2668         if (vp->v_count == 0) {
2669                 cmn_err(CE_WARN, "ud_putpages: bad v_count");
2670                 return (EINVAL);
2671         }
2672 
2673         ip = VTOI(vp);
2674 
2675         /*
2676          * Acquire the readers/write inode lock before locking
2677          * any pages in this inode.
2678          * The inode lock is held during i/o.
2679          */
2680         if (len == 0) {
2681                 mutex_enter(&ip->i_tlock);
2682                 ip->i_delayoff = ip->i_delaylen = 0;
2683                 mutex_exit(&ip->i_tlock);
2684         }
2685 #ifdef  __lock_lint
2686         rw_enter(&ip->i_contents, RW_READER);
2687 #else
2688         dolock = (rw_owner(&ip->i_contents) != curthread);
2689         if (dolock) {
2690                 rw_enter(&ip->i_contents, RW_READER);
2691         }
2692 #endif
2693 
2694         if (!vn_has_cached_data(vp)) {
2695 #ifdef  __lock_lint
2696                 rw_exit(&ip->i_contents);
2697 #else
2698                 if (dolock) {
2699                         rw_exit(&ip->i_contents);
2700                 }
2701 #endif
2702                 return (0);
2703         }
2704 
2705         if (len == 0) {
2706                 /*
2707                  * Search the entire vp list for pages >= off.
2708                  */
2709                 err = pvn_vplist_dirty(vp, (u_offset_t)off, ud_putapage,
2710                     flags, cr);
2711         } else {
2712                 /*
2713                  * Loop over all offsets in the range looking for
2714                  * pages to deal with.
2715                  */
2716                 if ((eoff = blkroundup(ip->i_udf, ip->i_size)) != 0) {
2717                         eoff = MIN(off + len, eoff);
2718                 } else {
2719                         eoff = off + len;
2720                 }
2721 
2722                 for (io_off = off; io_off < eoff; io_off += io_len) {
2723                         /*
2724                          * If we are not invalidating, synchronously
2725                          * freeing or writing pages, use the routine
2726                          * page_lookup_nowait() to prevent reclaiming
2727                          * them from the free list.
2728                          */
2729                         if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
2730                                 pp = page_lookup(vp, io_off,
2731                                     (flags & (B_INVAL | B_FREE)) ?
2732                                     SE_EXCL : SE_SHARED);
2733                         } else {
2734                                 pp = page_lookup_nowait(vp, io_off,
2735                                     (flags & B_FREE) ? SE_EXCL : SE_SHARED);
2736                         }
2737 
2738                         if (pp == NULL || pvn_getdirty(pp, flags) == 0) {
2739                                 io_len = PAGESIZE;
2740                         } else {
2741 
2742                                 err = ud_putapage(vp, pp,
2743                                     &io_off, &io_len, flags, cr);
2744                                 if (err != 0) {
2745                                         break;
2746                                 }
2747                                 /*
2748                                  * "io_off" and "io_len" are returned as
2749                                  * the range of pages we actually wrote.
2750                                  * This allows us to skip ahead more quickly
2751                                  * since several pages may've been dealt
2752                                  * with by this iteration of the loop.
2753                                  */
2754                         }
2755                 }
2756         }
2757         if (err == 0 && off == 0 && (len == 0 || len >= ip->i_size)) {
2758                 /*
2759                  * We have just sync'ed back all the pages on
2760                  * the inode, turn off the IMODTIME flag.
2761                  */
2762                 mutex_enter(&ip->i_tlock);
2763                 ip->i_flag &= ~IMODTIME;
2764                 mutex_exit(&ip->i_tlock);
2765         }
2766 #ifdef  __lock_lint
2767         rw_exit(&ip->i_contents);
2768 #else
2769         if (dolock) {
2770                 rw_exit(&ip->i_contents);
2771         }
2772 #endif
2773         return (err);
2774 }
2775 
2776 /* ARGSUSED */
2777 int32_t
2778 ud_putapage(struct vnode *vp,
2779         page_t *pp, u_offset_t *offp,
2780         size_t *lenp, int32_t flags, struct cred *cr)
2781 {
2782         daddr_t bn;
2783         size_t io_len;
2784         struct ud_inode *ip;
2785         int32_t error = 0, contig, multi_io = 0;
2786         struct udf_vfs *udf_vfsp;
2787         u_offset_t off, io_off;
2788         caddr_t kaddr, caddr;
2789         struct buf *bp = NULL;
2790         int32_t lbmask;
2791         uint64_t isize;
2792         uint16_t crc_len;
2793         struct file_entry *fe;
2794 
2795         ud_printf("ud_putapage\n");
2796 
2797         ip = VTOI(vp);
2798         ASSERT(ip);
2799         ASSERT(RW_LOCK_HELD(&ip->i_contents));
2800         lbmask = ip->i_udf->udf_lbmask;
2801         isize = (ip->i_size + lbmask) & (~lbmask);
2802 
2803         udf_vfsp = ip->i_udf;
2804         ASSERT(udf_vfsp->udf_flags & UDF_FL_RW);
2805 
2806         /*
2807          * If the modified time on the inode has not already been
2808          * set elsewhere (e.g. for write/setattr) we set the time now.
2809          * This gives us approximate modified times for mmap'ed files
2810          * which are modified via stores in the user address space.
2811          */
2812         if (((ip->i_flag & IMODTIME) == 0) || (flags & B_FORCE)) {
2813                 mutex_enter(&ip->i_tlock);
2814                 ip->i_flag |= IUPD;
2815                 ITIMES_NOLOCK(ip);
2816                 mutex_exit(&ip->i_tlock);
2817         }
2818 
2819 
2820         /*
2821          * Align the request to a block boundry (for old file systems),
2822          * and go ask bmap() how contiguous things are for this file.
2823          */
2824         off = pp->p_offset & ~(offset_t)lbmask;
2825                                 /* block align it */
2826 
2827 
2828         if (ip->i_desc_type == ICB_FLAG_ONE_AD) {
2829                 ASSERT(ip->i_size <= ip->i_max_emb);
2830 
2831                 pp = pvn_write_kluster(vp, pp, &io_off,
2832                     &io_len, off, PAGESIZE, flags);
2833                 if (io_len == 0) {
2834                         io_len = PAGESIZE;
2835                 }
2836 
2837                 bp = ud_bread(ip->i_dev,
2838                     ip->i_icb_lbano << udf_vfsp->udf_l2d_shift,
2839                     udf_vfsp->udf_lbsize);
2840                 fe = (struct file_entry *)bp->b_un.b_addr;
2841                 if ((bp->b_flags & B_ERROR) ||
2842                     (ud_verify_tag_and_desc(&fe->fe_tag, UD_FILE_ENTRY,
2843                     ip->i_icb_block,
2844                     1, udf_vfsp->udf_lbsize) != 0)) {
2845                         if (pp != NULL)
2846                                 pvn_write_done(pp, B_ERROR | B_WRITE | flags);
2847                         if (bp->b_flags & B_ERROR) {
2848                                 error = EIO;
2849                         } else {
2850                                 error = EINVAL;
2851                         }
2852                         brelse(bp);
2853                         return (error);
2854                 }
2855                 if ((bp->b_error == 0) &&
2856                     (bp->b_resid == 0)) {
2857 
2858                         caddr = bp->b_un.b_addr + ip->i_data_off;
2859                         kaddr = (caddr_t)ppmapin(pp,
2860                             PROT_READ | PROT_WRITE, (caddr_t)-1);
2861                         (void) kcopy(kaddr, caddr, ip->i_size);
2862                         ppmapout(kaddr);
2863                 }
2864                 crc_len = offsetof(struct file_entry, fe_spec) +
2865                     SWAP_32(fe->fe_len_ear);
2866                 crc_len += ip->i_size;
2867                 ud_make_tag(ip->i_udf, &fe->fe_tag,
2868                     UD_FILE_ENTRY, ip->i_icb_block, crc_len);
2869 
2870                 bwrite(bp);
2871 
2872                 if (flags & B_ASYNC) {
2873                         pvn_write_done(pp, flags);
2874                 }
2875                 contig = ip->i_size;
2876         } else {
2877 
2878                 if (error = ud_bmap_read(ip, off, &bn, &contig)) {
2879                         goto out;
2880                 }
2881                 contig = MIN(contig, PAGESIZE);
2882                 contig = (contig + lbmask) & (~lbmask);
2883 
2884                 if (contig < PAGESIZE) {
2885                         uint64_t count;
2886 
2887                         count = isize - off;
2888                         if (contig != count) {
2889                                 multi_io = 1;
2890                                 contig = (int32_t)(MIN(count, PAGESIZE));
2891                         }
2892                 }
2893 
2894                 if ((off + contig) > isize) {
2895                         contig = isize - off;
2896                 }
2897 
2898                 if (contig > PAGESIZE) {
2899                         if (contig & PAGEOFFSET) {
2900                                 contig &= PAGEMASK;
2901                         }
2902                 }
2903 
2904                 pp = pvn_write_kluster(vp, pp, &io_off,
2905                     &io_len, off, contig, flags);
2906                 if (io_len == 0) {
2907                         io_len = PAGESIZE;
2908                 }
2909 
2910                 bp = pageio_setup(pp, contig, ip->i_devvp, B_WRITE | flags);
2911                 ASSERT(bp != NULL);
2912 
2913                 bp->b_edev = ip->i_dev;
2914                 bp->b_dev = cmpdev(ip->i_dev);
2915                 bp->b_blkno = bn;
2916                 bp->b_un.b_addr = 0;
2917                 bp->b_file = vp;
2918                 bp->b_offset = (offset_t)off;
2919 
2920 
2921                 /*
2922                  * write throttle
2923                  */
2924                 ASSERT(bp->b_iodone == NULL);
2925                 bp->b_iodone = ud_iodone;
2926                 mutex_enter(&ip->i_tlock);
2927                 ip->i_writes += bp->b_bcount;
2928                 mutex_exit(&ip->i_tlock);
2929 
2930                 if (multi_io == 0) {
2931 
2932                         (void) bdev_strategy(bp);
2933                 } else {
2934                         error = ud_multi_strat(ip, pp, bp, off);
2935                         if (error != 0) {
2936                                 goto out;
2937                         }
2938                 }
2939 
2940                 if ((flags & B_ASYNC) == 0) {
2941                         /*
2942                          * Wait for i/o to complete.
2943                          */
2944                         error = biowait(bp);
2945                         pageio_done(bp);
2946                 }
2947         }
2948 
2949         if ((flags & B_ASYNC) == 0) {
2950                 pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
2951         }
2952 
2953         pp = NULL;
2954 
2955 out:
2956         if (error != 0 && pp != NULL) {
2957                 pvn_write_done(pp, B_ERROR | B_WRITE | flags);
2958         }
2959 
2960         if (offp) {
2961                 *offp = io_off;
2962         }
2963         if (lenp) {
2964                 *lenp = io_len;
2965         }
2966 
2967         return (error);
2968 }
2969 
2970 
2971 int32_t
2972 ud_iodone(struct buf *bp)
2973 {
2974         struct ud_inode *ip;
2975 
2976         ASSERT((bp->b_pages->p_vnode != NULL) && !(bp->b_flags & B_READ));
2977 
2978         bp->b_iodone = NULL;
2979 
2980         ip = VTOI(bp->b_pages->p_vnode);
2981 
2982         mutex_enter(&ip->i_tlock);
2983         if (ip->i_writes >= ud_LW) {
2984                 if ((ip->i_writes -= bp->b_bcount) <= ud_LW) {
2985                         if (ud_WRITES) {
2986                                 cv_broadcast(&ip->i_wrcv); /* wake all up */
2987                         }
2988                 }
2989         } else {
2990                 ip->i_writes -= bp->b_bcount;
2991         }
2992         mutex_exit(&ip->i_tlock);
2993         iodone(bp);
2994         return (0);
2995 }
2996 
2997 /* ARGSUSED3 */
2998 int32_t
2999 ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr)
3000 {
3001         struct vnode *vp;
3002         struct udf_vfs *udf_vfsp;
3003         krw_t rwtype;
3004         caddr_t base;
3005         uint32_t flags;
3006         int32_t error, n, on, mapon, dofree;
3007         u_offset_t off;
3008         long oresid = uio->uio_resid;
3009 
3010         ASSERT(RW_LOCK_HELD(&ip->i_contents));
3011         if ((ip->i_type != VREG) &&
3012             (ip->i_type != VDIR) &&
3013             (ip->i_type != VLNK)) {
3014                 return (EIO);
3015         }
3016 
3017         if (uio->uio_loffset > MAXOFFSET_T) {
3018                 return (0);
3019         }
3020 
3021         if ((uio->uio_loffset < (offset_t)0) ||
3022             ((uio->uio_loffset + uio->uio_resid) < 0)) {
3023                 return (EINVAL);
3024         }
3025         if (uio->uio_resid == 0) {
3026                 return (0);
3027         }
3028 
3029         vp = ITOV(ip);
3030         udf_vfsp = ip->i_udf;
3031         mutex_enter(&ip->i_tlock);
3032         ip->i_flag |= IACC;
3033         mutex_exit(&ip->i_tlock);
3034 
3035         rwtype = (rw_write_held(&ip->i_contents)?RW_WRITER:RW_READER);
3036 
3037         do {
3038                 offset_t diff;
3039                 u_offset_t uoff = uio->uio_loffset;
3040                 off = uoff & (offset_t)MAXBMASK;
3041                 mapon = (int)(uoff & (offset_t)MAXBOFFSET);
3042                 on = (int)blkoff(udf_vfsp, uoff);
3043                 n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);
3044 
3045                 diff = ip->i_size - uoff;
3046 
3047                 if (diff <= (offset_t)0) {
3048                         error = 0;
3049                         goto out;
3050                 }
3051                 if (diff < (offset_t)n) {
3052                         n = (int)diff;
3053                 }
3054                 dofree = ud_freebehind &&
3055                     ip->i_nextr == (off & PAGEMASK) &&
3056                     off > ud_smallfile;
3057 
3058 #ifndef __lock_lint
3059                 if (rwtype == RW_READER) {
3060                         rw_exit(&ip->i_contents);
3061                 }
3062 #endif
3063 
3064                 base = segmap_getmapflt(segkmap, vp, (off + mapon),
3065                     (uint32_t)n, 1, S_READ);
3066                 error = uiomove(base + mapon, (long)n, UIO_READ, uio);
3067 
3068                 flags = 0;
3069                 if (!error) {
3070                         /*
3071                          * If read a whole block, or read to eof,
3072                          * won't need this buffer again soon.
3073                          */
3074                         if (n + on == MAXBSIZE && ud_freebehind && dofree &&
3075                             freemem < lotsfree + pages_before_pager) {
3076                                 flags = SM_FREE | SM_DONTNEED |SM_ASYNC;
3077                         }
3078                         /*
3079                          * In POSIX SYNC (FSYNC and FDSYNC) read mode,
3080                          * we want to make sure that the page which has
3081                          * been read, is written on disk if it is dirty.
3082                          * And corresponding indirect blocks should also
3083                          * be flushed out.
3084                          */
3085                         if ((ioflag & FRSYNC) && (ioflag & (FSYNC|FDSYNC))) {
3086                                 flags &= ~SM_ASYNC;
3087                                 flags |= SM_WRITE;
3088                         }
3089                         error = segmap_release(segkmap, base, flags);
3090                 } else    {
3091                         (void) segmap_release(segkmap, base, flags);
3092                 }
3093 
3094 #ifndef __lock_lint
3095                 if (rwtype == RW_READER) {
3096                         rw_enter(&ip->i_contents, rwtype);
3097                 }
3098 #endif
3099         } while (error == 0 && uio->uio_resid > 0 && n != 0);
3100 out:
3101         /*
3102          * Inode is updated according to this table if FRSYNC is set.
3103          *
3104          *      FSYNC   FDSYNC(posix.4)
3105          *      --------------------------
3106          *      always  IATTCHG|IBDWRITE
3107          */
3108         if (ioflag & FRSYNC) {
3109                 if ((ioflag & FSYNC) ||
3110                     ((ioflag & FDSYNC) &&
3111                     (ip->i_flag & (IATTCHG|IBDWRITE)))) {
3112                 rw_exit(&ip->i_contents);
3113                 rw_enter(&ip->i_contents, RW_WRITER);
3114                 ud_iupdat(ip, 1);
3115                 }
3116         }
3117         /*
3118          * If we've already done a partial read, terminate
3119          * the read but return no error.
3120          */
3121         if (oresid != uio->uio_resid) {
3122                 error = 0;
3123         }
3124         ITIMES(ip);
3125 
3126         return (error);
3127 }
3128 
3129 int32_t
3130 ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr)
3131 {
3132         caddr_t base;
3133         struct vnode *vp;
3134         struct udf_vfs *udf_vfsp;
3135         uint32_t flags;
3136         int32_t error = 0, iupdat_flag, n, on, mapon, i_size_changed = 0;
3137         int32_t pagecreate, newpage;
3138         uint64_t old_i_size;
3139         u_offset_t off;
3140         long start_resid = uio->uio_resid, premove_resid;
3141         rlim64_t limit = uio->uio_limit;
3142 
3143 
3144         ASSERT(RW_WRITE_HELD(&ip->i_contents));
3145         if ((ip->i_type != VREG) &&
3146             (ip->i_type != VDIR) &&
3147             (ip->i_type != VLNK)) {
3148                 return (EIO);
3149         }
3150 
3151         if (uio->uio_loffset >= MAXOFFSET_T) {
3152                 return (EFBIG);
3153         }
3154         /*
3155          * see udf_l_pathconf
3156          */
3157         if (limit > (((uint64_t)1 << 40) - 1)) {
3158                 limit = ((uint64_t)1 << 40) - 1;
3159         }
3160         if (uio->uio_loffset >= limit) {
3161                 proc_t *p = ttoproc(curthread);
3162 
3163                 mutex_enter(&p->p_lock);
3164                 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
3165                     p, RCA_UNSAFE_SIGINFO);
3166                 mutex_exit(&p->p_lock);
3167                 return (EFBIG);
3168         }
3169         if ((uio->uio_loffset < (offset_t)0) ||
3170             ((uio->uio_loffset + uio->uio_resid) < 0)) {
3171                 return (EINVAL);
3172         }
3173         if (uio->uio_resid == 0) {
3174                 return (0);
3175         }
3176 
3177         mutex_enter(&ip->i_tlock);
3178         ip->i_flag |= INOACC;
3179 
3180         if (ioflag & (FSYNC | FDSYNC)) {
3181                 ip->i_flag |= ISYNC;
3182                 iupdat_flag = 1;
3183         }
3184         mutex_exit(&ip->i_tlock);
3185 
3186         udf_vfsp = ip->i_udf;
3187         vp = ITOV(ip);
3188 
3189         do {
3190                 u_offset_t uoff = uio->uio_loffset;
3191                 off = uoff & (offset_t)MAXBMASK;
3192                 mapon = (int)(uoff & (offset_t)MAXBOFFSET);
3193                 on = (int)blkoff(udf_vfsp, uoff);
3194                 n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);
3195 
3196                 if (ip->i_type == VREG && uoff + n >= limit) {
3197                         if (uoff >= limit) {
3198                                 error = EFBIG;
3199                                 goto out;
3200                         }
3201                         n = (int)(limit - (rlim64_t)uoff);
3202                 }
3203                 if (uoff + n > ip->i_size) {
3204                         /*
3205                          * We are extending the length of the file.
3206                          * bmap is used so that we are sure that
3207                          * if we need to allocate new blocks, that it
3208                          * is done here before we up the file size.
3209                          */
3210                         error = ud_bmap_write(ip, uoff,
3211                             (int)(on + n), mapon == 0, cr);
3212                         if (error) {
3213                                 break;
3214                         }
3215                         i_size_changed = 1;
3216                         old_i_size = ip->i_size;
3217                         ip->i_size = uoff + n;
3218                         /*
3219                          * If we are writing from the beginning of
3220                          * the mapping, we can just create the
3221                          * pages without having to read them.
3222                          */
3223                         pagecreate = (mapon == 0);
3224                 } else if (n == MAXBSIZE) {
3225                         /*
3226                          * Going to do a whole mappings worth,
3227                          * so we can just create the pages w/o
3228                          * having to read them in.  But before
3229                          * we do that, we need to make sure any
3230                          * needed blocks are allocated first.
3231                          */
3232                         error = ud_bmap_write(ip, uoff,
3233                             (int)(on + n), 1, cr);
3234                         if (error) {
3235                                 break;
3236                         }
3237                         pagecreate = 1;
3238                 } else {
3239                         pagecreate = 0;
3240                 }
3241 
3242                 rw_exit(&ip->i_contents);
3243 
3244                 /*
3245                  * Touch the page and fault it in if it is not in
3246                  * core before segmap_getmapflt can lock it. This
3247                  * is to avoid the deadlock if the buffer is mapped
3248                  * to the same file through mmap which we want to
3249                  * write to.
3250                  */
3251                 uio_prefaultpages((long)n, uio);
3252 
3253                 base = segmap_getmapflt(segkmap, vp, (off + mapon),
3254                     (uint32_t)n, !pagecreate, S_WRITE);
3255 
3256                 /*
3257                  * segmap_pagecreate() returns 1 if it calls
3258                  * page_create_va() to allocate any pages.
3259                  */
3260                 newpage = 0;
3261                 if (pagecreate) {
3262                         newpage = segmap_pagecreate(segkmap, base,
3263                             (size_t)n, 0);
3264                 }
3265 
3266                 premove_resid = uio->uio_resid;
3267                 error = uiomove(base + mapon, (long)n, UIO_WRITE, uio);
3268 
3269                 if (pagecreate &&
3270                     uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) {
3271                         /*
3272                          * We created pages w/o initializing them completely,
3273                          * thus we need to zero the part that wasn't set up.
3274                          * This happens on most EOF write cases and if
3275                          * we had some sort of error during the uiomove.
3276                          */
3277                         int nzero, nmoved;
3278 
3279                         nmoved = (int)(uio->uio_loffset - (off + mapon));
3280                         ASSERT(nmoved >= 0 && nmoved <= n);
3281                         nzero = roundup(on + n, PAGESIZE) - nmoved;
3282                         ASSERT(nzero > 0 && mapon + nmoved + nzero <= MAXBSIZE);
3283                         (void) kzero(base + mapon + nmoved, (uint32_t)nzero);
3284                 }
3285 
3286                 /*
3287                  * Unlock the pages allocated by page_create_va()
3288                  * in segmap_pagecreate()
3289                  */
3290                 if (newpage) {
3291                         segmap_pageunlock(segkmap, base, (size_t)n, S_WRITE);
3292                 }
3293 
3294                 if (error) {
3295                         /*
3296                          * If we failed on a write, we may have already
3297                          * allocated file blocks as well as pages.  It's
3298                          * hard to undo the block allocation, but we must
3299                          * be sure to invalidate any pages that may have
3300                          * been allocated.
3301                          */
3302                         (void) segmap_release(segkmap, base, SM_INVAL);
3303                 } else {
3304                         flags = 0;
3305                         /*
3306                          * Force write back for synchronous write cases.
3307                          */
3308                         if ((ioflag & (FSYNC|FDSYNC)) || ip->i_type == VDIR) {
3309                                 /*
3310                                  * If the sticky bit is set but the
3311                                  * execute bit is not set, we do a
3312                                  * synchronous write back and free
3313                                  * the page when done.  We set up swap
3314                                  * files to be handled this way to
3315                                  * prevent servers from keeping around
3316                                  * the client's swap pages too long.
3317                                  * XXX - there ought to be a better way.
3318                                  */
3319                                 if (IS_SWAPVP(vp)) {
3320                                         flags = SM_WRITE | SM_FREE |
3321                                             SM_DONTNEED;
3322                                         iupdat_flag = 0;
3323                                 } else {
3324                                         flags = SM_WRITE;
3325                                 }
3326                         } else if (((mapon + n) == MAXBSIZE) ||
3327                             IS_SWAPVP(vp)) {
3328                                 /*
3329                                  * Have written a whole block.
3330                                  * Start an asynchronous write and
3331                                  * mark the buffer to indicate that
3332                                  * it won't be needed again soon.
3333                                  */
3334                                 flags = SM_WRITE |SM_ASYNC | SM_DONTNEED;
3335                         }
3336                         error = segmap_release(segkmap, base, flags);
3337 
3338                         /*
3339                          * If the operation failed and is synchronous,
3340                          * then we need to unwind what uiomove() last
3341                          * did so we can potentially return an error to
3342                          * the caller.  If this write operation was
3343                          * done in two pieces and the first succeeded,
3344                          * then we won't return an error for the second
3345                          * piece that failed.  However, we only want to
3346                          * return a resid value that reflects what was
3347                          * really done.
3348                          *
3349                          * Failures for non-synchronous operations can
3350                          * be ignored since the page subsystem will
3351                          * retry the operation until it succeeds or the
3352                          * file system is unmounted.
3353                          */
3354                         if (error) {
3355                                 if ((ioflag & (FSYNC | FDSYNC)) ||
3356                                     ip->i_type == VDIR) {
3357                                         uio->uio_resid = premove_resid;
3358                                 } else {
3359                                         error = 0;
3360                                 }
3361                         }
3362                 }
3363 
3364                 /*
3365                  * Re-acquire contents lock.
3366                  */
3367                 rw_enter(&ip->i_contents, RW_WRITER);
3368                 /*
3369                  * If the uiomove() failed or if a synchronous
3370                  * page push failed, fix up i_size.
3371                  */
3372                 if (error) {
3373                         if (i_size_changed) {
3374                                 /*
3375                                  * The uiomove failed, and we
3376                                  * allocated blocks,so get rid
3377                                  * of them.
3378                                  */
3379                                 (void) ud_itrunc(ip, old_i_size, 0, cr);
3380                         }
3381                 } else {
3382                         /*
3383                          * XXX - Can this be out of the loop?
3384                          */
3385                         ip->i_flag |= IUPD | ICHG;
3386                         if (i_size_changed) {
3387                                 ip->i_flag |= IATTCHG;
3388                         }
3389                         if ((ip->i_perm & (IEXEC | (IEXEC >> 5) |
3390                             (IEXEC >> 10))) != 0 &&
3391                             (ip->i_char & (ISUID | ISGID)) != 0 &&
3392                             secpolicy_vnode_setid_retain(cr,
3393                             (ip->i_char & ISUID) != 0 && ip->i_uid == 0) != 0) {
3394                                 /*
3395                                  * Clear Set-UID & Set-GID bits on
3396                                  * successful write if not privileged
3397                                  * and at least one of the execute bits
3398                                  * is set.  If we always clear Set-GID,
3399                                  * mandatory file and record locking is
3400                                  * unuseable.
3401                                  */
3402                                 ip->i_char &= ~(ISUID | ISGID);
3403                         }
3404                 }
3405         } while (error == 0 && uio->uio_resid > 0 && n != 0);
3406 
3407 out:
3408         /*
3409          * Inode is updated according to this table -
3410          *
3411          *      FSYNC   FDSYNC(posix.4)
3412          *      --------------------------
3413          *      always@ IATTCHG|IBDWRITE
3414          *
3415          * @ -  If we are doing synchronous write the only time we should
3416          *      not be sync'ing the ip here is if we have the stickyhack
3417          *      activated, the file is marked with the sticky bit and
3418          *      no exec bit, the file length has not been changed and
3419          *      no new blocks have been allocated during this write.
3420          */
3421         if ((ip->i_flag & ISYNC) != 0) {
3422                 /*
3423                  * we have eliminated nosync
3424                  */
3425                 if ((ip->i_flag & (IATTCHG|IBDWRITE)) ||
3426                     ((ioflag & FSYNC) && iupdat_flag)) {
3427                         ud_iupdat(ip, 1);
3428                 }
3429         }
3430 
3431         /*
3432          * If we've already done a partial-write, terminate
3433          * the write but return no error.
3434          */
3435         if (start_resid != uio->uio_resid) {
3436                 error = 0;
3437         }
3438         ip->i_flag &= ~(INOACC | ISYNC);
3439         ITIMES_NOLOCK(ip);
3440 
3441         return (error);
3442 }
3443 
3444 int32_t
3445 ud_multi_strat(struct ud_inode *ip,
3446         page_t *pp, struct buf *bp, u_offset_t start)
3447 {
3448         daddr_t bn;
3449         int32_t error = 0, io_count, contig, alloc_sz, i;
3450         uint32_t io_off;
3451         mio_master_t *mm = NULL;
3452         mio_slave_t *ms = NULL;
3453         struct buf *rbp;
3454 
3455         ASSERT(!(start & PAGEOFFSET));
3456 
3457         /*
3458          * Figure out how many buffers to allocate
3459          */
3460         io_count = 0;
3461         for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
3462                 contig = 0;
3463                 if (error = ud_bmap_read(ip, (u_offset_t)(start + io_off),
3464                     &bn, &contig)) {
3465                         goto end;
3466                 }
3467                 if (contig == 0) {
3468                         goto end;
3469                 }
3470                 contig = MIN(contig, PAGESIZE - io_off);
3471                 if (bn != UDF_HOLE) {
3472                         io_count ++;
3473                 } else {
3474                         /*
3475                          * HOLE
3476                          */
3477                         if (bp->b_flags & B_READ) {
3478 
3479                                 /*
3480                                  * This is a hole and is read
3481                                  * it should be filled with 0's
3482                                  */
3483                                 pagezero(pp, io_off, contig);
3484                         }
3485                 }
3486         }
3487 
3488 
3489         if (io_count != 0) {
3490 
3491                 /*
3492                  * Allocate memory for all the
3493                  * required number of buffers
3494                  */
3495                 alloc_sz = sizeof (mio_master_t) +
3496                     (sizeof (mio_slave_t) * io_count);
3497                 mm = (mio_master_t *)kmem_zalloc(alloc_sz, KM_SLEEP);
3498                 if (mm == NULL) {
3499                         error = ENOMEM;
3500                         goto end;
3501                 }
3502 
3503                 /*
3504                  * initialize master
3505                  */
3506                 mutex_init(&mm->mm_mutex, NULL, MUTEX_DEFAULT, NULL);
3507                 mm->mm_size = alloc_sz;
3508                 mm->mm_bp = bp;
3509                 mm->mm_resid = 0;
3510                 mm->mm_error = 0;
3511                 mm->mm_index = master_index++;
3512 
3513                 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));
3514 
3515                 /*
3516                  * Initialize buffers
3517                  */
3518                 io_count = 0;
3519                 for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
3520                         contig = 0;
3521                         if (error = ud_bmap_read(ip,
3522                             (u_offset_t)(start + io_off),
3523                             &bn, &contig)) {
3524                                 goto end;
3525                         }
3526                         ASSERT(contig);
3527                         if ((io_off + contig) > bp->b_bcount) {
3528                                 contig = bp->b_bcount - io_off;
3529                         }
3530                         if (bn != UDF_HOLE) {
3531                                 /*
3532                                  * Clone the buffer
3533                                  * and prepare to start I/O
3534                                  */
3535                                 ms->ms_ptr = mm;
3536                                 bioinit(&ms->ms_buf);
3537                                 rbp = bioclone(bp, io_off, (size_t)contig,
3538                                     bp->b_edev, bn, ud_slave_done,
3539                                     &ms->ms_buf, KM_NOSLEEP);
3540                                 ASSERT(rbp == &ms->ms_buf);
3541                                 mm->mm_resid += contig;
3542                                 io_count++;
3543                                 ms ++;
3544                         }
3545                 }
3546 
3547                 /*
3548                  * Start I/O's
3549                  */
3550                 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));
3551                 for (i = 0; i < io_count; i++) {
3552                         (void) bdev_strategy(&ms->ms_buf);
3553                         ms ++;
3554                 }
3555         }
3556 
3557 end:
3558         if (error != 0) {
3559                 bp->b_flags |= B_ERROR;
3560                 bp->b_error = error;
3561                 if (mm != NULL) {
3562                         mutex_destroy(&mm->mm_mutex);
3563                         kmem_free(mm, mm->mm_size);
3564                 }
3565         }
3566         return (error);
3567 }
3568 
3569 int32_t
3570 ud_slave_done(struct buf *bp)
3571 {
3572         mio_master_t *mm;
3573         int32_t resid;
3574 
3575         ASSERT(SEMA_HELD(&bp->b_sem));
3576         ASSERT((bp->b_flags & B_DONE) == 0);
3577 
3578         mm = ((mio_slave_t *)bp)->ms_ptr;
3579 
3580         /*
3581          * Propagate error and byte count info from slave struct to
3582          * the master struct
3583          */
3584         mutex_enter(&mm->mm_mutex);
3585         if (bp->b_flags & B_ERROR) {
3586 
3587                 /*
3588                  * If multiple slave buffers get
3589                  * error we forget the old errors
3590                  * this is ok because we any way
3591                  * cannot return multiple errors
3592                  */
3593                 mm->mm_error = bp->b_error;
3594         }
3595         mm->mm_resid -= bp->b_bcount;
3596         resid = mm->mm_resid;
3597         mutex_exit(&mm->mm_mutex);
3598 
3599         /*
3600          * free up the resources allocated to cloned buffers.
3601          */
3602         bp_mapout(bp);
3603         biofini(bp);
3604 
3605         if (resid == 0) {
3606 
3607                 /*
3608                  * This is the last I/O operation
3609                  * clean up and return the original buffer
3610                  */
3611                 if (mm->mm_error) {
3612                         mm->mm_bp->b_flags |= B_ERROR;
3613                         mm->mm_bp->b_error = mm->mm_error;
3614                 }
3615                 biodone(mm->mm_bp);
3616                 mutex_destroy(&mm->mm_mutex);
3617                 kmem_free(mm, mm->mm_size);
3618         }
3619         return (0);
3620 }