ilwluts New usr/src/uts/common/fs/udfs/udf

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2015, Joyent, Inc.
  28  */
  29 
  30 #include <sys/types.h>
  31 #include <sys/t_lock.h>
  32 #include <sys/param.h>
  33 #include <sys/time.h>
  34 #include <sys/systm.h>
  35 #include <sys/sysmacros.h>
  36 #include <sys/resource.h>
  37 #include <sys/signal.h>
  38 #include <sys/cred.h>
  39 #include <sys/user.h>
  40 #include <sys/buf.h>
  41 #include <sys/vfs.h>
  42 #include <sys/vfs_opreg.h>
  43 #include <sys/stat.h>
  44 #include <sys/vnode.h>
  45 #include <sys/mode.h>
  46 #include <sys/proc.h>
  47 #include <sys/disp.h>
  48 #include <sys/file.h>
  49 #include <sys/fcntl.h>
  50 #include <sys/flock.h>
  51 #include <sys/kmem.h>
  52 #include <sys/uio.h>
  53 #include <sys/dnlc.h>
  54 #include <sys/conf.h>
  55 #include <sys/errno.h>
  56 #include <sys/mman.h>
  57 #include <sys/fbuf.h>
  58 #include <sys/pathname.h>
  59 #include <sys/debug.h>
  60 #include <sys/vmsystm.h>
  61 #include <sys/cmn_err.h>
  62 #include <sys/dirent.h>
  63 #include <sys/errno.h>
  64 #include <sys/modctl.h>
  65 #include <sys/statvfs.h>
  66 #include <sys/mount.h>
  67 #include <sys/sunddi.h>
  68 #include <sys/bootconf.h>
  69 #include <sys/policy.h>
  70 
  71 #include <vm/hat.h>
  72 #include <vm/page.h>
  73 #include <vm/pvn.h>
  74 #include <vm/as.h>
  75 #include <vm/seg.h>
  76 #include <vm/seg_map.h>
  77 #include <vm/seg_kmem.h>
  78 #include <vm/seg_vn.h>
  79 #include <vm/rm.h>
  80 #include <vm/page.h>
  81 #include <sys/swap.h>
  82 
  83 #include <fs/fs_subr.h>
  84 
  85 #include <sys/fs/udf_volume.h>
  86 #include <sys/fs/udf_inode.h>
  87 
  88 static int32_t udf_open(struct vnode **,
  89         int32_t, struct cred *, caller_context_t *);
  90 static int32_t udf_close(struct vnode *,
  91         int32_t, int32_t, offset_t, struct cred *, caller_context_t *);
  92 static int32_t udf_read(struct vnode *,
  93         struct uio *, int32_t, struct cred *, caller_context_t *);
  94 static int32_t udf_write(struct vnode *,
  95         struct uio *, int32_t, struct cred *, caller_context_t *);
  96 static int32_t udf_ioctl(struct vnode *,
  97         int32_t, intptr_t, int32_t, struct cred *, int32_t *,
  98         caller_context_t *);
  99 static int32_t udf_getattr(struct vnode *,
 100         struct vattr *, int32_t, struct cred *, caller_context_t *);
 101 static int32_t udf_setattr(struct vnode *,
 102         struct vattr *, int32_t, struct cred *, caller_context_t *);
 103 static int32_t udf_access(struct vnode *,
 104         int32_t, int32_t, struct cred *, caller_context_t *);
 105 static int32_t udf_lookup(struct vnode *,
 106         char *, struct vnode **, struct pathname *,
 107         int32_t, struct vnode *, struct cred *,
 108         caller_context_t *, int *, pathname_t *);
 109 static int32_t udf_create(struct vnode *,
 110         char *, struct vattr *, enum vcexcl,
 111         int32_t, struct vnode **, struct cred *, int32_t,
 112         caller_context_t *, vsecattr_t *);
 113 static int32_t udf_remove(struct vnode *,
 114         char *, struct cred *, caller_context_t *, int);
 115 static int32_t udf_link(struct vnode *,
 116         struct vnode *, char *, struct cred *, caller_context_t *, int);
 117 static int32_t udf_rename(struct vnode *,
 118         char *, struct vnode *, char *, struct cred *, caller_context_t *, int);
 119 static int32_t udf_mkdir(struct vnode *,
 120         char *, struct vattr *, struct vnode **, struct cred *,
 121         caller_context_t *, int, vsecattr_t *);
 122 static int32_t udf_rmdir(struct vnode *,
 123         char *, struct vnode *, struct cred *, caller_context_t *, int);
 124 static int32_t udf_readdir(struct vnode *,
 125         struct uio *, struct cred *, int32_t *, caller_context_t *, int);
 126 static int32_t udf_symlink(struct vnode *,
 127         char *, struct vattr *, char *, struct cred *, caller_context_t *, int);
 128 static int32_t udf_readlink(struct vnode *,
 129         struct uio *, struct cred *, caller_context_t *);
 130 static int32_t udf_fsync(struct vnode *,
 131         int32_t, struct cred *, caller_context_t *);
 132 static void udf_inactive(struct vnode *,
 133         struct cred *, caller_context_t *);
 134 static int32_t udf_fid(struct vnode *, struct fid *, caller_context_t *);
 135 static int udf_rwlock(struct vnode *, int32_t, caller_context_t *);
 136 static void udf_rwunlock(struct vnode *, int32_t, caller_context_t *);
 137 static int32_t udf_seek(struct vnode *, offset_t, offset_t *,
 138         caller_context_t *);
 139 static int32_t udf_frlock(struct vnode *, int32_t,
 140         struct flock64 *, int32_t, offset_t, struct flk_callback *, cred_t *,
 141         caller_context_t *);
 142 static int32_t udf_space(struct vnode *, int32_t,
 143         struct flock64 *, int32_t, offset_t, cred_t *, caller_context_t *);
 144 static int32_t udf_getpage(struct vnode *, offset_t,
 145         size_t, uint32_t *, struct page **, size_t,
 146         struct seg *, caddr_t, enum seg_rw, struct cred *, caller_context_t *);
 147 static int32_t udf_putpage(struct vnode *, offset_t,
 148         size_t, int32_t, struct cred *, caller_context_t *);
 149 static int32_t udf_map(struct vnode *, offset_t, struct as *,
 150         caddr_t *, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
 151         caller_context_t *);
 152 static int32_t udf_addmap(struct vnode *, offset_t, struct as *,
 153         caddr_t, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
 154         caller_context_t *);
 155 static int32_t udf_delmap(struct vnode *, offset_t, struct as *,
 156         caddr_t, size_t, uint32_t, uint32_t, uint32_t, struct cred *,
 157         caller_context_t *);
 158 static int32_t udf_l_pathconf(struct vnode *, int32_t,
 159         ulong_t *, struct cred *, caller_context_t *);
 160 static int32_t udf_pageio(struct vnode *, struct page *,
 161         u_offset_t, size_t, int32_t, struct cred *, caller_context_t *);
 162 
 163 int32_t ud_getpage_miss(struct vnode *, u_offset_t,
 164         size_t, struct seg *, caddr_t, page_t *pl[],
 165         size_t, enum seg_rw, int32_t);
 166 void ud_getpage_ra(struct vnode *, u_offset_t, struct seg *, caddr_t);
 167 int32_t ud_putpages(struct vnode *, offset_t, size_t, int32_t, struct cred *);
 168 int32_t ud_page_fill(struct ud_inode *, page_t *,
 169         u_offset_t, uint32_t, u_offset_t *);
 170 int32_t ud_iodone(struct buf *);
 171 int32_t ud_rdip(struct ud_inode *, struct uio *, int32_t, cred_t *);
 172 int32_t ud_wrip(struct ud_inode *, struct uio *, int32_t, cred_t *);
 173 int32_t ud_multi_strat(struct ud_inode *, page_t *, struct buf *, u_offset_t);
 174 int32_t ud_slave_done(struct buf *);
 175 
 176 /*
 177  * Structures to control multiple IO operations to get or put pages
 178  * that are backed by discontiguous blocks. The master struct is
 179  * a dummy that holds the original bp from pageio_setup. The
 180  * slave struct holds the working bp's to do the actual IO. Once
 181  * all the slave IOs complete. The master is processed as if a single
 182  * IO op has completed.
 183  */
 184 uint32_t master_index = 0;
 185 typedef struct mio_master {
 186         kmutex_t        mm_mutex;       /* protect the fields below */
 187         int32_t         mm_size;
 188         buf_t           *mm_bp;         /* original bp */
 189         int32_t         mm_resid;       /* bytes remaining to transfer */
 190         int32_t         mm_error;       /* accumulated error from slaves */
 191         int32_t         mm_index;       /* XXX debugging */
 192 } mio_master_t;
 193 
 194 typedef struct mio_slave {
 195         buf_t           ms_buf;         /* working buffer for this IO chunk */
 196         mio_master_t    *ms_ptr;        /* pointer to master */
 197 } mio_slave_t;
 198 
 199 struct vnodeops *udf_vnodeops;
 200 
 201 const fs_operation_def_t udf_vnodeops_template[] = {
 202         VOPNAME_OPEN,           { .vop_open = udf_open },
 203         VOPNAME_CLOSE,          { .vop_close = udf_close },
 204         VOPNAME_READ,           { .vop_read = udf_read },
 205         VOPNAME_WRITE,          { .vop_write = udf_write },
 206         VOPNAME_IOCTL,          { .vop_ioctl = udf_ioctl },
 207         VOPNAME_GETATTR,        { .vop_getattr = udf_getattr },
 208         VOPNAME_SETATTR,        { .vop_setattr = udf_setattr },
 209         VOPNAME_ACCESS,         { .vop_access = udf_access },
 210         VOPNAME_LOOKUP,         { .vop_lookup = udf_lookup },
 211         VOPNAME_CREATE,         { .vop_create = udf_create },
 212         VOPNAME_REMOVE,         { .vop_remove = udf_remove },
 213         VOPNAME_LINK,           { .vop_link = udf_link },
 214         VOPNAME_RENAME,         { .vop_rename = udf_rename },
 215         VOPNAME_MKDIR,          { .vop_mkdir = udf_mkdir },
 216         VOPNAME_RMDIR,          { .vop_rmdir = udf_rmdir },
 217         VOPNAME_READDIR,        { .vop_readdir = udf_readdir },
 218         VOPNAME_SYMLINK,        { .vop_symlink = udf_symlink },
 219         VOPNAME_READLINK,       { .vop_readlink = udf_readlink },
 220         VOPNAME_FSYNC,          { .vop_fsync = udf_fsync },
 221         VOPNAME_INACTIVE,       { .vop_inactive = udf_inactive },
 222         VOPNAME_FID,            { .vop_fid = udf_fid },
 223         VOPNAME_RWLOCK,         { .vop_rwlock = udf_rwlock },
 224         VOPNAME_RWUNLOCK,       { .vop_rwunlock = udf_rwunlock },
 225         VOPNAME_SEEK,           { .vop_seek = udf_seek },
 226         VOPNAME_FRLOCK,         { .vop_frlock = udf_frlock },
 227         VOPNAME_SPACE,          { .vop_space = udf_space },
 228         VOPNAME_GETPAGE,        { .vop_getpage = udf_getpage },
 229         VOPNAME_PUTPAGE,        { .vop_putpage = udf_putpage },
 230         VOPNAME_MAP,            { .vop_map = udf_map },
 231         VOPNAME_ADDMAP,         { .vop_addmap = udf_addmap },
 232         VOPNAME_DELMAP,         { .vop_delmap = udf_delmap },
 233         VOPNAME_PATHCONF,       { .vop_pathconf = udf_l_pathconf },
 234         VOPNAME_PAGEIO,         { .vop_pageio = udf_pageio },
 235         VOPNAME_VNEVENT,        { .vop_vnevent = fs_vnevent_support },
 236         NULL,                   NULL
 237 };
 238 
 239 /* ARGSUSED */
 240 static int32_t
 241 udf_open(
 242         struct vnode **vpp,
 243         int32_t flag,
 244         struct cred *cr,
 245         caller_context_t *ct)
 246 {
 247         ud_printf("udf_open\n");
 248 
 249         return (0);
 250 }
 251 
 252 /* ARGSUSED */
 253 static int32_t
 254 udf_close(
 255         struct vnode *vp,
 256         int32_t flag,
 257         int32_t count,
 258         offset_t offset,
 259         struct cred *cr,
 260         caller_context_t *ct)
 261 {
 262         struct ud_inode *ip = VTOI(vp);
 263 
 264         ud_printf("udf_close\n");
 265 
 266         ITIMES(ip);
 267 
 268         cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
 269         cleanshares(vp, ttoproc(curthread)->p_pid);
 270 
 271         /*
 272          * Push partially filled cluster at last close.
 273          * ``last close'' is approximated because the dnlc
 274          * may have a hold on the vnode.
 275          */
 276         if (vp->v_count <= 2 && vp->v_type != VBAD) {
 277                 struct ud_inode *ip = VTOI(vp);
 278                 if (ip->i_delaylen) {
 279                         (void) ud_putpages(vp, ip->i_delayoff, ip->i_delaylen,
 280                             B_ASYNC | B_FREE, cr);
 281                         ip->i_delaylen = 0;
 282                 }
 283         }
 284 
 285         return (0);
 286 }
 287 
 288 /* ARGSUSED */
 289 static int32_t
 290 udf_read(
 291         struct vnode *vp,
 292         struct uio *uiop,
 293         int32_t ioflag,
 294         struct cred *cr,
 295         caller_context_t *ct)
 296 {
 297         struct ud_inode *ip = VTOI(vp);
 298         int32_t error;
 299 
 300         ud_printf("udf_read\n");
 301 
 302         ASSERT(RW_READ_HELD(&ip->i_rwlock));
 303 
 304         if (MANDLOCK(vp, ip->i_char)) {
 305                 /*
 306                  * udf_getattr ends up being called by chklock
 307                  */
 308                 error = chklock(vp, FREAD, uiop->uio_loffset,
 309                     uiop->uio_resid, uiop->uio_fmode, ct);
 310                 if (error) {
 311                         goto end;
 312                 }
 313         }
 314 
 315         rw_enter(&ip->i_contents, RW_READER);
 316         error = ud_rdip(ip, uiop, ioflag, cr);
 317         rw_exit(&ip->i_contents);
 318 
 319 end:
 320         return (error);
 321 }
 322 
 323 
 324 int32_t ud_WRITES = 1;
 325 int32_t ud_HW = 96 * 1024;
 326 int32_t ud_LW = 64 * 1024;
 327 int32_t ud_throttles = 0;
 328 
 329 /* ARGSUSED */
 330 static int32_t
 331 udf_write(
 332         struct vnode *vp,
 333         struct uio *uiop,
 334         int32_t ioflag,
 335         struct cred *cr,
 336         caller_context_t *ct)
 337 {
 338         struct ud_inode *ip = VTOI(vp);
 339         int32_t error = 0;
 340 
 341         ud_printf("udf_write\n");
 342 
 343         ASSERT(RW_WRITE_HELD(&ip->i_rwlock));
 344 
 345         if (MANDLOCK(vp, ip->i_char)) {
 346                 /*
 347                  * ud_getattr ends up being called by chklock
 348                  */
 349                 error = chklock(vp, FWRITE, uiop->uio_loffset,
 350                     uiop->uio_resid, uiop->uio_fmode, ct);
 351                 if (error) {
 352                         goto end;
 353                 }
 354         }
 355         /*
 356          * Throttle writes.
 357          */
 358         mutex_enter(&ip->i_tlock);
 359         if (ud_WRITES && (ip->i_writes > ud_HW)) {
 360                 while (ip->i_writes > ud_HW) {
 361                         ud_throttles++;
 362                         cv_wait(&ip->i_wrcv, &ip->i_tlock);
 363                 }
 364         }
 365         mutex_exit(&ip->i_tlock);
 366 
 367         /*
 368          * Write to the file
 369          */
 370         rw_enter(&ip->i_contents, RW_WRITER);
 371         if ((ioflag & FAPPEND) != 0 && (ip->i_type == VREG)) {
 372                 /*
 373                  * In append mode start at end of file.
 374                  */
 375                 uiop->uio_loffset = ip->i_size;
 376         }
 377         error = ud_wrip(ip, uiop, ioflag, cr);
 378         rw_exit(&ip->i_contents);
 379 
 380 end:
 381         return (error);
 382 }
 383 
 384 /* ARGSUSED */
 385 static int32_t
 386 udf_ioctl(
 387         struct vnode *vp,
 388         int32_t cmd,
 389         intptr_t arg,
 390         int32_t flag,
 391         struct cred *cr,
 392         int32_t *rvalp,
 393         caller_context_t *ct)
 394 {
 395         return (ENOTTY);
 396 }
 397 
 398 /* ARGSUSED */
 399 static int32_t
 400 udf_getattr(
 401         struct vnode *vp,
 402         struct vattr *vap,
 403         int32_t flags,
 404         struct cred *cr,
 405         caller_context_t *ct)
 406 {
 407         struct ud_inode *ip = VTOI(vp);
 408 
 409         ud_printf("udf_getattr\n");
 410 
 411         if (vap->va_mask == AT_SIZE) {
 412                 /*
 413                  * for performance, if only the size is requested don't bother
 414                  * with anything else.
 415                  */
 416                 vap->va_size = ip->i_size;
 417                 return (0);
 418         }
 419 
 420         rw_enter(&ip->i_contents, RW_READER);
 421 
 422         vap->va_type = vp->v_type;
 423         vap->va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
 424 
 425         vap->va_uid = ip->i_uid;
 426         vap->va_gid = ip->i_gid;
 427         vap->va_fsid = ip->i_dev;
 428         vap->va_nodeid = ip->i_icb_lbano;
 429         vap->va_nlink = ip->i_nlink;
 430         vap->va_size = ip->i_size;
 431         vap->va_seq = ip->i_seq;
 432         if (vp->v_type == VCHR || vp->v_type == VBLK) {
 433                 vap->va_rdev = ip->i_rdev;
 434         } else {
 435                 vap->va_rdev = 0;
 436         }
 437 
 438         mutex_enter(&ip->i_tlock);
 439         ITIMES_NOLOCK(ip);      /* mark correct time in inode */
 440         vap->va_atime.tv_sec = (time_t)ip->i_atime.tv_sec;
 441         vap->va_atime.tv_nsec = ip->i_atime.tv_nsec;
 442         vap->va_mtime.tv_sec = (time_t)ip->i_mtime.tv_sec;
 443         vap->va_mtime.tv_nsec = ip->i_mtime.tv_nsec;
 444         vap->va_ctime.tv_sec = (time_t)ip->i_ctime.tv_sec;
 445         vap->va_ctime.tv_nsec = ip->i_ctime.tv_nsec;
 446         mutex_exit(&ip->i_tlock);
 447 
 448         switch (ip->i_type) {
 449                 case VBLK:
 450                         vap->va_blksize = MAXBSIZE;
 451                         break;
 452                 case VCHR:
 453                         vap->va_blksize = MAXBSIZE;
 454                         break;
 455                 default:
 456                         vap->va_blksize = ip->i_udf->udf_lbsize;
 457                         break;
 458         }
 459         vap->va_nblocks = ip->i_lbr << ip->i_udf->udf_l2d_shift;
 460 
 461         rw_exit(&ip->i_contents);
 462 
 463         return (0);
 464 }
 465 
 466 static int
 467 ud_iaccess_vmode(void *ip, int mode, struct cred *cr)
 468 {
 469         return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 0));
 470 }
 471 
 472 /*ARGSUSED4*/
 473 static int32_t
 474 udf_setattr(
 475         struct vnode *vp,
 476         struct vattr *vap,
 477         int32_t flags,
 478         struct cred *cr,
 479         caller_context_t *ct)
 480 {
 481         int32_t error = 0;
 482         uint32_t mask = vap->va_mask;
 483         struct ud_inode *ip;
 484         timestruc_t now;
 485         struct vattr ovap;
 486 
 487         ud_printf("udf_setattr\n");
 488 
 489         ip = VTOI(vp);
 490 
 491         /*
 492          * not updates allowed to 4096 files
 493          */
 494         if (ip->i_astrat == STRAT_TYPE4096) {
 495                 return (EINVAL);
 496         }
 497 
 498         /*
 499          * Cannot set these attributes
 500          */
 501         if (mask & AT_NOSET) {
 502                 return (EINVAL);
 503         }
 504 
 505         rw_enter(&ip->i_rwlock, RW_WRITER);
 506         rw_enter(&ip->i_contents, RW_WRITER);
 507 
 508         ovap.va_uid = ip->i_uid;
 509         ovap.va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
 510         error = secpolicy_vnode_setattr(cr, vp, vap, &ovap, flags,
 511             ud_iaccess_vmode, ip);
 512         if (error)
 513                 goto update_inode;
 514 
 515         mask = vap->va_mask;
 516         /*
 517          * Change file access modes.
 518          */
 519         if (mask & AT_MODE) {
 520                 ip->i_perm = VA2UD_PERM(vap->va_mode);
 521                 ip->i_char = vap->va_mode & (VSUID | VSGID | VSVTX);
 522                 mutex_enter(&ip->i_tlock);
 523                 ip->i_flag |= ICHG;
 524                 mutex_exit(&ip->i_tlock);
 525         }
 526         if (mask & (AT_UID|AT_GID)) {
 527                 if (mask & AT_UID) {
 528                         ip->i_uid = vap->va_uid;
 529                 }
 530                 if (mask & AT_GID) {
 531                         ip->i_gid = vap->va_gid;
 532                 }
 533                 mutex_enter(&ip->i_tlock);
 534                 ip->i_flag |= ICHG;
 535                 mutex_exit(&ip->i_tlock);
 536         }
 537         /*
 538          * Truncate file.  Must have write permission and not be a directory.
 539          */
 540         if (mask & AT_SIZE) {
 541                 if (vp->v_type == VDIR) {
 542                         error = EISDIR;
 543                         goto update_inode;
 544                 }
 545                 if (error = ud_iaccess(ip, IWRITE, cr, 0)) {
 546                         goto update_inode;
 547                 }
 548                 if (vap->va_size > MAXOFFSET_T) {
 549                         error = EFBIG;
 550                         goto update_inode;
 551                 }
 552                 if (error = ud_itrunc(ip, vap->va_size, 0, cr)) {
 553                         goto update_inode;
 554                 }
 555 
 556                 if (vap->va_size == 0)
 557                         vnevent_truncate(vp, ct);
 558         }
 559         /*
 560          * Change file access or modified times.
 561          */
 562         if (mask & (AT_ATIME|AT_MTIME)) {
 563                 mutex_enter(&ip->i_tlock);
 564                 if (mask & AT_ATIME) {
 565                         ip->i_atime.tv_sec = vap->va_atime.tv_sec;
 566                         ip->i_atime.tv_nsec = vap->va_atime.tv_nsec;
 567                         ip->i_flag &= ~IACC;
 568                 }
 569                 if (mask & AT_MTIME) {
 570                         ip->i_mtime.tv_sec = vap->va_mtime.tv_sec;
 571                         ip->i_mtime.tv_nsec = vap->va_mtime.tv_nsec;
 572                         gethrestime(&now);
 573                         ip->i_ctime.tv_sec = now.tv_sec;
 574                         ip->i_ctime.tv_nsec = now.tv_nsec;
 575                         ip->i_flag &= ~(IUPD|ICHG);
 576                         ip->i_flag |= IMODTIME;
 577                 }
 578                 ip->i_flag |= IMOD;
 579                 mutex_exit(&ip->i_tlock);
 580         }
 581 
 582 update_inode:
 583         if (curthread->t_flag & T_DONTPEND) {
 584                 ud_iupdat(ip, 1);
 585         } else {
 586                 ITIMES_NOLOCK(ip);
 587         }
 588         rw_exit(&ip->i_contents);
 589         rw_exit(&ip->i_rwlock);
 590 
 591         return (error);
 592 }
 593 
 594 /* ARGSUSED */
 595 static int32_t
 596 udf_access(
 597         struct vnode *vp,
 598         int32_t mode,
 599         int32_t flags,
 600         struct cred *cr,
 601         caller_context_t *ct)
 602 {
 603         struct ud_inode *ip = VTOI(vp);
 604 
 605         ud_printf("udf_access\n");
 606 
 607         if (ip->i_udf == NULL) {
 608                 return (EIO);
 609         }
 610 
 611         return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 1));
 612 }
 613 
 614 int32_t udfs_stickyhack = 1;
 615 
 616 /* ARGSUSED */
 617 static int32_t
 618 udf_lookup(
 619         struct vnode *dvp,
 620         char *nm,
 621         struct vnode **vpp,
 622         struct pathname *pnp,
 623         int32_t flags,
 624         struct vnode *rdir,
 625         struct cred *cr,
 626         caller_context_t *ct,
 627         int *direntflags,
 628         pathname_t *realpnp)
 629 {
 630         int32_t error;
 631         struct vnode *vp;
 632         struct ud_inode *ip, *xip;
 633 
 634         ud_printf("udf_lookup\n");
 635         /*
 636          * Null component name is a synonym for directory being searched.
 637          */
 638         if (*nm == '\0') {
 639                 VN_HOLD(dvp);
 640                 *vpp = dvp;
 641                 error = 0;
 642                 goto out;
 643         }
 644 
 645         /*
 646          * Fast path: Check the directory name lookup cache.
 647          */
 648         ip = VTOI(dvp);
 649         if (vp = dnlc_lookup(dvp, nm)) {
 650                 /*
 651                  * Check accessibility of directory.
 652                  */
 653                 if ((error = ud_iaccess(ip, IEXEC, cr, 1)) != 0) {
 654                         VN_RELE(vp);
 655                 }
 656                 xip = VTOI(vp);
 657         } else {
 658                 error = ud_dirlook(ip, nm, &xip, cr, 1);
 659                 ITIMES(ip);
 660         }
 661 
 662         if (error == 0) {
 663                 ip = xip;
 664                 *vpp = ITOV(ip);
 665                 if ((ip->i_type != VDIR) &&
 666                     (ip->i_char & ISVTX) &&
 667                     ((ip->i_perm & IEXEC) == 0) &&
 668                     udfs_stickyhack) {
 669                         mutex_enter(&(*vpp)->v_lock);
 670                         (*vpp)->v_flag |= VISSWAP;
 671                         mutex_exit(&(*vpp)->v_lock);
 672                 }
 673                 ITIMES(ip);
 674                 /*
 675                  * If vnode is a device return special vnode instead.
 676                  */
 677                 if (IS_DEVVP(*vpp)) {
 678                         struct vnode *newvp;
 679                         newvp = specvp(*vpp, (*vpp)->v_rdev,
 680                             (*vpp)->v_type, cr);
 681                         VN_RELE(*vpp);
 682                         if (newvp == NULL) {
 683                                 error = ENOSYS;
 684                         } else {
 685                                 *vpp = newvp;
 686                         }
 687                 }
 688         }
 689 out:
 690         return (error);
 691 }
 692 
 693 /* ARGSUSED */
 694 static int32_t
 695 udf_create(
 696         struct vnode *dvp,
 697         char *name,
 698         struct vattr *vap,
 699         enum vcexcl excl,
 700         int32_t mode,
 701         struct vnode **vpp,
 702         struct cred *cr,
 703         int32_t flag,
 704         caller_context_t *ct,
 705         vsecattr_t *vsecp)
 706 {
 707         int32_t error;
 708         struct ud_inode *ip = VTOI(dvp), *xip;
 709 
 710         ud_printf("udf_create\n");
 711 
 712         if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
 713                 vap->va_mode &= ~VSVTX;
 714 
 715         if (*name == '\0') {
 716                 /*
 717                  * Null component name refers to the directory itself.
 718                  */
 719                 VN_HOLD(dvp);
 720                 ITIMES(ip);
 721                 error = EEXIST;
 722         } else {
 723                 xip = NULL;
 724                 rw_enter(&ip->i_rwlock, RW_WRITER);
 725                 error = ud_direnter(ip, name, DE_CREATE,
 726                     (struct ud_inode *)0, (struct ud_inode *)0,
 727                     vap, &xip, cr, ct);
 728                 rw_exit(&ip->i_rwlock);
 729                 ITIMES(ip);
 730                 ip = xip;
 731         }
 732         if (ip != NULL) {
 733                 rw_enter(&ip->i_contents, RW_WRITER);
 734         }
 735 
 736         /*
 737          * If the file already exists and this is a non-exclusive create,
 738          * check permissions and allow access for non-directories.
 739          * Read-only create of an existing directory is also allowed.
 740          * We fail an exclusive create of anything which already exists.
 741          */
 742         if (error == EEXIST) {
 743                 if (excl == NONEXCL) {
 744                         if ((ip->i_type == VDIR) && (mode & VWRITE)) {
 745                                 error = EISDIR;
 746                         } else if (mode) {
 747                                 error = ud_iaccess(ip,
 748                                     UD_UPERM2DPERM(mode), cr, 0);
 749                         } else {
 750                                 error = 0;
 751                         }
 752                 }
 753                 if (error) {
 754                         rw_exit(&ip->i_contents);
 755                         VN_RELE(ITOV(ip));
 756                         goto out;
 757                 } else if ((ip->i_type == VREG) &&
 758                     (vap->va_mask & AT_SIZE) && vap->va_size == 0) {
 759                         /*
 760                          * Truncate regular files, if requested by caller.
 761                          * Grab i_rwlock to make sure no one else is
 762                          * currently writing to the file (we promised
 763                          * bmap we would do this).
 764                          * Must get the locks in the correct order.
 765                          */
 766                         if (ip->i_size == 0) {
 767                                 ip->i_flag |= ICHG | IUPD;
 768                         } else {
 769                                 rw_exit(&ip->i_contents);
 770                                 rw_enter(&ip->i_rwlock, RW_WRITER);
 771                                 rw_enter(&ip->i_contents, RW_WRITER);
 772                                 (void) ud_itrunc(ip, 0, 0, cr);
 773                                 rw_exit(&ip->i_rwlock);
 774                         }
 775                         vnevent_create(ITOV(ip), ct);
 776                 }
 777         }
 778 
 779         if (error == 0) {
 780                 *vpp = ITOV(ip);
 781                 ITIMES(ip);
 782         }
 783         if (ip != NULL) {
 784                 rw_exit(&ip->i_contents);
 785         }
 786         if (error) {
 787                 goto out;
 788         }
 789 
 790         /*
 791          * If vnode is a device return special vnode instead.
 792          */
 793         if (!error && IS_DEVVP(*vpp)) {
 794                 struct vnode *newvp;
 795 
 796                 newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
 797                 VN_RELE(*vpp);
 798                 if (newvp == NULL) {
 799                         error = ENOSYS;
 800                         goto out;
 801                 }
 802                 *vpp = newvp;
 803         }
 804 out:
 805         return (error);
 806 }
 807 
 808 /* ARGSUSED */
 809 static int32_t
 810 udf_remove(
 811         struct vnode *vp,
 812         char *nm,
 813         struct cred *cr,
 814         caller_context_t *ct,
 815         int flags)
 816 {
 817         int32_t error;
 818         struct ud_inode *ip = VTOI(vp);
 819 
 820         ud_printf("udf_remove\n");
 821 
 822         rw_enter(&ip->i_rwlock, RW_WRITER);
 823         error = ud_dirremove(ip, nm,
 824             (struct ud_inode *)0, (struct vnode *)0, DR_REMOVE, cr, ct);
 825         rw_exit(&ip->i_rwlock);
 826         ITIMES(ip);
 827 
 828         return (error);
 829 }
 830 
 831 /* ARGSUSED */
 832 static int32_t
 833 udf_link(
 834         struct vnode *tdvp,
 835         struct vnode *svp,
 836         char *tnm,
 837         struct cred *cr,
 838         caller_context_t *ct,
 839         int flags)
 840 {
 841         int32_t error;
 842         struct vnode *realvp;
 843         struct ud_inode *sip;
 844         struct ud_inode *tdp;
 845 
 846         ud_printf("udf_link\n");
 847         if (VOP_REALVP(svp, &realvp, ct) == 0) {
 848                 svp = realvp;
 849         }
 850 
 851         /*
 852          * Do not allow links to directories
 853          */
 854         if (svp->v_type == VDIR) {
 855                 return (EPERM);
 856         }
 857 
 858         sip = VTOI(svp);
 859 
 860         if (sip->i_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)
 861                 return (EPERM);
 862 
 863         tdp = VTOI(tdvp);
 864 
 865         rw_enter(&tdp->i_rwlock, RW_WRITER);
 866         error = ud_direnter(tdp, tnm, DE_LINK, (struct ud_inode *)0,
 867             sip, (struct vattr *)0, (struct ud_inode **)0, cr, ct);
 868         rw_exit(&tdp->i_rwlock);
 869         ITIMES(sip);
 870         ITIMES(tdp);
 871 
 872         if (error == 0) {
 873                 vnevent_link(svp, ct);
 874         }
 875 
 876         return (error);
 877 }
 878 
 879 /* ARGSUSED */
 880 static int32_t
 881 udf_rename(
 882         struct vnode *sdvp,
 883         char *snm,
 884         struct vnode *tdvp,
 885         char *tnm,
 886         struct cred *cr,
 887         caller_context_t *ct,
 888         int flags)
 889 {
 890         int32_t error = 0;
 891         struct udf_vfs *udf_vfsp;
 892         struct ud_inode *sip;           /* source inode */
 893         struct ud_inode *tip;           /* target inode */
 894         struct ud_inode *sdp, *tdp;     /* source and target parent inode */
 895         struct vnode *realvp;
 896 
 897         ud_printf("udf_rename\n");
 898 
 899         if (VOP_REALVP(tdvp, &realvp, ct) == 0) {
 900                 tdvp = realvp;
 901         }
 902 
 903         sdp = VTOI(sdvp);
 904         tdp = VTOI(tdvp);
 905 
 906         udf_vfsp = sdp->i_udf;
 907 
 908         mutex_enter(&udf_vfsp->udf_rename_lck);
 909         /*
 910          * Look up inode of file we're supposed to rename.
 911          */
 912         if (error = ud_dirlook(sdp, snm, &sip, cr, 0)) {
 913                 mutex_exit(&udf_vfsp->udf_rename_lck);
 914                 return (error);
 915         }
 916         /*
 917          * be sure this is not a directory with another file system mounted
 918          * over it.  If it is just give up the locks, and return with
 919          * EBUSY
 920          */
 921         if (vn_mountedvfs(ITOV(sip)) != NULL) {
 922                 error = EBUSY;
 923                 goto errout;
 924         }
 925         /*
 926          * Make sure we can delete the source entry.  This requires
 927          * write permission on the containing directory.  If that
 928          * directory is "sticky" it further requires (except for
 929          * privileged users) that the user own the directory or the
 930          * source entry, or else have permission to write the source
 931          * entry.
 932          */
 933         rw_enter(&sdp->i_contents, RW_READER);
 934         rw_enter(&sip->i_contents, RW_READER);
 935         if ((error = ud_iaccess(sdp, IWRITE, cr, 0)) != 0 ||
 936             (error = ud_sticky_remove_access(sdp, sip, cr)) != 0) {
 937                 rw_exit(&sip->i_contents);
 938                 rw_exit(&sdp->i_contents);
 939                 ITIMES(sip);
 940                 goto errout;
 941         }
 942 
 943         /*
 944          * Check for renaming '.' or '..' or alias of '.'
 945          */
 946         if ((strcmp(snm, ".") == 0) ||
 947             (strcmp(snm, "..") == 0) ||
 948             (sdp == sip)) {
 949                 error = EINVAL;
 950                 rw_exit(&sip->i_contents);
 951                 rw_exit(&sdp->i_contents);
 952                 goto errout;
 953         }
 954 
 955         rw_exit(&sip->i_contents);
 956         rw_exit(&sdp->i_contents);
 957 
 958         if (ud_dirlook(tdp, tnm, &tip, cr, 0) == 0) {
 959                 vnevent_pre_rename_dest(ITOV(tip), tdvp, tnm, ct);
 960                 VN_RELE(ITOV(tip));
 961         }
 962 
 963         /* Notify the target dir. if not the same as the source dir. */
 964         if (sdvp != tdvp)
 965                 vnevent_pre_rename_dest_dir(tdvp, ITOV(sip), tnm, ct);
 966 
 967         vnevent_pre_rename_src(ITOV(sip), sdvp, snm, ct);
 968 
 969         /*
 970          * Link source to the target.
 971          */
 972         rw_enter(&tdp->i_rwlock, RW_WRITER);
 973         if (error = ud_direnter(tdp, tnm, DE_RENAME, sdp, sip,
 974             (struct vattr *)0, (struct ud_inode **)0, cr, ct)) {
 975                 /*
 976                  * ESAME isn't really an error; it indicates that the
 977                  * operation should not be done because the source and target
 978                  * are the same file, but that no error should be reported.
 979                  */
 980                 if (error == ESAME) {
 981                         error = 0;
 982                 }
 983                 rw_exit(&tdp->i_rwlock);
 984                 goto errout;
 985         }
 986         rw_exit(&tdp->i_rwlock);
 987 
 988         rw_enter(&sdp->i_rwlock, RW_WRITER);
 989         /*
 990          * Unlink the source.
 991          * Remove the source entry.  ud_dirremove() checks that the entry
 992          * still reflects sip, and returns an error if it doesn't.
 993          * If the entry has changed just forget about it.  Release
 994          * the source inode.
 995          */
 996         if ((error = ud_dirremove(sdp, snm, sip, (struct vnode *)0,
 997             DR_RENAME, cr, ct)) == ENOENT) {
 998                 error = 0;
 999         }
1000         rw_exit(&sdp->i_rwlock);
1001 
1002         if (error == 0) {
1003                 vnevent_rename_src(ITOV(sip), sdvp, snm, ct);
1004                 /*
1005                  * vnevent_rename_dest and vnevent_rename_dest_dir are called
1006                  * in ud_direnter().
1007                  */
1008         }
1009 
1010 errout:
1011         ITIMES(sdp);
1012         ITIMES(tdp);
1013         VN_RELE(ITOV(sip));
1014         mutex_exit(&udf_vfsp->udf_rename_lck);
1015 
1016         return (error);
1017 }
1018 
1019 /* ARGSUSED */
1020 static int32_t
1021 udf_mkdir(
1022         struct vnode *dvp,
1023         char *dirname,
1024         struct vattr *vap,
1025         struct vnode **vpp,
1026         struct cred *cr,
1027         caller_context_t *ct,
1028         int flags,
1029         vsecattr_t *vsecp)
1030 {
1031         int32_t error;
1032         struct ud_inode *ip;
1033         struct ud_inode *xip;
1034 
1035         ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
1036 
1037         ud_printf("udf_mkdir\n");
1038 
1039         ip = VTOI(dvp);
1040         rw_enter(&ip->i_rwlock, RW_WRITER);
1041         error = ud_direnter(ip, dirname, DE_MKDIR,
1042             (struct ud_inode *)0, (struct ud_inode *)0, vap, &xip, cr, ct);
1043         rw_exit(&ip->i_rwlock);
1044         ITIMES(ip);
1045         if (error == 0) {
1046                 ip = xip;
1047                 *vpp = ITOV(ip);
1048                 ITIMES(ip);
1049         } else if (error == EEXIST) {
1050                 ITIMES(xip);
1051                 VN_RELE(ITOV(xip));
1052         }
1053 
1054         return (error);
1055 }
1056 
1057 /* ARGSUSED */
1058 static int32_t
1059 udf_rmdir(
1060         struct vnode *vp,
1061         char *nm,
1062         struct vnode *cdir,
1063         struct cred *cr,
1064         caller_context_t *ct,
1065         int flags)
1066 {
1067         int32_t error;
1068         struct ud_inode *ip = VTOI(vp);
1069 
1070         ud_printf("udf_rmdir\n");
1071 
1072         rw_enter(&ip->i_rwlock, RW_WRITER);
1073         error = ud_dirremove(ip, nm, (struct ud_inode *)0, cdir, DR_RMDIR,
1074             cr, ct);
1075         rw_exit(&ip->i_rwlock);
1076         ITIMES(ip);
1077 
1078         return (error);
1079 }
1080 
1081 /* ARGSUSED */
1082 static int32_t
1083 udf_readdir(
1084         struct vnode *vp,
1085         struct uio *uiop,
1086         struct cred *cr,
1087         int32_t *eofp,
1088         caller_context_t *ct,
1089         int flags)
1090 {
1091         struct ud_inode *ip;
1092         struct dirent64 *nd;
1093         struct udf_vfs *udf_vfsp;
1094         int32_t error = 0, len, outcount = 0;
1095         uint32_t dirsiz, offset;
1096         uint32_t bufsize, ndlen, dummy;
1097         caddr_t outbuf;
1098         caddr_t outb, end_outb;
1099         struct iovec *iovp;
1100 
1101         uint8_t *dname;
1102         int32_t length;
1103 
1104         uint8_t *buf = NULL;
1105 
1106         struct fbuf *fbp = NULL;
1107         struct file_id *fid;
1108         uint8_t *name;
1109 
1110 
1111         ud_printf("udf_readdir\n");
1112 
1113         ip = VTOI(vp);
1114         udf_vfsp = ip->i_udf;
1115 
1116         dirsiz = ip->i_size;
1117         if ((uiop->uio_offset >= dirsiz) ||
1118             (ip->i_nlink <= 0)) {
1119                 if (eofp) {
1120                         *eofp = 1;
1121                 }
1122                 return (0);
1123         }
1124 
1125         offset = uiop->uio_offset;
1126         iovp = uiop->uio_iov;
1127         bufsize = iovp->iov_len;
1128 
1129         outb = outbuf = (char *)kmem_alloc((uint32_t)bufsize, KM_SLEEP);
1130         end_outb = outb + bufsize;
1131         nd = (struct dirent64 *)outbuf;
1132 
1133         dname = (uint8_t *)kmem_zalloc(1024, KM_SLEEP);
1134         buf = (uint8_t *)kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP);
1135 
1136         if (offset == 0) {
1137                 len = DIRENT64_RECLEN(1);
1138                 if (((caddr_t)nd + len) >= end_outb) {
1139                         error = EINVAL;
1140                         goto end;
1141                 }
1142                 nd->d_ino = ip->i_icb_lbano;
1143                 nd->d_reclen = (uint16_t)len;
1144                 nd->d_off = 0x10;
1145                 nd->d_name[0] = '.';
1146                 bzero(&nd->d_name[1], DIRENT64_NAMELEN(len) - 1);
1147                 nd = (struct dirent64 *)((char *)nd + nd->d_reclen);
1148                 outcount++;
1149         } else if (offset == 0x10) {
1150                 offset = 0;
1151         }
1152 
1153         while (offset < dirsiz) {
1154                 error = ud_get_next_fid(ip, &fbp,
1155                     offset, &fid, &name, buf);
1156                 if (error != 0) {
1157                         break;
1158                 }
1159 
1160                 if ((fid->fid_flags & FID_DELETED) == 0) {
1161                         if (fid->fid_flags & FID_PARENT) {
1162 
1163                                 len = DIRENT64_RECLEN(2);
1164                                 if (((caddr_t)nd + len) >= end_outb) {
1165                                         error = EINVAL;
1166                                         break;
1167                                 }
1168 
1169                                 nd->d_ino = ip->i_icb_lbano;
1170                                 nd->d_reclen = (uint16_t)len;
1171                                 nd->d_off = offset + FID_LEN(fid);
1172                                 nd->d_name[0] = '.';
1173                                 nd->d_name[1] = '.';
1174                                 bzero(&nd->d_name[2],
1175                                     DIRENT64_NAMELEN(len) - 2);
1176                                 nd = (struct dirent64 *)
1177                                     ((char *)nd + nd->d_reclen);
1178                         } else {
1179                                 if ((error = ud_uncompress(fid->fid_idlen,
1180                                     &length, name, dname)) != 0) {
1181                                         break;
1182                                 }
1183                                 if (length == 0) {
1184                                         offset += FID_LEN(fid);
1185                                         continue;
1186                                 }
1187                                 len = DIRENT64_RECLEN(length);
1188                                 if (((caddr_t)nd + len) >= end_outb) {
1189                                         if (!outcount) {
1190                                                 error = EINVAL;
1191                                         }
1192                                         break;
1193                                 }
1194                                 (void) strncpy(nd->d_name,
1195                                     (caddr_t)dname, length);
1196                                 bzero(&nd->d_name[length],
1197                                     DIRENT64_NAMELEN(len) - length);
1198                                 nd->d_ino = ud_xlate_to_daddr(udf_vfsp,
1199                                     SWAP_16(fid->fid_icb.lad_ext_prn),
1200                                     SWAP_32(fid->fid_icb.lad_ext_loc), 1,
1201                                     &dummy);
1202                                 nd->d_reclen = (uint16_t)len;
1203                                 nd->d_off = offset + FID_LEN(fid);
1204                                 nd = (struct dirent64 *)
1205                                     ((char *)nd + nd->d_reclen);
1206                         }
1207                         outcount++;
1208                 }
1209 
1210                 offset += FID_LEN(fid);
1211         }
1212 
1213 end:
1214         if (fbp != NULL) {
1215                 fbrelse(fbp, S_OTHER);
1216         }
1217         ndlen = ((char *)nd - outbuf);
1218         /*
1219          * In case of error do not call uiomove.
1220          * Return the error to the caller.
1221          */
1222         if ((error == 0) && (ndlen != 0)) {
1223                 error = uiomove(outbuf, (long)ndlen, UIO_READ, uiop);
1224                 uiop->uio_offset = offset;
1225         }
1226         kmem_free((caddr_t)buf, udf_vfsp->udf_lbsize);
1227         kmem_free((caddr_t)dname, 1024);
1228         kmem_free(outbuf, (uint32_t)bufsize);
1229         if (eofp && error == 0) {
1230                 *eofp = (uiop->uio_offset >= dirsiz);
1231         }
1232         return (error);
1233 }
1234 
1235 /* ARGSUSED */
1236 static int32_t
1237 udf_symlink(
1238         struct vnode *dvp,
1239         char *linkname,
1240         struct vattr *vap,
1241         char *target,
1242         struct cred *cr,
1243         caller_context_t *ct,
1244         int flags)
1245 {
1246         int32_t error = 0, outlen;
1247         uint32_t ioflag = 0;
1248         struct ud_inode *ip, *dip = VTOI(dvp);
1249 
1250         struct path_comp *pc;
1251         int8_t *dname = NULL, *uname = NULL, *sp;
1252 
1253         ud_printf("udf_symlink\n");
1254 
1255         ip = (struct ud_inode *)0;
1256         vap->va_type = VLNK;
1257         vap->va_rdev = 0;
1258 
1259         rw_enter(&dip->i_rwlock, RW_WRITER);
1260         error = ud_direnter(dip, linkname, DE_CREATE,
1261             (struct ud_inode *)0, (struct ud_inode *)0, vap, &ip, cr, ct);
1262         rw_exit(&dip->i_rwlock);
1263         if (error == 0) {
1264                 dname = kmem_zalloc(1024, KM_SLEEP);
1265                 uname = kmem_zalloc(PAGESIZE, KM_SLEEP);
1266 
1267                 pc = (struct path_comp *)uname;
1268                 /*
1269                  * If the first character in target is "/"
1270                  * then skip it and create entry for it
1271                  */
1272                 if (*target == '/') {
1273                         pc->pc_type = 2;
1274                         pc->pc_len = 0;
1275                         pc = (struct path_comp *)(((char *)pc) + 4);
1276                         while (*target == '/') {
1277                                 target++;
1278                         }
1279                 }
1280 
1281                 while (*target != NULL) {
1282                         sp = target;
1283                         while ((*target != '/') && (*target != '\0')) {
1284                                 target ++;
1285                         }
1286                         /*
1287                          * We got the next component of the
1288                          * path name. Create path_comp of
1289                          * appropriate type
1290                          */
1291                         if (((target - sp) == 1) && (*sp == '.')) {
1292                                 /*
1293                                  * Dot entry.
1294                                  */
1295                                 pc->pc_type = 4;
1296                                 pc = (struct path_comp *)(((char *)pc) + 4);
1297                         } else if (((target - sp) == 2) &&
1298                             (*sp == '.') && ((*(sp + 1)) == '.')) {
1299                                 /*
1300                                  * DotDot entry.
1301                                  */
1302                                 pc->pc_type = 3;
1303                                 pc = (struct path_comp *)(((char *)pc) + 4);
1304                         } else {
1305                                 /*
1306                                  * convert the user given name
1307                                  * into appropriate form to be put
1308                                  * on the media
1309                                  */
1310                                 outlen = 1024;  /* set to size of dname */
1311                                 if (error = ud_compress(target - sp, &outlen,
1312                                     (uint8_t *)sp, (uint8_t *)dname)) {
1313                                         break;
1314                                 }
1315                                 pc->pc_type = 5;
1316                                 /* LINTED */
1317                                 pc->pc_len = outlen;
1318                                 dname[outlen] = '\0';
1319                                 (void) strcpy((char *)pc->pc_id, dname);
1320                                 pc = (struct path_comp *)
1321                                     (((char *)pc) + 4 + outlen);
1322                         }
1323                         while (*target == '/') {
1324                                 target++;
1325                         }
1326                         if (*target == NULL) {
1327                                 break;
1328                         }
1329                 }
1330 
1331                 rw_enter(&ip->i_contents, RW_WRITER);
1332                 if (error == 0) {
1333                         ioflag = FWRITE;
1334                         if (curthread->t_flag & T_DONTPEND) {
1335                                 ioflag |= FDSYNC;
1336                         }
1337                         error = ud_rdwri(UIO_WRITE, ioflag, ip,
1338                             uname, ((int8_t *)pc) - uname,
1339                             (offset_t)0, UIO_SYSSPACE, (int32_t *)0, cr);
1340                 }
1341                 if (error) {
1342                         ud_idrop(ip);
1343                         rw_exit(&ip->i_contents);
1344                         rw_enter(&dip->i_rwlock, RW_WRITER);
1345                         (void) ud_dirremove(dip, linkname, (struct ud_inode *)0,
1346                             (struct vnode *)0, DR_REMOVE, cr, ct);
1347                         rw_exit(&dip->i_rwlock);
1348                         goto update_inode;
1349                 }
1350                 rw_exit(&ip->i_contents);
1351         }
1352 
1353         if ((error == 0) || (error == EEXIST)) {
1354                 VN_RELE(ITOV(ip));
1355         }
1356 
1357 update_inode:
1358         ITIMES(VTOI(dvp));
1359         if (uname != NULL) {
1360                 kmem_free(uname, PAGESIZE);
1361         }
1362         if (dname != NULL) {
1363                 kmem_free(dname, 1024);
1364         }
1365 
1366         return (error);
1367 }
1368 
1369 /* ARGSUSED */
1370 static int32_t
1371 udf_readlink(
1372         struct vnode *vp,
1373         struct uio *uiop,
1374         struct cred *cr,
1375         caller_context_t *ct)
1376 {
1377         int32_t error = 0, off, id_len, size, len;
1378         int8_t *dname = NULL, *uname = NULL;
1379         struct ud_inode *ip;
1380         struct fbuf *fbp = NULL;
1381         struct path_comp *pc;
1382 
1383         ud_printf("udf_readlink\n");
1384 
1385         if (vp->v_type != VLNK) {
1386                 return (EINVAL);
1387         }
1388 
1389         ip = VTOI(vp);
1390         size = ip->i_size;
1391         if (size > PAGESIZE) {
1392                 return (EIO);
1393         }
1394 
1395         if (size == 0) {
1396                 return (0);
1397         }
1398 
1399         dname = kmem_zalloc(1024, KM_SLEEP);
1400         uname = kmem_zalloc(PAGESIZE, KM_SLEEP);
1401 
1402         rw_enter(&ip->i_contents, RW_READER);
1403 
1404         if ((error = fbread(vp, 0, size, S_READ, &fbp)) != 0) {
1405                 goto end;
1406         }
1407 
1408         off = 0;
1409 
1410         while (off < size) {
1411                 pc = (struct path_comp *)(fbp->fb_addr + off);
1412                 switch (pc->pc_type) {
1413                         case 1 :
1414                                 (void) strcpy(uname, ip->i_udf->udf_fsmnt);
1415                                 (void) strcat(uname, "/");
1416                                 break;
1417                         case 2 :
1418                                 if (pc->pc_len != 0) {
1419                                         goto end;
1420                                 }
1421                                 uname[0] = '/';
1422                                 uname[1] = '\0';
1423                                 break;
1424                         case 3 :
1425                                 (void) strcat(uname, "../");
1426                                 break;
1427                         case 4 :
1428                                 (void) strcat(uname, "./");
1429                                 break;
1430                         case 5 :
1431                                 if ((error = ud_uncompress(pc->pc_len, &id_len,
1432                                     pc->pc_id, (uint8_t *)dname)) != 0) {
1433                                         break;
1434                                 }
1435                                 dname[id_len] = '\0';
1436                                 (void) strcat(uname, dname);
1437                                 (void) strcat(uname, "/");
1438                                 break;
1439                         default :
1440                                 error = EINVAL;
1441                                 goto end;
1442                 }
1443                 off += 4 + pc->pc_len;
1444         }
1445         len = strlen(uname) - 1;
1446         if (uname[len] == '/') {
1447                 if (len == 0) {
1448                         /*
1449                          * special case link to /
1450                          */
1451                         len = 1;
1452                 } else {
1453                         uname[len] = '\0';
1454                 }
1455         }
1456 
1457         error = uiomove(uname, len, UIO_READ, uiop);
1458 
1459         ITIMES(ip);
1460 
1461 end:
1462         if (fbp != NULL) {
1463                 fbrelse(fbp, S_OTHER);
1464         }
1465         rw_exit(&ip->i_contents);
1466         if (uname != NULL) {
1467                 kmem_free(uname, PAGESIZE);
1468         }
1469         if (dname != NULL) {
1470                 kmem_free(dname, 1024);
1471         }
1472         return (error);
1473 }
1474 
1475 /* ARGSUSED */
1476 static int32_t
1477 udf_fsync(
1478         struct vnode *vp,
1479         int32_t syncflag,
1480         struct cred *cr,
1481         caller_context_t *ct)
1482 {
1483         int32_t error = 0;
1484         struct ud_inode *ip = VTOI(vp);
1485 
1486         ud_printf("udf_fsync\n");
1487 
1488         rw_enter(&ip->i_contents, RW_WRITER);
1489         if (!(IS_SWAPVP(vp))) {
1490                 error = ud_syncip(ip, 0, I_SYNC); /* Do synchronous writes */
1491         }
1492         if (error == 0) {
1493                 error = ud_sync_indir(ip);
1494         }
1495         ITIMES(ip);             /* XXX: is this necessary ??? */
1496         rw_exit(&ip->i_contents);
1497 
1498         return (error);
1499 }
1500 
1501 /* ARGSUSED */
1502 static void
1503 udf_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
1504 {
1505         ud_printf("udf_iinactive\n");
1506 
1507         ud_iinactive(VTOI(vp), cr);
1508 }
1509 
1510 /* ARGSUSED */
1511 static int32_t
1512 udf_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
1513 {
1514         struct udf_fid *udfidp;
1515         struct ud_inode *ip = VTOI(vp);
1516 
1517         ud_printf("udf_fid\n");
1518 
1519         if (fidp->fid_len < (sizeof (struct udf_fid) - sizeof (uint16_t))) {
1520                 fidp->fid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
1521                 return (ENOSPC);
1522         }
1523 
1524         udfidp = (struct udf_fid *)fidp;
1525         bzero((char *)udfidp, sizeof (struct udf_fid));
1526         rw_enter(&ip->i_contents, RW_READER);
1527         udfidp->udfid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
1528         udfidp->udfid_uinq_lo = ip->i_uniqid & 0xffffffff;
1529         udfidp->udfid_prn = ip->i_icb_prn;
1530         udfidp->udfid_icb_lbn = ip->i_icb_block;
1531         rw_exit(&ip->i_contents);
1532 
1533         return (0);
1534 }
1535 
1536 /* ARGSUSED2 */
1537 static int
1538 udf_rwlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
1539 {
1540         struct ud_inode *ip = VTOI(vp);
1541 
1542         ud_printf("udf_rwlock\n");
1543 
1544         if (write_lock) {
1545                 rw_enter(&ip->i_rwlock, RW_WRITER);
1546         } else {
1547                 rw_enter(&ip->i_rwlock, RW_READER);
1548         }
1549         return (write_lock);
1550 }
1551 
1552 /* ARGSUSED */
1553 static void
1554 udf_rwunlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
1555 {
1556         struct ud_inode *ip = VTOI(vp);
1557 
1558         ud_printf("udf_rwunlock\n");
1559 
1560         rw_exit(&ip->i_rwlock);
1561 
1562 }
1563 
1564 /* ARGSUSED */
1565 static int32_t
1566 udf_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
1567 {
1568         return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
1569 }
1570 
1571 static int32_t
1572 udf_frlock(
1573         struct vnode *vp,
1574         int32_t cmd,
1575         struct flock64 *bfp,
1576         int32_t flag,
1577         offset_t offset,
1578         struct flk_callback *flk_cbp,
1579         cred_t *cr,
1580         caller_context_t *ct)
1581 {
1582         struct ud_inode *ip = VTOI(vp);
1583 
1584         ud_printf("udf_frlock\n");
1585 
1586         /*
1587          * If file is being mapped, disallow frlock.
1588          * XXX I am not holding tlock while checking i_mapcnt because the
1589          * current locking strategy drops all locks before calling fs_frlock.
1590          * So, mapcnt could change before we enter fs_frlock making is
1591          * meaningless to have held tlock in the first place.
1592          */
1593         if ((ip->i_mapcnt > 0) &&
1594             (MANDLOCK(vp, ip->i_char))) {
1595                 return (EAGAIN);
1596         }
1597 
1598         return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
1599 }
1600 
1601 /*ARGSUSED6*/
1602 static int32_t
1603 udf_space(
1604         struct vnode *vp,
1605         int32_t cmd,
1606         struct flock64 *bfp,
1607         int32_t flag,
1608         offset_t offset,
1609         cred_t *cr,
1610         caller_context_t *ct)
1611 {
1612         int32_t error = 0;
1613 
1614         ud_printf("udf_space\n");
1615 
1616         if (cmd != F_FREESP) {
1617                 error =  EINVAL;
1618         } else if ((error = convoff(vp, bfp, 0, offset)) == 0) {
1619                 error = ud_freesp(vp, bfp, flag, cr);
1620 
1621                 if (error == 0 && bfp->l_start == 0)
1622                         vnevent_truncate(vp, ct);
1623         }
1624 
1625         return (error);
1626 }
1627 
1628 /* ARGSUSED */
1629 static int32_t
1630 udf_getpage(
1631         struct vnode *vp,
1632         offset_t off,
1633         size_t len,
1634         uint32_t *protp,
1635         struct page **plarr,
1636         size_t plsz,
1637         struct seg *seg,
1638         caddr_t addr,
1639         enum seg_rw rw,
1640         struct cred *cr,
1641         caller_context_t *ct)
1642 {
1643         struct ud_inode *ip = VTOI(vp);
1644         int32_t error, has_holes, beyond_eof, seqmode, dolock;
1645         int32_t pgsize = PAGESIZE;
1646         struct udf_vfs *udf_vfsp = ip->i_udf;
1647         page_t **pl;
1648         u_offset_t pgoff, eoff, uoff;
1649         krw_t rwtype;
1650         caddr_t pgaddr;
1651 
1652         ud_printf("udf_getpage\n");
1653 
1654         uoff = (u_offset_t)off; /* type conversion */
1655         if (protp) {
1656                 *protp = PROT_ALL;
1657         }
1658         if (vp->v_flag & VNOMAP) {
1659                 return (ENOSYS);
1660         }
1661         seqmode = ip->i_nextr == uoff && rw != S_CREATE;
1662 
1663         rwtype = RW_READER;
1664         dolock = (rw_owner(&ip->i_contents) != curthread);
1665 retrylock:
1666         if (dolock) {
1667                 rw_enter(&ip->i_contents, rwtype);
1668         }
1669 
1670         /*
1671          * We may be getting called as a side effect of a bmap using
1672          * fbread() when the blocks might be being allocated and the
1673          * size has not yet been up'ed.  In this case we want to be
1674          * able to return zero pages if we get back UDF_HOLE from
1675          * calling bmap for a non write case here.  We also might have
1676          * to read some frags from the disk into a page if we are
1677          * extending the number of frags for a given lbn in bmap().
1678          */
1679         beyond_eof = uoff + len > ip->i_size + PAGEOFFSET;
1680         if (beyond_eof && seg != segkmap) {
1681                 if (dolock) {
1682                         rw_exit(&ip->i_contents);
1683                 }
1684                 return (EFAULT);
1685         }
1686 
1687         /*
1688          * Must hold i_contents lock throughout the call to pvn_getpages
1689          * since locked pages are returned from each call to ud_getapage.
1690          * Must *not* return locked pages and then try for contents lock
1691          * due to lock ordering requirements (inode > page)
1692          */
1693 
1694         has_holes = ud_bmap_has_holes(ip);
1695 
1696         if ((rw == S_WRITE || rw == S_CREATE) && (has_holes || beyond_eof)) {
1697                 int32_t blk_size, count;
1698                 u_offset_t offset;
1699 
1700                 /*
1701                  * We must acquire the RW_WRITER lock in order to
1702                  * call bmap_write().
1703                  */
1704                 if (dolock && rwtype == RW_READER) {
1705                         rwtype = RW_WRITER;
1706 
1707                         if (!rw_tryupgrade(&ip->i_contents)) {
1708 
1709                                 rw_exit(&ip->i_contents);
1710 
1711                                 goto retrylock;
1712                         }
1713                 }
1714 
1715                 /*
1716                  * May be allocating disk blocks for holes here as
1717                  * a result of mmap faults. write(2) does the bmap_write
1718                  * in rdip/wrip, not here. We are not dealing with frags
1719                  * in this case.
1720                  */
1721                 offset = uoff;
1722                 while ((offset < uoff + len) &&
1723                     (offset < ip->i_size)) {
1724                         /*
1725                          * the variable "bnp" is to simplify the expression for
1726                          * the compiler; * just passing in &bn to bmap_write
1727                          * causes a compiler "loop"
1728                          */
1729 
1730                         blk_size = udf_vfsp->udf_lbsize;
1731                         if ((offset + blk_size) > ip->i_size) {
1732                                 count = ip->i_size - offset;
1733                         } else {
1734                                 count = blk_size;
1735                         }
1736                         error = ud_bmap_write(ip, offset, count, 0, cr);
1737                         if (error) {
1738                                 goto update_inode;
1739                         }
1740                         offset += count; /* XXX - make this contig */
1741                 }
1742         }
1743 
1744         /*
1745          * Can be a reader from now on.
1746          */
1747         if (dolock && rwtype == RW_WRITER) {
1748                 rw_downgrade(&ip->i_contents);
1749         }
1750 
1751         /*
1752          * We remove PROT_WRITE in cases when the file has UDF holes
1753          * because we don't  want to call bmap_read() to check each
1754          * page if it is backed with a disk block.
1755          */
1756         if (protp && has_holes && rw != S_WRITE && rw != S_CREATE) {
1757                 *protp &= ~PROT_WRITE;
1758         }
1759 
1760         error = 0;
1761 
1762         /*
1763          * The loop looks up pages in the range <off, off + len).
1764          * For each page, we first check if we should initiate an asynchronous
1765          * read ahead before we call page_lookup (we may sleep in page_lookup
1766          * for a previously initiated disk read).
1767          */
1768         eoff = (uoff + len);
1769         for (pgoff = uoff, pgaddr = addr, pl = plarr;
1770             pgoff < eoff; /* empty */) {
1771                 page_t  *pp;
1772                 u_offset_t      nextrio;
1773                 se_t    se;
1774 
1775                 se = ((rw == S_CREATE) ? SE_EXCL : SE_SHARED);
1776 
1777                 /*
1778                  * Handle async getpage (faultahead)
1779                  */
1780                 if (plarr == NULL) {
1781                         ip->i_nextrio = pgoff;
1782                         ud_getpage_ra(vp, pgoff, seg, pgaddr);
1783                         pgoff += pgsize;
1784                         pgaddr += pgsize;
1785                         continue;
1786                 }
1787 
1788                 /*
1789                  * Check if we should initiate read ahead of next cluster.
1790                  * We call page_exists only when we need to confirm that
1791                  * we have the current page before we initiate the read ahead.
1792                  */
1793                 nextrio = ip->i_nextrio;
1794                 if (seqmode &&
1795                     pgoff + RD_CLUSTSZ(ip) >= nextrio && pgoff <= nextrio &&
1796                     nextrio < ip->i_size && page_exists(vp, pgoff))
1797                         ud_getpage_ra(vp, pgoff, seg, pgaddr);
1798 
1799                 if ((pp = page_lookup(vp, pgoff, se)) != NULL) {
1800 
1801                         /*
1802                          * We found the page in the page cache.
1803                          */
1804                         *pl++ = pp;
1805                         pgoff += pgsize;
1806                         pgaddr += pgsize;
1807                         len -= pgsize;
1808                         plsz -= pgsize;
1809                 } else  {
1810 
1811                         /*
1812                          * We have to create the page, or read it from disk.
1813                          */
1814                         if (error = ud_getpage_miss(vp, pgoff, len,
1815                             seg, pgaddr, pl, plsz, rw, seqmode)) {
1816                                 goto error_out;
1817                         }
1818 
1819                         while (*pl != NULL) {
1820                                 pl++;
1821                                 pgoff += pgsize;
1822                                 pgaddr += pgsize;
1823                                 len -= pgsize;
1824                                 plsz -= pgsize;
1825                         }
1826                 }
1827         }
1828 
1829         /*
1830          * Return pages up to plsz if they are in the page cache.
1831          * We cannot return pages if there is a chance that they are
1832          * backed with a UDF hole and rw is S_WRITE or S_CREATE.
1833          */
1834         if (plarr && !(has_holes && (rw == S_WRITE || rw == S_CREATE))) {
1835 
1836                 ASSERT((protp == NULL) ||
1837                     !(has_holes && (*protp & PROT_WRITE)));
1838 
1839                 eoff = pgoff + plsz;
1840                 while (pgoff < eoff) {
1841                         page_t          *pp;
1842 
1843                         if ((pp = page_lookup_nowait(vp, pgoff,
1844                             SE_SHARED)) == NULL)
1845                                 break;
1846 
1847                         *pl++ = pp;
1848                         pgoff += pgsize;
1849                         plsz -= pgsize;
1850                 }
1851         }
1852 
1853         if (plarr)
1854                 *pl = NULL;                     /* Terminate page list */
1855         ip->i_nextr = pgoff;
1856 
1857 error_out:
1858         if (error && plarr) {
1859                 /*
1860                  * Release any pages we have locked.
1861                  */
1862                 while (pl > &plarr[0])
1863                         page_unlock(*--pl);
1864 
1865                 plarr[0] = NULL;
1866         }
1867 
1868 update_inode:
1869         if (dolock) {
1870                 rw_exit(&ip->i_contents);
1871         }
1872 
1873         /*
1874          * If the inode is not already marked for IACC (in rwip() for read)
1875          * and the inode is not marked for no access time update (in rwip()
1876          * for write) then update the inode access time and mod time now.
1877          */
1878         mutex_enter(&ip->i_tlock);
1879         if ((ip->i_flag & (IACC | INOACC)) == 0) {
1880                 if ((rw != S_OTHER) && (ip->i_type != VDIR)) {
1881                         ip->i_flag |= IACC;
1882                 }
1883                 if (rw == S_WRITE) {
1884                         ip->i_flag |= IUPD;
1885                 }
1886                 ITIMES_NOLOCK(ip);
1887         }
1888         mutex_exit(&ip->i_tlock);
1889 
1890         return (error);
1891 }
1892 
1893 int32_t ud_delay = 1;
1894 
1895 /* ARGSUSED */
1896 static int32_t
1897 udf_putpage(
1898         struct vnode *vp,
1899         offset_t off,
1900         size_t len,
1901         int32_t flags,
1902         struct cred *cr,
1903         caller_context_t *ct)
1904 {
1905         struct ud_inode *ip;
1906         int32_t error = 0;
1907 
1908         ud_printf("udf_putpage\n");
1909 
1910         ip = VTOI(vp);
1911 
1912         if (vp->v_count == 0) {
1913                 cmn_err(CE_WARN, "ud_putpage : bad v_count");
1914                 error = EINVAL;
1915                 goto out;
1916         }
1917 
1918         if (vp->v_flag & VNOMAP) {
1919                 error = ENOSYS;
1920                 goto out;
1921         }
1922 
1923         if (flags & B_ASYNC) {
1924                 if (ud_delay && len &&
1925                     (flags & ~(B_ASYNC|B_DONTNEED|B_FREE)) == 0) {
1926                         mutex_enter(&ip->i_tlock);
1927 
1928                         /*
1929                          * If nobody stalled, start a new cluster.
1930                          */
1931                         if (ip->i_delaylen == 0) {
1932                                 ip->i_delayoff = off;
1933                                 ip->i_delaylen = len;
1934                                 mutex_exit(&ip->i_tlock);
1935                                 goto out;
1936                         }
1937 
1938                         /*
1939                          * If we have a full cluster or they are not contig,
1940                          * then push last cluster and start over.
1941                          */
1942                         if (ip->i_delaylen >= WR_CLUSTSZ(ip) ||
1943                             ip->i_delayoff + ip->i_delaylen != off) {
1944                                 u_offset_t doff;
1945                                 size_t dlen;
1946 
1947                                 doff = ip->i_delayoff;
1948                                 dlen = ip->i_delaylen;
1949                                 ip->i_delayoff = off;
1950                                 ip->i_delaylen = len;
1951                                 mutex_exit(&ip->i_tlock);
1952                                 error = ud_putpages(vp, doff, dlen, flags, cr);
1953                                 /* LMXXX - flags are new val, not old */
1954                                 goto out;
1955                         }
1956 
1957                         /*
1958                          * There is something there, it's not full, and
1959                          * it is contig.
1960                          */
1961                         ip->i_delaylen += len;
1962                         mutex_exit(&ip->i_tlock);
1963                         goto out;
1964                 }
1965 
1966                 /*
1967                  * Must have weird flags or we are not clustering.
1968                  */
1969         }
1970 
1971         error = ud_putpages(vp, off, len, flags, cr);
1972 
1973 out:
1974         return (error);
1975 }
1976 
1977 /* ARGSUSED */
1978 static int32_t
1979 udf_map(
1980         struct vnode *vp,
1981         offset_t off,
1982         struct as *as,
1983         caddr_t *addrp,
1984         size_t len,
1985         uint8_t prot,
1986         uint8_t maxprot,
1987         uint32_t flags,
1988         struct cred *cr,
1989         caller_context_t *ct)
1990 {
1991         struct segvn_crargs vn_a;
1992         int32_t error = 0;
1993 
1994         ud_printf("udf_map\n");
1995 
1996         if (vp->v_flag & VNOMAP) {
1997                 error = ENOSYS;
1998                 goto end;
1999         }
2000 
2001         if ((off < (offset_t)0) ||
2002             ((off + len) < (offset_t)0)) {
2003                 error = EINVAL;
2004                 goto end;
2005         }
2006 
2007         if (vp->v_type != VREG) {
2008                 error = ENODEV;
2009                 goto end;
2010         }
2011 
2012         /*
2013          * If file is being locked, disallow mapping.
2014          */
2015         if (vn_has_mandatory_locks(vp, VTOI(vp)->i_char)) {
2016                 error = EAGAIN;
2017                 goto end;
2018         }
2019 
2020         as_rangelock(as);
2021         error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
2022         if (error != 0) {
2023                 as_rangeunlock(as);
2024                 goto end;
2025         }
2026 
2027         vn_a.vp = vp;
2028         vn_a.offset = off;
2029         vn_a.type = flags & MAP_TYPE;
2030         vn_a.prot = prot;
2031         vn_a.maxprot = maxprot;
2032         vn_a.cred = cr;
2033         vn_a.amp = NULL;
2034         vn_a.flags = flags & ~MAP_TYPE;
2035         vn_a.szc = 0;
2036         vn_a.lgrp_mem_policy_flags = 0;
2037 
2038         error = as_map(as, *addrp, len, segvn_create, (caddr_t)&vn_a);
2039         as_rangeunlock(as);
2040 
2041 end:
2042         return (error);
2043 }
2044 
2045 /* ARGSUSED */
2046 static int32_t
2047 udf_addmap(struct vnode *vp,
2048         offset_t off,
2049         struct as *as,
2050         caddr_t addr,
2051         size_t len,
2052         uint8_t prot,
2053         uint8_t maxprot,
2054         uint32_t flags,
2055         struct cred *cr,
2056         caller_context_t *ct)
2057 {
2058         struct ud_inode *ip = VTOI(vp);
2059 
2060         ud_printf("udf_addmap\n");
2061 
2062         if (vp->v_flag & VNOMAP) {
2063                 return (ENOSYS);
2064         }
2065 
2066         mutex_enter(&ip->i_tlock);
2067         ip->i_mapcnt += btopr(len);
2068         mutex_exit(&ip->i_tlock);
2069 
2070         return (0);
2071 }
2072 
2073 /* ARGSUSED */
2074 static int32_t
2075 udf_delmap(
2076         struct vnode *vp, offset_t off,
2077         struct as *as,
2078         caddr_t addr,
2079         size_t len,
2080         uint32_t prot,
2081         uint32_t maxprot,
2082         uint32_t flags,
2083         struct cred *cr,
2084         caller_context_t *ct)
2085 {
2086         struct ud_inode *ip = VTOI(vp);
2087 
2088         ud_printf("udf_delmap\n");
2089 
2090         if (vp->v_flag & VNOMAP) {
2091                 return (ENOSYS);
2092         }
2093 
2094         mutex_enter(&ip->i_tlock);
2095         ip->i_mapcnt -= btopr(len);  /* Count released mappings */
2096         ASSERT(ip->i_mapcnt >= 0);
2097         mutex_exit(&ip->i_tlock);
2098 
2099         return (0);
2100 }
2101 
2102 /* ARGSUSED */
2103 static int32_t
2104 udf_l_pathconf(
2105         struct vnode *vp,
2106         int32_t cmd,
2107         ulong_t *valp,
2108         struct cred *cr,
2109         caller_context_t *ct)
2110 {
2111         int32_t error = 0;
2112 
2113         ud_printf("udf_l_pathconf\n");
2114 
2115         if (cmd == _PC_FILESIZEBITS) {
2116                 /*
2117                  * udf supports 64 bits as file size
2118                  * but there are several other restrictions
2119                  * it only supports 32-bit block numbers and
2120                  * daddr32_t is only and int32_t so taking these
2121                  * into account we can stay just as where ufs is
2122                  */
2123                 *valp = 41;
2124         } else if (cmd == _PC_TIMESTAMP_RESOLUTION) {
2125                 /* nanosecond timestamp resolution */
2126                 *valp = 1L;
2127         } else {
2128                 error = fs_pathconf(vp, cmd, valp, cr, ct);
2129         }
2130 
2131         return (error);
2132 }
2133 
2134 uint32_t ud_pageio_reads = 0, ud_pageio_writes = 0;
2135 
2136 /*
2137  * Assumption is that there will not be a pageio request
2138  * to a enbedded file
2139  */
2140 /* ARGSUSED */
2141 static int32_t
2142 udf_pageio(
2143         struct vnode *vp,
2144         struct page *pp,
2145         u_offset_t io_off,
2146         size_t io_len,
2147         int32_t flags,
2148         struct cred *cr,
2149         caller_context_t *ct)
2150 {
2151         daddr_t bn;
2152         struct buf *bp;
2153         struct ud_inode *ip = VTOI(vp);
2154         int32_t dolock, error = 0, contig, multi_io;
2155         size_t done_len = 0, cur_len = 0;
2156         page_t *npp = NULL, *opp = NULL, *cpp = pp;
2157 
2158         if (pp == NULL) {
2159                 return (EINVAL);
2160         }
2161 
2162         dolock = (rw_owner(&ip->i_contents) != curthread);
2163 
2164         /*
2165          * We need a better check.  Ideally, we would use another
2166          * vnodeops so that hlocked and forcibly unmounted file
2167          * systems would return EIO where appropriate and w/o the
2168          * need for these checks.
2169          */
2170         if (ip->i_udf == NULL) {
2171                 return (EIO);
2172         }
2173 
2174         if (dolock) {
2175                 rw_enter(&ip->i_contents, RW_READER);
2176         }
2177 
2178         /*
2179          * Break the io request into chunks, one for each contiguous
2180          * stretch of disk blocks in the target file.
2181          */
2182         while (done_len < io_len) {
2183                 ASSERT(cpp);
2184                 bp = NULL;
2185                 contig = 0;
2186                 if (error = ud_bmap_read(ip, (u_offset_t)(io_off + done_len),
2187                     &bn, &contig)) {
2188                         break;
2189                 }
2190 
2191                 if (bn == UDF_HOLE) {   /* No holey swapfiles */
2192                         cmn_err(CE_WARN, "SWAP file has HOLES");
2193                         error = EINVAL;
2194                         break;
2195                 }
2196 
2197                 cur_len = MIN(io_len - done_len, contig);
2198 
2199                 /*
2200                  * Check if more than one I/O is
2201                  * required to complete the given
2202                  * I/O operation
2203                  */
2204                 if (ip->i_udf->udf_lbsize < PAGESIZE) {
2205                         if (cur_len >= PAGESIZE) {
2206                                 multi_io = 0;
2207                                 cur_len &= PAGEMASK;
2208                         } else {
2209                                 multi_io = 1;
2210                                 cur_len = MIN(io_len - done_len, PAGESIZE);
2211                         }
2212                 }
2213                 page_list_break(&cpp, &npp, btop(cur_len));
2214 
2215                 bp = pageio_setup(cpp, cur_len, ip->i_devvp, flags);
2216                 ASSERT(bp != NULL);
2217 
2218                 bp->b_edev = ip->i_dev;
2219                 bp->b_dev = cmpdev(ip->i_dev);
2220                 bp->b_blkno = bn;
2221                 bp->b_un.b_addr = (caddr_t)0;
2222                 bp->b_file = vp;
2223                 bp->b_offset = (offset_t)(io_off + done_len);
2224 
2225 /*
2226  *              ub.ub_pageios.value.ul++;
2227  */
2228                 if (multi_io == 0) {
2229                         (void) bdev_strategy(bp);
2230                 } else {
2231                         error = ud_multi_strat(ip, cpp, bp,
2232                             (u_offset_t)(io_off + done_len));
2233                         if (error != 0) {
2234                                 pageio_done(bp);
2235                                 break;
2236                         }
2237                 }
2238                 if (flags & B_READ) {
2239                         ud_pageio_reads++;
2240                 } else {
2241                         ud_pageio_writes++;
2242                 }
2243 
2244                 /*
2245                  * If the request is not B_ASYNC, wait for i/o to complete
2246                  * and re-assemble the page list to return to the caller.
2247                  * If it is B_ASYNC we leave the page list in pieces and
2248                  * cleanup() will dispose of them.
2249                  */
2250                 if ((flags & B_ASYNC) == 0) {
2251                         error = biowait(bp);
2252                         pageio_done(bp);
2253                         if (error) {
2254                                 break;
2255                         }
2256                         page_list_concat(&opp, &cpp);
2257                 }
2258                 cpp = npp;
2259                 npp = NULL;
2260                 done_len += cur_len;
2261         }
2262 
2263         ASSERT(error || (cpp == NULL && npp == NULL && done_len == io_len));
2264         if (error) {
2265                 if (flags & B_ASYNC) {
2266                         /* Cleanup unprocessed parts of list */
2267                         page_list_concat(&cpp, &npp);
2268                         if (flags & B_READ) {
2269                                 pvn_read_done(cpp, B_ERROR);
2270                         } else {
2271                                 pvn_write_done(cpp, B_ERROR);
2272                         }
2273                 } else {
2274                         /* Re-assemble list and let caller clean up */
2275                         page_list_concat(&opp, &cpp);
2276                         page_list_concat(&opp, &npp);
2277                 }
2278         }
2279 
2280         if (dolock) {
2281                 rw_exit(&ip->i_contents);
2282         }
2283 
2284         return (error);
2285 }
2286 
2287 
2288 
2289 
2290 /* -------------------- local functions --------------------------- */
2291 
2292 
2293 
2294 int32_t
2295 ud_rdwri(enum uio_rw rw, int32_t ioflag,
2296         struct ud_inode *ip, caddr_t base, int32_t len,
2297         offset_t offset, enum uio_seg seg, int32_t *aresid, struct cred *cr)
2298 {
2299         int32_t error;
2300         struct uio auio;
2301         struct iovec aiov;
2302 
2303         ud_printf("ud_rdwri\n");
2304 
2305         bzero((caddr_t)&auio, sizeof (uio_t));
2306         bzero((caddr_t)&aiov, sizeof (iovec_t));
2307 
2308         aiov.iov_base = base;
2309         aiov.iov_len = len;
2310         auio.uio_iov = &aiov;
2311         auio.uio_iovcnt = 1;
2312         auio.uio_loffset = offset;
2313         auio.uio_segflg = (int16_t)seg;
2314         auio.uio_resid = len;
2315 
2316         if (rw == UIO_WRITE) {
2317                 auio.uio_fmode = FWRITE;
2318                 auio.uio_extflg = UIO_COPY_DEFAULT;
2319                 auio.uio_llimit = curproc->p_fsz_ctl;
2320                 error = ud_wrip(ip, &auio, ioflag, cr);
2321         } else {
2322                 auio.uio_fmode = FREAD;
2323                 auio.uio_extflg = UIO_COPY_CACHED;
2324                 auio.uio_llimit = MAXOFFSET_T;
2325                 error = ud_rdip(ip, &auio, ioflag, cr);
2326         }
2327 
2328         if (aresid) {
2329                 *aresid = auio.uio_resid;
2330         } else if (auio.uio_resid) {
2331                 error = EIO;
2332         }
2333         return (error);
2334 }
2335 
2336 /*
2337  * Free behind hacks.  The pager is busted.
2338  * XXX - need to pass the information down to writedone() in a flag like B_SEQ
2339  * or B_FREE_IF_TIGHT_ON_MEMORY.
2340  */
2341 int32_t ud_freebehind = 1;
2342 int32_t ud_smallfile = 32 * 1024;
2343 
2344 /* ARGSUSED */
2345 int32_t
2346 ud_getpage_miss(struct vnode *vp, u_offset_t off,
2347         size_t len, struct seg *seg, caddr_t addr, page_t *pl[],
2348         size_t plsz, enum seg_rw rw, int32_t seq)
2349 {
2350         struct ud_inode *ip = VTOI(vp);
2351         int32_t err = 0;
2352         size_t io_len;
2353         u_offset_t io_off;
2354         u_offset_t pgoff;
2355         page_t *pp;
2356 
2357         pl[0] = NULL;
2358 
2359         /*
2360          * Figure out whether the page can be created, or must be
2361          * read from the disk
2362          */
2363         if (rw == S_CREATE) {
2364                 if ((pp = page_create_va(vp, off,
2365                     PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
2366                         cmn_err(CE_WARN, "ud_getpage_miss: page_create");
2367                         return (EINVAL);
2368                 }
2369                 io_len = PAGESIZE;
2370         } else {
2371                 pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
2372                     &io_len, off, PAGESIZE, 0);
2373 
2374                 /*
2375                  * Some other thread has entered the page.
2376                  * ud_getpage will retry page_lookup.
2377                  */
2378                 if (pp == NULL) {
2379                         return (0);
2380                 }
2381 
2382                 /*
2383                  * Fill the page with as much data as we can from the file.
2384                  */
2385                 err = ud_page_fill(ip, pp, off, B_READ, &pgoff);
2386                 if (err) {
2387                         pvn_read_done(pp, B_ERROR);
2388                         return (err);
2389                 }
2390 
2391                 /*
2392                  * XXX ??? ufs has io_len instead of pgoff below
2393                  */
2394                 ip->i_nextrio = off + ((pgoff + PAGESIZE - 1) & PAGEMASK);
2395 
2396                 /*
2397                  * If the file access is sequential, initiate read ahead
2398                  * of the next cluster.
2399                  */
2400                 if (seq && ip->i_nextrio < ip->i_size) {
2401                         ud_getpage_ra(vp, off, seg, addr);
2402                 }
2403         }
2404 
2405 outmiss:
2406         pvn_plist_init(pp, pl, plsz, (offset_t)off, io_len, rw);
2407         return (err);
2408 }
2409 
2410 /* ARGSUSED */
2411 void
2412 ud_getpage_ra(struct vnode *vp,
2413         u_offset_t off, struct seg *seg, caddr_t addr)
2414 {
2415         page_t *pp;
2416         size_t io_len;
2417         struct ud_inode *ip = VTOI(vp);
2418         u_offset_t io_off = ip->i_nextrio, pgoff;
2419         caddr_t addr2 = addr + (io_off - off);
2420         daddr_t bn;
2421         int32_t contig = 0;
2422 
2423         /*
2424          * Is this test needed?
2425          */
2426 
2427         if (addr2 >= seg->s_base + seg->s_size) {
2428                 return;
2429         }
2430 
2431         contig = 0;
2432         if (ud_bmap_read(ip, io_off, &bn, &contig) != 0 || bn == UDF_HOLE) {
2433                 return;
2434         }
2435 
2436         pp = pvn_read_kluster(vp, io_off, seg, addr2,
2437             &io_off, &io_len, io_off, PAGESIZE, 1);
2438 
2439         /*
2440          * Some other thread has entered the page.
2441          * So no read head done here (ie we will have to and wait
2442          * for the read when needed).
2443          */
2444 
2445         if (pp == NULL) {
2446                 return;
2447         }
2448 
2449         (void) ud_page_fill(ip, pp, io_off, (B_READ|B_ASYNC), &pgoff);
2450         ip->i_nextrio =  io_off + ((pgoff + PAGESIZE - 1) & PAGEMASK);
2451 }
2452 
2453 int
2454 ud_page_fill(struct ud_inode *ip, page_t *pp, u_offset_t off,
2455         uint32_t bflgs, u_offset_t *pg_off)
2456 {
2457         daddr_t bn;
2458         struct buf *bp;
2459         caddr_t kaddr, caddr;
2460         int32_t error = 0, contig = 0, multi_io = 0;
2461         int32_t lbsize = ip->i_udf->udf_lbsize;
2462         int32_t lbmask = ip->i_udf->udf_lbmask;
2463         uint64_t isize;
2464 
2465         isize = (ip->i_size + lbmask) & (~lbmask);
2466         if (ip->i_desc_type == ICB_FLAG_ONE_AD) {
2467 
2468                 /*
2469                  * Embedded file read file_entry
2470                  * from buffer cache and copy the required
2471                  * portions
2472                  */
2473                 bp = ud_bread(ip->i_dev,
2474                     ip->i_icb_lbano << ip->i_udf->udf_l2d_shift, lbsize);
2475                 if ((bp->b_error == 0) &&
2476                     (bp->b_resid == 0)) {
2477 
2478                         caddr = bp->b_un.b_addr + ip->i_data_off;
2479 
2480                         /*
2481                          * mapin to kvm
2482                          */
2483                         kaddr = (caddr_t)ppmapin(pp,
2484                             PROT_READ | PROT_WRITE, (caddr_t)-1);
2485                         (void) kcopy(caddr, kaddr, ip->i_size);
2486 
2487                         /*
2488                          * mapout of kvm
2489                          */
2490                         ppmapout(kaddr);
2491                 }
2492                 brelse(bp);
2493                 contig = ip->i_size;
2494         } else {
2495 
2496                 /*
2497                  * Get the continuous size and block number
2498                  * at offset "off"
2499                  */
2500                 if (error = ud_bmap_read(ip, off, &bn, &contig))
2501                         goto out;
2502                 contig = MIN(contig, PAGESIZE);
2503                 contig = (contig + lbmask) & (~lbmask);
2504 
2505                 /*
2506                  * Zero part of the page which we are not
2507                  * going to read from the disk.
2508                  */
2509 
2510                 if (bn == UDF_HOLE) {
2511 
2512                         /*
2513                          * This is a HOLE. Just zero out
2514                          * the page
2515                          */
2516                         if (((off + contig) == isize) ||
2517                             (contig == PAGESIZE)) {
2518                                 pagezero(pp->p_prev, 0, PAGESIZE);
2519                                 goto out;
2520                         }
2521                 }
2522 
2523                 if (contig < PAGESIZE) {
2524                         uint64_t count;
2525 
2526                         count = isize - off;
2527                         if (contig != count) {
2528                                 multi_io = 1;
2529                                 contig = (int32_t)(MIN(count, PAGESIZE));
2530                         } else {
2531                                 pagezero(pp->p_prev, contig, PAGESIZE - contig);
2532                         }
2533                 }
2534 
2535                 /*
2536                  * Get a bp and initialize it
2537                  */
2538                 bp = pageio_setup(pp, contig, ip->i_devvp, bflgs);
2539                 ASSERT(bp != NULL);
2540 
2541                 bp->b_edev = ip->i_dev;
2542                 bp->b_dev = cmpdev(ip->i_dev);
2543                 bp->b_blkno = bn;
2544                 bp->b_un.b_addr = 0;
2545                 bp->b_file = ip->i_vnode;
2546 
2547                 /*
2548                  * Start I/O
2549                  */
2550                 if (multi_io == 0) {
2551 
2552                         /*
2553                          * Single I/O is sufficient for this page
2554                          */
2555                         (void) bdev_strategy(bp);
2556                 } else {
2557 
2558                         /*
2559                          * We need to do the I/O in
2560                          * piece's
2561                          */
2562                         error = ud_multi_strat(ip, pp, bp, off);
2563                         if (error != 0) {
2564                                 goto out;
2565                         }
2566                 }
2567                 if ((bflgs & B_ASYNC) == 0) {
2568 
2569                         /*
2570                          * Wait for i/o to complete.
2571                          */
2572 
2573                         error = biowait(bp);
2574                         pageio_done(bp);
2575                         if (error) {
2576                                 goto out;
2577                         }
2578                 }
2579         }
2580         if ((off + contig) >= ip->i_size) {
2581                 contig = ip->i_size - off;
2582         }
2583 
2584 out:
2585         *pg_off = contig;
2586         return (error);
2587 }
2588 
2589 int32_t
2590 ud_putpages(struct vnode *vp, offset_t off,
2591         size_t len, int32_t flags, struct cred *cr)
2592 {
2593         struct ud_inode *ip;
2594         page_t *pp;
2595         u_offset_t io_off;
2596         size_t io_len;
2597         u_offset_t eoff;
2598         int32_t err = 0;
2599         int32_t dolock;
2600 
2601         ud_printf("ud_putpages\n");
2602 
2603         if (vp->v_count == 0) {
2604                 cmn_err(CE_WARN, "ud_putpages: bad v_count");
2605                 return (EINVAL);
2606         }
2607 
2608         ip = VTOI(vp);
2609 
2610         /*
2611          * Acquire the readers/write inode lock before locking
2612          * any pages in this inode.
2613          * The inode lock is held during i/o.
2614          */
2615         if (len == 0) {
2616                 mutex_enter(&ip->i_tlock);
2617                 ip->i_delayoff = ip->i_delaylen = 0;
2618                 mutex_exit(&ip->i_tlock);
2619         }
2620         dolock = (rw_owner(&ip->i_contents) != curthread);
2621         if (dolock) {
2622                 rw_enter(&ip->i_contents, RW_READER);
2623         }
2624 
2625         if (!vn_has_cached_data(vp)) {
2626                 if (dolock) {
2627                         rw_exit(&ip->i_contents);
2628                 }
2629                 return (0);
2630         }
2631 
2632         if (len == 0) {
2633                 /*
2634                  * Search the entire vp list for pages >= off.
2635                  */
2636                 err = pvn_vplist_dirty(vp, (u_offset_t)off, ud_putapage,
2637                     flags, cr);
2638         } else {
2639                 /*
2640                  * Loop over all offsets in the range looking for
2641                  * pages to deal with.
2642                  */
2643                 if ((eoff = blkroundup(ip->i_udf, ip->i_size)) != 0) {
2644                         eoff = MIN(off + len, eoff);
2645                 } else {
2646                         eoff = off + len;
2647                 }
2648 
2649                 for (io_off = off; io_off < eoff; io_off += io_len) {
2650                         /*
2651                          * If we are not invalidating, synchronously
2652                          * freeing or writing pages, use the routine
2653                          * page_lookup_nowait() to prevent reclaiming
2654                          * them from the free list.
2655                          */
2656                         if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
2657                                 pp = page_lookup(vp, io_off,
2658                                     (flags & (B_INVAL | B_FREE)) ?
2659                                     SE_EXCL : SE_SHARED);
2660                         } else {
2661                                 pp = page_lookup_nowait(vp, io_off,
2662                                     (flags & B_FREE) ? SE_EXCL : SE_SHARED);
2663                         }
2664 
2665                         if (pp == NULL || pvn_getdirty(pp, flags) == 0) {
2666                                 io_len = PAGESIZE;
2667                         } else {
2668 
2669                                 err = ud_putapage(vp, pp,
2670                                     &io_off, &io_len, flags, cr);
2671                                 if (err != 0) {
2672                                         break;
2673                                 }
2674                                 /*
2675                                  * "io_off" and "io_len" are returned as
2676                                  * the range of pages we actually wrote.
2677                                  * This allows us to skip ahead more quickly
2678                                  * since several pages may've been dealt
2679                                  * with by this iteration of the loop.
2680                                  */
2681                         }
2682                 }
2683         }
2684         if (err == 0 && off == 0 && (len == 0 || len >= ip->i_size)) {
2685                 /*
2686                  * We have just sync'ed back all the pages on
2687                  * the inode, turn off the IMODTIME flag.
2688                  */
2689                 mutex_enter(&ip->i_tlock);
2690                 ip->i_flag &= ~IMODTIME;
2691                 mutex_exit(&ip->i_tlock);
2692         }
2693         if (dolock) {
2694                 rw_exit(&ip->i_contents);
2695         }
2696         return (err);
2697 }
2698 
2699 /* ARGSUSED */
2700 int32_t
2701 ud_putapage(struct vnode *vp,
2702         page_t *pp, u_offset_t *offp,
2703         size_t *lenp, int32_t flags, struct cred *cr)
2704 {
2705         daddr_t bn;
2706         size_t io_len;
2707         struct ud_inode *ip;
2708         int32_t error = 0, contig, multi_io = 0;
2709         struct udf_vfs *udf_vfsp;
2710         u_offset_t off, io_off;
2711         caddr_t kaddr, caddr;
2712         struct buf *bp = NULL;
2713         int32_t lbmask;
2714         uint64_t isize;
2715         uint16_t crc_len;
2716         struct file_entry *fe;
2717 
2718         ud_printf("ud_putapage\n");
2719 
2720         ip = VTOI(vp);
2721         ASSERT(ip);
2722         ASSERT(RW_LOCK_HELD(&ip->i_contents));
2723         lbmask = ip->i_udf->udf_lbmask;
2724         isize = (ip->i_size + lbmask) & (~lbmask);
2725 
2726         udf_vfsp = ip->i_udf;
2727         ASSERT(udf_vfsp->udf_flags & UDF_FL_RW);
2728 
2729         /*
2730          * If the modified time on the inode has not already been
2731          * set elsewhere (e.g. for write/setattr) we set the time now.
2732          * This gives us approximate modified times for mmap'ed files
2733          * which are modified via stores in the user address space.
2734          */
2735         if (((ip->i_flag & IMODTIME) == 0) || (flags & B_FORCE)) {
2736                 mutex_enter(&ip->i_tlock);
2737                 ip->i_flag |= IUPD;
2738                 ITIMES_NOLOCK(ip);
2739                 mutex_exit(&ip->i_tlock);
2740         }
2741 
2742 
2743         /*
2744          * Align the request to a block boundry (for old file systems),
2745          * and go ask bmap() how contiguous things are for this file.
2746          */
2747         off = pp->p_offset & ~(offset_t)lbmask;
2748                                 /* block align it */
2749 
2750 
2751         if (ip->i_desc_type == ICB_FLAG_ONE_AD) {
2752                 ASSERT(ip->i_size <= ip->i_max_emb);
2753 
2754                 pp = pvn_write_kluster(vp, pp, &io_off,
2755                     &io_len, off, PAGESIZE, flags);
2756                 if (io_len == 0) {
2757                         io_len = PAGESIZE;
2758                 }
2759 
2760                 bp = ud_bread(ip->i_dev,
2761                     ip->i_icb_lbano << udf_vfsp->udf_l2d_shift,
2762                     udf_vfsp->udf_lbsize);
2763                 fe = (struct file_entry *)bp->b_un.b_addr;
2764                 if ((bp->b_flags & B_ERROR) ||
2765                     (ud_verify_tag_and_desc(&fe->fe_tag, UD_FILE_ENTRY,
2766                     ip->i_icb_block,
2767                     1, udf_vfsp->udf_lbsize) != 0)) {
2768                         if (pp != NULL)
2769                                 pvn_write_done(pp, B_ERROR | B_WRITE | flags);
2770                         if (bp->b_flags & B_ERROR) {
2771                                 error = EIO;
2772                         } else {
2773                                 error = EINVAL;
2774                         }
2775                         brelse(bp);
2776                         return (error);
2777                 }
2778                 if ((bp->b_error == 0) &&
2779                     (bp->b_resid == 0)) {
2780 
2781                         caddr = bp->b_un.b_addr + ip->i_data_off;
2782                         kaddr = (caddr_t)ppmapin(pp,
2783                             PROT_READ | PROT_WRITE, (caddr_t)-1);
2784                         (void) kcopy(kaddr, caddr, ip->i_size);
2785                         ppmapout(kaddr);
2786                 }
2787                 crc_len = offsetof(struct file_entry, fe_spec) +
2788                     SWAP_32(fe->fe_len_ear);
2789                 crc_len += ip->i_size;
2790                 ud_make_tag(ip->i_udf, &fe->fe_tag,
2791                     UD_FILE_ENTRY, ip->i_icb_block, crc_len);
2792 
2793                 bwrite(bp);
2794 
2795                 if (flags & B_ASYNC) {
2796                         pvn_write_done(pp, flags);
2797                 }
2798                 contig = ip->i_size;
2799         } else {
2800 
2801                 if (error = ud_bmap_read(ip, off, &bn, &contig)) {
2802                         goto out;
2803                 }
2804                 contig = MIN(contig, PAGESIZE);
2805                 contig = (contig + lbmask) & (~lbmask);
2806 
2807                 if (contig < PAGESIZE) {
2808                         uint64_t count;
2809 
2810                         count = isize - off;
2811                         if (contig != count) {
2812                                 multi_io = 1;
2813                                 contig = (int32_t)(MIN(count, PAGESIZE));
2814                         }
2815                 }
2816 
2817                 if ((off + contig) > isize) {
2818                         contig = isize - off;
2819                 }
2820 
2821                 if (contig > PAGESIZE) {
2822                         if (contig & PAGEOFFSET) {
2823                                 contig &= PAGEMASK;
2824                         }
2825                 }
2826 
2827                 pp = pvn_write_kluster(vp, pp, &io_off,
2828                     &io_len, off, contig, flags);
2829                 if (io_len == 0) {
2830                         io_len = PAGESIZE;
2831                 }
2832 
2833                 bp = pageio_setup(pp, contig, ip->i_devvp, B_WRITE | flags);
2834                 ASSERT(bp != NULL);
2835 
2836                 bp->b_edev = ip->i_dev;
2837                 bp->b_dev = cmpdev(ip->i_dev);
2838                 bp->b_blkno = bn;
2839                 bp->b_un.b_addr = 0;
2840                 bp->b_file = vp;
2841                 bp->b_offset = (offset_t)off;
2842 
2843 
2844                 /*
2845                  * write throttle
2846                  */
2847                 ASSERT(bp->b_iodone == NULL);
2848                 bp->b_iodone = ud_iodone;
2849                 mutex_enter(&ip->i_tlock);
2850                 ip->i_writes += bp->b_bcount;
2851                 mutex_exit(&ip->i_tlock);
2852 
2853                 if (multi_io == 0) {
2854 
2855                         (void) bdev_strategy(bp);
2856                 } else {
2857                         error = ud_multi_strat(ip, pp, bp, off);
2858                         if (error != 0) {
2859                                 goto out;
2860                         }
2861                 }
2862 
2863                 if ((flags & B_ASYNC) == 0) {
2864                         /*
2865                          * Wait for i/o to complete.
2866                          */
2867                         error = biowait(bp);
2868                         pageio_done(bp);
2869                 }
2870         }
2871 
2872         if ((flags & B_ASYNC) == 0) {
2873                 pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
2874         }
2875 
2876         pp = NULL;
2877 
2878 out:
2879         if (error != 0 && pp != NULL) {
2880                 pvn_write_done(pp, B_ERROR | B_WRITE | flags);
2881         }
2882 
2883         if (offp) {
2884                 *offp = io_off;
2885         }
2886         if (lenp) {
2887                 *lenp = io_len;
2888         }
2889 
2890         return (error);
2891 }
2892 
2893 
2894 int32_t
2895 ud_iodone(struct buf *bp)
2896 {
2897         struct ud_inode *ip;
2898 
2899         ASSERT((bp->b_pages->p_vnode != NULL) && !(bp->b_flags & B_READ));
2900 
2901         bp->b_iodone = NULL;
2902 
2903         ip = VTOI(bp->b_pages->p_vnode);
2904 
2905         mutex_enter(&ip->i_tlock);
2906         if (ip->i_writes >= ud_LW) {
2907                 if ((ip->i_writes -= bp->b_bcount) <= ud_LW) {
2908                         if (ud_WRITES) {
2909                                 cv_broadcast(&ip->i_wrcv); /* wake all up */
2910                         }
2911                 }
2912         } else {
2913                 ip->i_writes -= bp->b_bcount;
2914         }
2915         mutex_exit(&ip->i_tlock);
2916         iodone(bp);
2917         return (0);
2918 }
2919 
2920 /* ARGSUSED3 */
2921 int32_t
2922 ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr)
2923 {
2924         struct vnode *vp;
2925         struct udf_vfs *udf_vfsp;
2926         krw_t rwtype;
2927         caddr_t base;
2928         uint32_t flags;
2929         int32_t error, n, on, mapon, dofree;
2930         u_offset_t off;
2931         long oresid = uio->uio_resid;
2932 
2933         ASSERT(RW_LOCK_HELD(&ip->i_contents));
2934         if ((ip->i_type != VREG) &&
2935             (ip->i_type != VDIR) &&
2936             (ip->i_type != VLNK)) {
2937                 return (EIO);
2938         }
2939 
2940         if (uio->uio_loffset > MAXOFFSET_T) {
2941                 return (0);
2942         }
2943 
2944         if ((uio->uio_loffset < (offset_t)0) ||
2945             ((uio->uio_loffset + uio->uio_resid) < 0)) {
2946                 return (EINVAL);
2947         }
2948         if (uio->uio_resid == 0) {
2949                 return (0);
2950         }
2951 
2952         vp = ITOV(ip);
2953         udf_vfsp = ip->i_udf;
2954         mutex_enter(&ip->i_tlock);
2955         ip->i_flag |= IACC;
2956         mutex_exit(&ip->i_tlock);
2957 
2958         rwtype = (rw_write_held(&ip->i_contents)?RW_WRITER:RW_READER);
2959 
2960         do {
2961                 offset_t diff;
2962                 u_offset_t uoff = uio->uio_loffset;
2963                 off = uoff & (offset_t)MAXBMASK;
2964                 mapon = (int)(uoff & (offset_t)MAXBOFFSET);
2965                 on = (int)blkoff(udf_vfsp, uoff);
2966                 n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);
2967 
2968                 diff = ip->i_size - uoff;
2969 
2970                 if (diff <= (offset_t)0) {
2971                         error = 0;
2972                         goto out;
2973                 }
2974                 if (diff < (offset_t)n) {
2975                         n = (int)diff;
2976                 }
2977                 dofree = ud_freebehind &&
2978                     ip->i_nextr == (off & PAGEMASK) &&
2979                     off > ud_smallfile;
2980 
2981                 if (rwtype == RW_READER) {
2982                         rw_exit(&ip->i_contents);
2983                 }
2984 
2985                 base = segmap_getmapflt(segkmap, vp, (off + mapon),
2986                     (uint32_t)n, 1, S_READ);
2987                 error = uiomove(base + mapon, (long)n, UIO_READ, uio);
2988 
2989                 flags = 0;
2990                 if (!error) {
2991                         /*
2992                          * If read a whole block, or read to eof,
2993                          * won't need this buffer again soon.
2994                          */
2995                         if (n + on == MAXBSIZE && ud_freebehind && dofree &&
2996                             freemem < lotsfree + pages_before_pager) {
2997                                 flags = SM_FREE | SM_DONTNEED |SM_ASYNC;
2998                         }
2999                         /*
3000                          * In POSIX SYNC (FSYNC and FDSYNC) read mode,
3001                          * we want to make sure that the page which has
3002                          * been read, is written on disk if it is dirty.
3003                          * And corresponding indirect blocks should also
3004                          * be flushed out.
3005                          */
3006                         if ((ioflag & FRSYNC) && (ioflag & (FSYNC|FDSYNC))) {
3007                                 flags &= ~SM_ASYNC;
3008                                 flags |= SM_WRITE;
3009                         }
3010                         error = segmap_release(segkmap, base, flags);
3011                 } else    {
3012                         (void) segmap_release(segkmap, base, flags);
3013                 }
3014 
3015                 if (rwtype == RW_READER) {
3016                         rw_enter(&ip->i_contents, rwtype);
3017                 }
3018         } while (error == 0 && uio->uio_resid > 0 && n != 0);
3019 out:
3020         /*
3021          * Inode is updated according to this table if FRSYNC is set.
3022          *
3023          *      FSYNC   FDSYNC(posix.4)
3024          *      --------------------------
3025          *      always  IATTCHG|IBDWRITE
3026          */
3027         if (ioflag & FRSYNC) {
3028                 if ((ioflag & FSYNC) ||
3029                     ((ioflag & FDSYNC) &&
3030                     (ip->i_flag & (IATTCHG|IBDWRITE)))) {
3031                 rw_exit(&ip->i_contents);
3032                 rw_enter(&ip->i_contents, RW_WRITER);
3033                 ud_iupdat(ip, 1);
3034                 }
3035         }
3036         /*
3037          * If we've already done a partial read, terminate
3038          * the read but return no error.
3039          */
3040         if (oresid != uio->uio_resid) {
3041                 error = 0;
3042         }
3043         ITIMES(ip);
3044 
3045         return (error);
3046 }
3047 
3048 int32_t
3049 ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr)
3050 {
3051         caddr_t base;
3052         struct vnode *vp;
3053         struct udf_vfs *udf_vfsp;
3054         uint32_t flags;
3055         int32_t error = 0, iupdat_flag, n, on, mapon, i_size_changed = 0;
3056         int32_t pagecreate, newpage;
3057         uint64_t old_i_size;
3058         u_offset_t off;
3059         long start_resid = uio->uio_resid, premove_resid;
3060         rlim64_t limit = uio->uio_limit;
3061 
3062 
3063         ASSERT(RW_WRITE_HELD(&ip->i_contents));
3064         if ((ip->i_type != VREG) &&
3065             (ip->i_type != VDIR) &&
3066             (ip->i_type != VLNK)) {
3067                 return (EIO);
3068         }
3069 
3070         if (uio->uio_loffset >= MAXOFFSET_T) {
3071                 return (EFBIG);
3072         }
3073         /*
3074          * see udf_l_pathconf
3075          */
3076         if (limit > (((uint64_t)1 << 40) - 1)) {
3077                 limit = ((uint64_t)1 << 40) - 1;
3078         }
3079         if (uio->uio_loffset >= limit) {
3080                 proc_t *p = ttoproc(curthread);
3081 
3082                 mutex_enter(&p->p_lock);
3083                 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
3084                     p, RCA_UNSAFE_SIGINFO);
3085                 mutex_exit(&p->p_lock);
3086                 return (EFBIG);
3087         }
3088         if ((uio->uio_loffset < (offset_t)0) ||
3089             ((uio->uio_loffset + uio->uio_resid) < 0)) {
3090                 return (EINVAL);
3091         }
3092         if (uio->uio_resid == 0) {
3093                 return (0);
3094         }
3095 
3096         mutex_enter(&ip->i_tlock);
3097         ip->i_flag |= INOACC;
3098 
3099         if (ioflag & (FSYNC | FDSYNC)) {
3100                 ip->i_flag |= ISYNC;
3101                 iupdat_flag = 1;
3102         }
3103         mutex_exit(&ip->i_tlock);
3104 
3105         udf_vfsp = ip->i_udf;
3106         vp = ITOV(ip);
3107 
3108         do {
3109                 u_offset_t uoff = uio->uio_loffset;
3110                 off = uoff & (offset_t)MAXBMASK;
3111                 mapon = (int)(uoff & (offset_t)MAXBOFFSET);
3112                 on = (int)blkoff(udf_vfsp, uoff);
3113                 n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);
3114 
3115                 if (ip->i_type == VREG && uoff + n >= limit) {
3116                         if (uoff >= limit) {
3117                                 error = EFBIG;
3118                                 goto out;
3119                         }
3120                         n = (int)(limit - (rlim64_t)uoff);
3121                 }
3122                 if (uoff + n > ip->i_size) {
3123                         /*
3124                          * We are extending the length of the file.
3125                          * bmap is used so that we are sure that
3126                          * if we need to allocate new blocks, that it
3127                          * is done here before we up the file size.
3128                          */
3129                         error = ud_bmap_write(ip, uoff,
3130                             (int)(on + n), mapon == 0, cr);
3131                         if (error) {
3132                                 break;
3133                         }
3134                         i_size_changed = 1;
3135                         old_i_size = ip->i_size;
3136                         ip->i_size = uoff + n;
3137                         /*
3138                          * If we are writing from the beginning of
3139                          * the mapping, we can just create the
3140                          * pages without having to read them.
3141                          */
3142                         pagecreate = (mapon == 0);
3143                 } else if (n == MAXBSIZE) {
3144                         /*
3145                          * Going to do a whole mappings worth,
3146                          * so we can just create the pages w/o
3147                          * having to read them in.  But before
3148                          * we do that, we need to make sure any
3149                          * needed blocks are allocated first.
3150                          */
3151                         error = ud_bmap_write(ip, uoff,
3152                             (int)(on + n), 1, cr);
3153                         if (error) {
3154                                 break;
3155                         }
3156                         pagecreate = 1;
3157                 } else {
3158                         pagecreate = 0;
3159                 }
3160 
3161                 rw_exit(&ip->i_contents);
3162 
3163                 /*
3164                  * Touch the page and fault it in if it is not in
3165                  * core before segmap_getmapflt can lock it. This
3166                  * is to avoid the deadlock if the buffer is mapped
3167                  * to the same file through mmap which we want to
3168                  * write to.
3169                  */
3170                 uio_prefaultpages((long)n, uio);
3171 
3172                 base = segmap_getmapflt(segkmap, vp, (off + mapon),
3173                     (uint32_t)n, !pagecreate, S_WRITE);
3174 
3175                 /*
3176                  * segmap_pagecreate() returns 1 if it calls
3177                  * page_create_va() to allocate any pages.
3178                  */
3179                 newpage = 0;
3180                 if (pagecreate) {
3181                         newpage = segmap_pagecreate(segkmap, base,
3182                             (size_t)n, 0);
3183                 }
3184 
3185                 premove_resid = uio->uio_resid;
3186                 error = uiomove(base + mapon, (long)n, UIO_WRITE, uio);
3187 
3188                 if (pagecreate &&
3189                     uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) {
3190                         /*
3191                          * We created pages w/o initializing them completely,
3192                          * thus we need to zero the part that wasn't set up.
3193                          * This happens on most EOF write cases and if
3194                          * we had some sort of error during the uiomove.
3195                          */
3196                         int nzero, nmoved;
3197 
3198                         nmoved = (int)(uio->uio_loffset - (off + mapon));
3199                         ASSERT(nmoved >= 0 && nmoved <= n);
3200                         nzero = roundup(on + n, PAGESIZE) - nmoved;
3201                         ASSERT(nzero > 0 && mapon + nmoved + nzero <= MAXBSIZE);
3202                         (void) kzero(base + mapon + nmoved, (uint32_t)nzero);
3203                 }
3204 
3205                 /*
3206                  * Unlock the pages allocated by page_create_va()
3207                  * in segmap_pagecreate()
3208                  */
3209                 if (newpage) {
3210                         segmap_pageunlock(segkmap, base, (size_t)n, S_WRITE);
3211                 }
3212 
3213                 if (error) {
3214                         /*
3215                          * If we failed on a write, we may have already
3216                          * allocated file blocks as well as pages.  It's
3217                          * hard to undo the block allocation, but we must
3218                          * be sure to invalidate any pages that may have
3219                          * been allocated.
3220                          */
3221                         (void) segmap_release(segkmap, base, SM_INVAL);
3222                 } else {
3223                         flags = 0;
3224                         /*
3225                          * Force write back for synchronous write cases.
3226                          */
3227                         if ((ioflag & (FSYNC|FDSYNC)) || ip->i_type == VDIR) {
3228                                 /*
3229                                  * If the sticky bit is set but the
3230                                  * execute bit is not set, we do a
3231                                  * synchronous write back and free
3232                                  * the page when done.  We set up swap
3233                                  * files to be handled this way to
3234                                  * prevent servers from keeping around
3235                                  * the client's swap pages too long.
3236                                  * XXX - there ought to be a better way.
3237                                  */
3238                                 if (IS_SWAPVP(vp)) {
3239                                         flags = SM_WRITE | SM_FREE |
3240                                             SM_DONTNEED;
3241                                         iupdat_flag = 0;
3242                                 } else {
3243                                         flags = SM_WRITE;
3244                                 }
3245                         } else if (((mapon + n) == MAXBSIZE) ||
3246                             IS_SWAPVP(vp)) {
3247                                 /*
3248                                  * Have written a whole block.
3249                                  * Start an asynchronous write and
3250                                  * mark the buffer to indicate that
3251                                  * it won't be needed again soon.
3252                                  */
3253                                 flags = SM_WRITE |SM_ASYNC | SM_DONTNEED;
3254                         }
3255                         error = segmap_release(segkmap, base, flags);
3256 
3257                         /*
3258                          * If the operation failed and is synchronous,
3259                          * then we need to unwind what uiomove() last
3260                          * did so we can potentially return an error to
3261                          * the caller.  If this write operation was
3262                          * done in two pieces and the first succeeded,
3263                          * then we won't return an error for the second
3264                          * piece that failed.  However, we only want to
3265                          * return a resid value that reflects what was
3266                          * really done.
3267                          *
3268                          * Failures for non-synchronous operations can
3269                          * be ignored since the page subsystem will
3270                          * retry the operation until it succeeds or the
3271                          * file system is unmounted.
3272                          */
3273                         if (error) {
3274                                 if ((ioflag & (FSYNC | FDSYNC)) ||
3275                                     ip->i_type == VDIR) {
3276                                         uio->uio_resid = premove_resid;
3277                                 } else {
3278                                         error = 0;
3279                                 }
3280                         }
3281                 }
3282 
3283                 /*
3284                  * Re-acquire contents lock.
3285                  */
3286                 rw_enter(&ip->i_contents, RW_WRITER);
3287                 /*
3288                  * If the uiomove() failed or if a synchronous
3289                  * page push failed, fix up i_size.
3290                  */
3291                 if (error) {
3292                         if (i_size_changed) {
3293                                 /*
3294                                  * The uiomove failed, and we
3295                                  * allocated blocks,so get rid
3296                                  * of them.
3297                                  */
3298                                 (void) ud_itrunc(ip, old_i_size, 0, cr);
3299                         }
3300                 } else {
3301                         /*
3302                          * XXX - Can this be out of the loop?
3303                          */
3304                         ip->i_flag |= IUPD | ICHG;
3305                         if (i_size_changed) {
3306                                 ip->i_flag |= IATTCHG;
3307                         }
3308                         if ((ip->i_perm & (IEXEC | (IEXEC >> 5) |
3309                             (IEXEC >> 10))) != 0 &&
3310                             (ip->i_char & (ISUID | ISGID)) != 0 &&
3311                             secpolicy_vnode_setid_retain(cr,
3312                             (ip->i_char & ISUID) != 0 && ip->i_uid == 0) != 0) {
3313                                 /*
3314                                  * Clear Set-UID & Set-GID bits on
3315                                  * successful write if not privileged
3316                                  * and at least one of the execute bits
3317                                  * is set.  If we always clear Set-GID,
3318                                  * mandatory file and record locking is
3319                                  * unuseable.
3320                                  */
3321                                 ip->i_char &= ~(ISUID | ISGID);
3322                         }
3323                 }
3324         } while (error == 0 && uio->uio_resid > 0 && n != 0);
3325 
3326 out:
3327         /*
3328          * Inode is updated according to this table -
3329          *
3330          *      FSYNC   FDSYNC(posix.4)
3331          *      --------------------------
3332          *      always@ IATTCHG|IBDWRITE
3333          *
3334          * @ -  If we are doing synchronous write the only time we should
3335          *      not be sync'ing the ip here is if we have the stickyhack
3336          *      activated, the file is marked with the sticky bit and
3337          *      no exec bit, the file length has not been changed and
3338          *      no new blocks have been allocated during this write.
3339          */
3340         if ((ip->i_flag & ISYNC) != 0) {
3341                 /*
3342                  * we have eliminated nosync
3343                  */
3344                 if ((ip->i_flag & (IATTCHG|IBDWRITE)) ||
3345                     ((ioflag & FSYNC) && iupdat_flag)) {
3346                         ud_iupdat(ip, 1);
3347                 }
3348         }
3349 
3350         /*
3351          * If we've already done a partial-write, terminate
3352          * the write but return no error.
3353          */
3354         if (start_resid != uio->uio_resid) {
3355                 error = 0;
3356         }
3357         ip->i_flag &= ~(INOACC | ISYNC);
3358         ITIMES_NOLOCK(ip);
3359 
3360         return (error);
3361 }
3362 
3363 int32_t
3364 ud_multi_strat(struct ud_inode *ip,
3365         page_t *pp, struct buf *bp, u_offset_t start)
3366 {
3367         daddr_t bn;
3368         int32_t error = 0, io_count, contig, alloc_sz, i;
3369         uint32_t io_off;
3370         mio_master_t *mm = NULL;
3371         mio_slave_t *ms = NULL;
3372         struct buf *rbp;
3373 
3374         ASSERT(!(start & PAGEOFFSET));
3375 
3376         /*
3377          * Figure out how many buffers to allocate
3378          */
3379         io_count = 0;
3380         for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
3381                 contig = 0;
3382                 if (error = ud_bmap_read(ip, (u_offset_t)(start + io_off),
3383                     &bn, &contig)) {
3384                         goto end;
3385                 }
3386                 if (contig == 0) {
3387                         goto end;
3388                 }
3389                 contig = MIN(contig, PAGESIZE - io_off);
3390                 if (bn != UDF_HOLE) {
3391                         io_count ++;
3392                 } else {
3393                         /*
3394                          * HOLE
3395                          */
3396                         if (bp->b_flags & B_READ) {
3397 
3398                                 /*
3399                                  * This is a hole and is read
3400                                  * it should be filled with 0's
3401                                  */
3402                                 pagezero(pp, io_off, contig);
3403                         }
3404                 }
3405         }
3406 
3407 
3408         if (io_count != 0) {
3409 
3410                 /*
3411                  * Allocate memory for all the
3412                  * required number of buffers
3413                  */
3414                 alloc_sz = sizeof (mio_master_t) +
3415                     (sizeof (mio_slave_t) * io_count);
3416                 mm = (mio_master_t *)kmem_zalloc(alloc_sz, KM_SLEEP);
3417                 if (mm == NULL) {
3418                         error = ENOMEM;
3419                         goto end;
3420                 }
3421 
3422                 /*
3423                  * initialize master
3424                  */
3425                 mutex_init(&mm->mm_mutex, NULL, MUTEX_DEFAULT, NULL);
3426                 mm->mm_size = alloc_sz;
3427                 mm->mm_bp = bp;
3428                 mm->mm_resid = 0;
3429                 mm->mm_error = 0;
3430                 mm->mm_index = master_index++;
3431 
3432                 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));
3433 
3434                 /*
3435                  * Initialize buffers
3436                  */
3437                 io_count = 0;
3438                 for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
3439                         contig = 0;
3440                         if (error = ud_bmap_read(ip,
3441                             (u_offset_t)(start + io_off),
3442                             &bn, &contig)) {
3443                                 goto end;
3444                         }
3445                         ASSERT(contig);
3446                         if ((io_off + contig) > bp->b_bcount) {
3447                                 contig = bp->b_bcount - io_off;
3448                         }
3449                         if (bn != UDF_HOLE) {
3450                                 /*
3451                                  * Clone the buffer
3452                                  * and prepare to start I/O
3453                                  */
3454                                 ms->ms_ptr = mm;
3455                                 bioinit(&ms->ms_buf);
3456                                 rbp = bioclone(bp, io_off, (size_t)contig,
3457                                     bp->b_edev, bn, ud_slave_done,
3458                                     &ms->ms_buf, KM_NOSLEEP);
3459                                 ASSERT(rbp == &ms->ms_buf);
3460                                 mm->mm_resid += contig;
3461                                 io_count++;
3462                                 ms ++;
3463                         }
3464                 }
3465 
3466                 /*
3467                  * Start I/O's
3468                  */
3469                 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));
3470                 for (i = 0; i < io_count; i++) {
3471                         (void) bdev_strategy(&ms->ms_buf);
3472                         ms ++;
3473                 }
3474         }
3475 
3476 end:
3477         if (error != 0) {
3478                 bp->b_flags |= B_ERROR;
3479                 bp->b_error = error;
3480                 if (mm != NULL) {
3481                         mutex_destroy(&mm->mm_mutex);
3482                         kmem_free(mm, mm->mm_size);
3483                 }
3484         }
3485         return (error);
3486 }
3487 
3488 int32_t
3489 ud_slave_done(struct buf *bp)
3490 {
3491         mio_master_t *mm;
3492         int32_t resid;
3493 
3494         ASSERT(SEMA_HELD(&bp->b_sem));
3495         ASSERT((bp->b_flags & B_DONE) == 0);
3496 
3497         mm = ((mio_slave_t *)bp)->ms_ptr;
3498 
3499         /*
3500          * Propagate error and byte count info from slave struct to
3501          * the master struct
3502          */
3503         mutex_enter(&mm->mm_mutex);
3504         if (bp->b_flags & B_ERROR) {
3505 
3506                 /*
3507                  * If multiple slave buffers get
3508                  * error we forget the old errors
3509                  * this is ok because we any way
3510                  * cannot return multiple errors
3511                  */
3512                 mm->mm_error = bp->b_error;
3513         }
3514         mm->mm_resid -= bp->b_bcount;
3515         resid = mm->mm_resid;
3516         mutex_exit(&mm->mm_mutex);
3517 
3518         /*
3519          * free up the resources allocated to cloned buffers.
3520          */
3521         bp_mapout(bp);
3522         biofini(bp);
3523 
3524         if (resid == 0) {
3525 
3526                 /*
3527                  * This is the last I/O operation
3528                  * clean up and return the original buffer
3529                  */
3530                 if (mm->mm_error) {
3531                         mm->mm_bp->b_flags |= B_ERROR;
3532                         mm->mm_bp->b_error = mm->mm_error;
3533                 }
3534                 biodone(mm->mm_bp);
3535                 mutex_destroy(&mm->mm_mutex);
3536                 kmem_free(mm, mm->mm_size);
3537         }
3538         return (0);
3539 }