1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * Copyright 2015, Joyent, Inc. 28 */ 29 30 #include <sys/types.h> 31 #include <sys/t_lock.h> 32 #include <sys/param.h> 33 #include <sys/time.h> 34 #include <sys/systm.h> 35 #include <sys/sysmacros.h> 36 #include <sys/resource.h> 37 #include <sys/signal.h> 38 #include <sys/cred.h> 39 #include <sys/user.h> 40 #include <sys/buf.h> 41 #include <sys/vfs.h> 42 #include <sys/vfs_opreg.h> 43 #include <sys/stat.h> 44 #include <sys/vnode.h> 45 #include <sys/mode.h> 46 #include <sys/proc.h> 47 #include <sys/disp.h> 48 #include <sys/file.h> 49 #include <sys/fcntl.h> 50 #include <sys/flock.h> 51 #include <sys/kmem.h> 52 #include <sys/uio.h> 53 #include <sys/dnlc.h> 54 #include <sys/conf.h> 55 #include <sys/errno.h> 56 #include <sys/mman.h> 57 #include <sys/fbuf.h> 58 #include <sys/pathname.h> 59 #include <sys/debug.h> 60 #include <sys/vmsystm.h> 61 #include <sys/cmn_err.h> 62 #include <sys/dirent.h> 63 #include <sys/errno.h> 64 #include <sys/modctl.h> 65 #include <sys/statvfs.h> 66 #include <sys/mount.h> 67 #include <sys/sunddi.h> 68 
#include <sys/bootconf.h> 69 #include <sys/policy.h> 70 71 #include <vm/hat.h> 72 #include <vm/page.h> 73 #include <vm/pvn.h> 74 #include <vm/as.h> 75 #include <vm/seg.h> 76 #include <vm/seg_map.h> 77 #include <vm/seg_kmem.h> 78 #include <vm/seg_vn.h> 79 #include <vm/rm.h> 80 #include <vm/page.h> 81 #include <sys/swap.h> 82 83 #include <fs/fs_subr.h> 84 85 #include <sys/fs/udf_volume.h> 86 #include <sys/fs/udf_inode.h> 87 88 static int32_t udf_open(struct vnode **, 89 int32_t, struct cred *, caller_context_t *); 90 static int32_t udf_close(struct vnode *, 91 int32_t, int32_t, offset_t, struct cred *, caller_context_t *); 92 static int32_t udf_read(struct vnode *, 93 struct uio *, int32_t, struct cred *, caller_context_t *); 94 static int32_t udf_write(struct vnode *, 95 struct uio *, int32_t, struct cred *, caller_context_t *); 96 static int32_t udf_ioctl(struct vnode *, 97 int32_t, intptr_t, int32_t, struct cred *, int32_t *, 98 caller_context_t *); 99 static int32_t udf_getattr(struct vnode *, 100 struct vattr *, int32_t, struct cred *, caller_context_t *); 101 static int32_t udf_setattr(struct vnode *, 102 struct vattr *, int32_t, struct cred *, caller_context_t *); 103 static int32_t udf_access(struct vnode *, 104 int32_t, int32_t, struct cred *, caller_context_t *); 105 static int32_t udf_lookup(struct vnode *, 106 char *, struct vnode **, struct pathname *, 107 int32_t, struct vnode *, struct cred *, 108 caller_context_t *, int *, pathname_t *); 109 static int32_t udf_create(struct vnode *, 110 char *, struct vattr *, enum vcexcl, 111 int32_t, struct vnode **, struct cred *, int32_t, 112 caller_context_t *, vsecattr_t *); 113 static int32_t udf_remove(struct vnode *, 114 char *, struct cred *, caller_context_t *, int); 115 static int32_t udf_link(struct vnode *, 116 struct vnode *, char *, struct cred *, caller_context_t *, int); 117 static int32_t udf_rename(struct vnode *, 118 char *, struct vnode *, char *, struct cred *, caller_context_t *, int); 119 
static int32_t udf_mkdir(struct vnode *, 120 char *, struct vattr *, struct vnode **, struct cred *, 121 caller_context_t *, int, vsecattr_t *); 122 static int32_t udf_rmdir(struct vnode *, 123 char *, struct vnode *, struct cred *, caller_context_t *, int); 124 static int32_t udf_readdir(struct vnode *, 125 struct uio *, struct cred *, int32_t *, caller_context_t *, int); 126 static int32_t udf_symlink(struct vnode *, 127 char *, struct vattr *, char *, struct cred *, caller_context_t *, int); 128 static int32_t udf_readlink(struct vnode *, 129 struct uio *, struct cred *, caller_context_t *); 130 static int32_t udf_fsync(struct vnode *, 131 int32_t, struct cred *, caller_context_t *); 132 static void udf_inactive(struct vnode *, 133 struct cred *, caller_context_t *); 134 static int32_t udf_fid(struct vnode *, struct fid *, caller_context_t *); 135 static int udf_rwlock(struct vnode *, int32_t, caller_context_t *); 136 static void udf_rwunlock(struct vnode *, int32_t, caller_context_t *); 137 static int32_t udf_seek(struct vnode *, offset_t, offset_t *, 138 caller_context_t *); 139 static int32_t udf_frlock(struct vnode *, int32_t, 140 struct flock64 *, int32_t, offset_t, struct flk_callback *, cred_t *, 141 caller_context_t *); 142 static int32_t udf_space(struct vnode *, int32_t, 143 struct flock64 *, int32_t, offset_t, cred_t *, caller_context_t *); 144 static int32_t udf_getpage(struct vnode *, offset_t, 145 size_t, uint32_t *, struct page **, size_t, 146 struct seg *, caddr_t, enum seg_rw, struct cred *, caller_context_t *); 147 static int32_t udf_putpage(struct vnode *, offset_t, 148 size_t, int32_t, struct cred *, caller_context_t *); 149 static int32_t udf_map(struct vnode *, offset_t, struct as *, 150 caddr_t *, size_t, uint8_t, uint8_t, uint32_t, struct cred *, 151 caller_context_t *); 152 static int32_t udf_addmap(struct vnode *, offset_t, struct as *, 153 caddr_t, size_t, uint8_t, uint8_t, uint32_t, struct cred *, 154 caller_context_t *); 155 static 
int32_t udf_delmap(struct vnode *, offset_t, struct as *, 156 caddr_t, size_t, uint32_t, uint32_t, uint32_t, struct cred *, 157 caller_context_t *); 158 static int32_t udf_l_pathconf(struct vnode *, int32_t, 159 ulong_t *, struct cred *, caller_context_t *); 160 static int32_t udf_pageio(struct vnode *, struct page *, 161 u_offset_t, size_t, int32_t, struct cred *, caller_context_t *); 162 163 int32_t ud_getpage_miss(struct vnode *, u_offset_t, 164 size_t, struct seg *, caddr_t, page_t *pl[], 165 size_t, enum seg_rw, int32_t); 166 void ud_getpage_ra(struct vnode *, u_offset_t, struct seg *, caddr_t); 167 int32_t ud_putpages(struct vnode *, offset_t, size_t, int32_t, struct cred *); 168 int32_t ud_page_fill(struct ud_inode *, page_t *, 169 u_offset_t, uint32_t, u_offset_t *); 170 int32_t ud_iodone(struct buf *); 171 int32_t ud_rdip(struct ud_inode *, struct uio *, int32_t, cred_t *); 172 int32_t ud_wrip(struct ud_inode *, struct uio *, int32_t, cred_t *); 173 int32_t ud_multi_strat(struct ud_inode *, page_t *, struct buf *, u_offset_t); 174 int32_t ud_slave_done(struct buf *); 175 176 /* 177 * Structures to control multiple IO operations to get or put pages 178 * that are backed by discontiguous blocks. The master struct is 179 * a dummy that holds the original bp from pageio_setup. The 180 * slave struct holds the working bp's to do the actual IO. Once 181 * all the slave IOs complete. The master is processed as if a single 182 * IO op has completed. 
183 */ 184 uint32_t master_index = 0; 185 typedef struct mio_master { 186 kmutex_t mm_mutex; /* protect the fields below */ 187 int32_t mm_size; 188 buf_t *mm_bp; /* original bp */ 189 int32_t mm_resid; /* bytes remaining to transfer */ 190 int32_t mm_error; /* accumulated error from slaves */ 191 int32_t mm_index; /* XXX debugging */ 192 } mio_master_t; 193 194 typedef struct mio_slave { 195 buf_t ms_buf; /* working buffer for this IO chunk */ 196 mio_master_t *ms_ptr; /* pointer to master */ 197 } mio_slave_t; 198 199 struct vnodeops *udf_vnodeops; 200 201 const fs_operation_def_t udf_vnodeops_template[] = { 202 { VOPNAME_OPEN, { .vop_open = udf_open } }, 203 { VOPNAME_CLOSE, { .vop_close = udf_close } }, 204 { VOPNAME_READ, { .vop_read = udf_read } }, 205 { VOPNAME_WRITE, { .vop_write = udf_write } }, 206 { VOPNAME_IOCTL, { .vop_ioctl = udf_ioctl } }, 207 { VOPNAME_GETATTR, { .vop_getattr = udf_getattr } }, 208 { VOPNAME_SETATTR, { .vop_setattr = udf_setattr } }, 209 { VOPNAME_ACCESS, { .vop_access = udf_access } }, 210 { VOPNAME_LOOKUP, { .vop_lookup = udf_lookup } }, 211 { VOPNAME_CREATE, { .vop_create = udf_create } }, 212 { VOPNAME_REMOVE, { .vop_remove = udf_remove } }, 213 { VOPNAME_LINK, { .vop_link = udf_link } }, 214 { VOPNAME_RENAME, { .vop_rename = udf_rename } }, 215 { VOPNAME_MKDIR, { .vop_mkdir = udf_mkdir } }, 216 { VOPNAME_RMDIR, { .vop_rmdir = udf_rmdir } }, 217 { VOPNAME_READDIR, { .vop_readdir = udf_readdir } }, 218 { VOPNAME_SYMLINK, { .vop_symlink = udf_symlink } }, 219 { VOPNAME_READLINK, { .vop_readlink = udf_readlink } }, 220 { VOPNAME_FSYNC, { .vop_fsync = udf_fsync } }, 221 { VOPNAME_INACTIVE, { .vop_inactive = udf_inactive } }, 222 { VOPNAME_FID, { .vop_fid = udf_fid } }, 223 { VOPNAME_RWLOCK, { .vop_rwlock = udf_rwlock } }, 224 { VOPNAME_RWUNLOCK, { .vop_rwunlock = udf_rwunlock } }, 225 { VOPNAME_SEEK, { .vop_seek = udf_seek } }, 226 { VOPNAME_FRLOCK, { .vop_frlock = udf_frlock } }, 227 { VOPNAME_SPACE, { .vop_space = udf_space } }, 
228 { VOPNAME_GETPAGE, { .vop_getpage = udf_getpage } }, 229 { VOPNAME_PUTPAGE, { .vop_putpage = udf_putpage } }, 230 { VOPNAME_MAP, { .vop_map = udf_map } }, 231 { VOPNAME_ADDMAP, { .vop_addmap = udf_addmap } }, 232 { VOPNAME_DELMAP, { .vop_delmap = udf_delmap } }, 233 { VOPNAME_PATHCONF, { .vop_pathconf = udf_l_pathconf } }, 234 { VOPNAME_PAGEIO, { .vop_pageio = udf_pageio } }, 235 { VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support } }, 236 { NULL, { NULL } } 237 }; 238 239 /* ARGSUSED */ 240 static int32_t 241 udf_open( 242 struct vnode **vpp, 243 int32_t flag, 244 struct cred *cr, 245 caller_context_t *ct) 246 { 247 ud_printf("udf_open\n"); 248 249 return (0); 250 } 251 252 /* ARGSUSED */ 253 static int32_t 254 udf_close( 255 struct vnode *vp, 256 int32_t flag, 257 int32_t count, 258 offset_t offset, 259 struct cred *cr, 260 caller_context_t *ct) 261 { 262 struct ud_inode *ip = VTOI(vp); 263 264 ud_printf("udf_close\n"); 265 266 ITIMES(ip); 267 268 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 269 cleanshares(vp, ttoproc(curthread)->p_pid); 270 271 /* 272 * Push partially filled cluster at last close. 273 * ``last close'' is approximated because the dnlc 274 * may have a hold on the vnode. 
275 */ 276 if (vp->v_count <= 2 && vp->v_type != VBAD) { 277 struct ud_inode *ip = VTOI(vp); 278 if (ip->i_delaylen) { 279 (void) ud_putpages(vp, ip->i_delayoff, ip->i_delaylen, 280 B_ASYNC | B_FREE, cr); 281 ip->i_delaylen = 0; 282 } 283 } 284 285 return (0); 286 } 287 288 /* ARGSUSED */ 289 static int32_t 290 udf_read( 291 struct vnode *vp, 292 struct uio *uiop, 293 int32_t ioflag, 294 struct cred *cr, 295 caller_context_t *ct) 296 { 297 struct ud_inode *ip = VTOI(vp); 298 int32_t error; 299 300 ud_printf("udf_read\n"); 301 302 #ifdef __lock_lint 303 rw_enter(&ip->i_rwlock, RW_READER); 304 #endif 305 306 ASSERT(RW_READ_HELD(&ip->i_rwlock)); 307 308 if (MANDLOCK(vp, ip->i_char)) { 309 /* 310 * udf_getattr ends up being called by chklock 311 */ 312 error = chklock(vp, FREAD, uiop->uio_loffset, 313 uiop->uio_resid, uiop->uio_fmode, ct); 314 if (error) { 315 goto end; 316 } 317 } 318 319 rw_enter(&ip->i_contents, RW_READER); 320 error = ud_rdip(ip, uiop, ioflag, cr); 321 rw_exit(&ip->i_contents); 322 323 end: 324 #ifdef __lock_lint 325 rw_exit(&ip->i_rwlock); 326 #endif 327 328 return (error); 329 } 330 331 332 int32_t ud_WRITES = 1; 333 int32_t ud_HW = 96 * 1024; 334 int32_t ud_LW = 64 * 1024; 335 int32_t ud_throttles = 0; 336 337 /* ARGSUSED */ 338 static int32_t 339 udf_write( 340 struct vnode *vp, 341 struct uio *uiop, 342 int32_t ioflag, 343 struct cred *cr, 344 caller_context_t *ct) 345 { 346 struct ud_inode *ip = VTOI(vp); 347 int32_t error = 0; 348 349 ud_printf("udf_write\n"); 350 351 #ifdef __lock_lint 352 rw_enter(&ip->i_rwlock, RW_WRITER); 353 #endif 354 355 ASSERT(RW_WRITE_HELD(&ip->i_rwlock)); 356 357 if (MANDLOCK(vp, ip->i_char)) { 358 /* 359 * ud_getattr ends up being called by chklock 360 */ 361 error = chklock(vp, FWRITE, uiop->uio_loffset, 362 uiop->uio_resid, uiop->uio_fmode, ct); 363 if (error) { 364 goto end; 365 } 366 } 367 /* 368 * Throttle writes. 
369 */ 370 mutex_enter(&ip->i_tlock); 371 if (ud_WRITES && (ip->i_writes > ud_HW)) { 372 while (ip->i_writes > ud_HW) { 373 ud_throttles++; 374 cv_wait(&ip->i_wrcv, &ip->i_tlock); 375 } 376 } 377 mutex_exit(&ip->i_tlock); 378 379 /* 380 * Write to the file 381 */ 382 rw_enter(&ip->i_contents, RW_WRITER); 383 if ((ioflag & FAPPEND) != 0 && (ip->i_type == VREG)) { 384 /* 385 * In append mode start at end of file. 386 */ 387 uiop->uio_loffset = ip->i_size; 388 } 389 error = ud_wrip(ip, uiop, ioflag, cr); 390 rw_exit(&ip->i_contents); 391 392 end: 393 #ifdef __lock_lint 394 rw_exit(&ip->i_rwlock); 395 #endif 396 397 return (error); 398 } 399 400 /* ARGSUSED */ 401 static int32_t 402 udf_ioctl( 403 struct vnode *vp, 404 int32_t cmd, 405 intptr_t arg, 406 int32_t flag, 407 struct cred *cr, 408 int32_t *rvalp, 409 caller_context_t *ct) 410 { 411 return (ENOTTY); 412 } 413 414 /* ARGSUSED */ 415 static int32_t 416 udf_getattr( 417 struct vnode *vp, 418 struct vattr *vap, 419 int32_t flags, 420 struct cred *cr, 421 caller_context_t *ct) 422 { 423 struct ud_inode *ip = VTOI(vp); 424 425 ud_printf("udf_getattr\n"); 426 427 if (vap->va_mask == AT_SIZE) { 428 /* 429 * for performance, if only the size is requested don't bother 430 * with anything else. 
431 */ 432 vap->va_size = ip->i_size; 433 return (0); 434 } 435 436 rw_enter(&ip->i_contents, RW_READER); 437 438 vap->va_type = vp->v_type; 439 vap->va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char; 440 441 vap->va_uid = ip->i_uid; 442 vap->va_gid = ip->i_gid; 443 vap->va_fsid = ip->i_dev; 444 vap->va_nodeid = ip->i_icb_lbano; 445 vap->va_nlink = ip->i_nlink; 446 vap->va_size = ip->i_size; 447 vap->va_seq = ip->i_seq; 448 if (vp->v_type == VCHR || vp->v_type == VBLK) { 449 vap->va_rdev = ip->i_rdev; 450 } else { 451 vap->va_rdev = 0; 452 } 453 454 mutex_enter(&ip->i_tlock); 455 ITIMES_NOLOCK(ip); /* mark correct time in inode */ 456 vap->va_atime.tv_sec = (time_t)ip->i_atime.tv_sec; 457 vap->va_atime.tv_nsec = ip->i_atime.tv_nsec; 458 vap->va_mtime.tv_sec = (time_t)ip->i_mtime.tv_sec; 459 vap->va_mtime.tv_nsec = ip->i_mtime.tv_nsec; 460 vap->va_ctime.tv_sec = (time_t)ip->i_ctime.tv_sec; 461 vap->va_ctime.tv_nsec = ip->i_ctime.tv_nsec; 462 mutex_exit(&ip->i_tlock); 463 464 switch (ip->i_type) { 465 case VBLK: 466 vap->va_blksize = MAXBSIZE; 467 break; 468 case VCHR: 469 vap->va_blksize = MAXBSIZE; 470 break; 471 default: 472 vap->va_blksize = ip->i_udf->udf_lbsize; 473 break; 474 } 475 vap->va_nblocks = ip->i_lbr << ip->i_udf->udf_l2d_shift; 476 477 rw_exit(&ip->i_contents); 478 479 return (0); 480 } 481 482 static int 483 ud_iaccess_vmode(void *ip, int mode, struct cred *cr) 484 { 485 return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 0)); 486 } 487 488 /*ARGSUSED4*/ 489 static int32_t 490 udf_setattr( 491 struct vnode *vp, 492 struct vattr *vap, 493 int32_t flags, 494 struct cred *cr, 495 caller_context_t *ct) 496 { 497 int32_t error = 0; 498 uint32_t mask = vap->va_mask; 499 struct ud_inode *ip; 500 timestruc_t now; 501 struct vattr ovap; 502 503 ud_printf("udf_setattr\n"); 504 505 ip = VTOI(vp); 506 507 /* 508 * not updates allowed to 4096 files 509 */ 510 if (ip->i_astrat == STRAT_TYPE4096) { 511 return (EINVAL); 512 } 513 514 /* 515 * Cannot set these attributes 
516 */ 517 if (mask & AT_NOSET) { 518 return (EINVAL); 519 } 520 521 rw_enter(&ip->i_rwlock, RW_WRITER); 522 rw_enter(&ip->i_contents, RW_WRITER); 523 524 ovap.va_uid = ip->i_uid; 525 ovap.va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char; 526 error = secpolicy_vnode_setattr(cr, vp, vap, &ovap, flags, 527 ud_iaccess_vmode, ip); 528 if (error) 529 goto update_inode; 530 531 mask = vap->va_mask; 532 /* 533 * Change file access modes. 534 */ 535 if (mask & AT_MODE) { 536 ip->i_perm = VA2UD_PERM(vap->va_mode); 537 ip->i_char = vap->va_mode & (VSUID | VSGID | VSVTX); 538 mutex_enter(&ip->i_tlock); 539 ip->i_flag |= ICHG; 540 mutex_exit(&ip->i_tlock); 541 } 542 if (mask & (AT_UID|AT_GID)) { 543 if (mask & AT_UID) { 544 ip->i_uid = vap->va_uid; 545 } 546 if (mask & AT_GID) { 547 ip->i_gid = vap->va_gid; 548 } 549 mutex_enter(&ip->i_tlock); 550 ip->i_flag |= ICHG; 551 mutex_exit(&ip->i_tlock); 552 } 553 /* 554 * Truncate file. Must have write permission and not be a directory. 555 */ 556 if (mask & AT_SIZE) { 557 if (vp->v_type == VDIR) { 558 error = EISDIR; 559 goto update_inode; 560 } 561 if (error = ud_iaccess(ip, IWRITE, cr, 0)) { 562 goto update_inode; 563 } 564 if (vap->va_size > MAXOFFSET_T) { 565 error = EFBIG; 566 goto update_inode; 567 } 568 if (error = ud_itrunc(ip, vap->va_size, 0, cr)) { 569 goto update_inode; 570 } 571 572 if (vap->va_size == 0) 573 vnevent_truncate(vp, ct); 574 } 575 /* 576 * Change file access or modified times. 
577 */ 578 if (mask & (AT_ATIME|AT_MTIME)) { 579 mutex_enter(&ip->i_tlock); 580 if (mask & AT_ATIME) { 581 ip->i_atime.tv_sec = vap->va_atime.tv_sec; 582 ip->i_atime.tv_nsec = vap->va_atime.tv_nsec; 583 ip->i_flag &= ~IACC; 584 } 585 if (mask & AT_MTIME) { 586 ip->i_mtime.tv_sec = vap->va_mtime.tv_sec; 587 ip->i_mtime.tv_nsec = vap->va_mtime.tv_nsec; 588 gethrestime(&now); 589 ip->i_ctime.tv_sec = now.tv_sec; 590 ip->i_ctime.tv_nsec = now.tv_nsec; 591 ip->i_flag &= ~(IUPD|ICHG); 592 ip->i_flag |= IMODTIME; 593 } 594 ip->i_flag |= IMOD; 595 mutex_exit(&ip->i_tlock); 596 } 597 598 update_inode: 599 if (curthread->t_flag & T_DONTPEND) { 600 ud_iupdat(ip, 1); 601 } else { 602 ITIMES_NOLOCK(ip); 603 } 604 rw_exit(&ip->i_contents); 605 rw_exit(&ip->i_rwlock); 606 607 return (error); 608 } 609 610 /* ARGSUSED */ 611 static int32_t 612 udf_access( 613 struct vnode *vp, 614 int32_t mode, 615 int32_t flags, 616 struct cred *cr, 617 caller_context_t *ct) 618 { 619 struct ud_inode *ip = VTOI(vp); 620 621 ud_printf("udf_access\n"); 622 623 if (ip->i_udf == NULL) { 624 return (EIO); 625 } 626 627 return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 1)); 628 } 629 630 int32_t udfs_stickyhack = 1; 631 632 /* ARGSUSED */ 633 static int32_t 634 udf_lookup( 635 struct vnode *dvp, 636 char *nm, 637 struct vnode **vpp, 638 struct pathname *pnp, 639 int32_t flags, 640 struct vnode *rdir, 641 struct cred *cr, 642 caller_context_t *ct, 643 int *direntflags, 644 pathname_t *realpnp) 645 { 646 int32_t error; 647 struct vnode *vp; 648 struct ud_inode *ip, *xip; 649 650 ud_printf("udf_lookup\n"); 651 /* 652 * Null component name is a synonym for directory being searched. 653 */ 654 if (*nm == '\0') { 655 VN_HOLD(dvp); 656 *vpp = dvp; 657 error = 0; 658 goto out; 659 } 660 661 /* 662 * Fast path: Check the directory name lookup cache. 663 */ 664 ip = VTOI(dvp); 665 if (vp = dnlc_lookup(dvp, nm)) { 666 /* 667 * Check accessibility of directory. 
668 */ 669 if ((error = ud_iaccess(ip, IEXEC, cr, 1)) != 0) { 670 VN_RELE(vp); 671 } 672 xip = VTOI(vp); 673 } else { 674 error = ud_dirlook(ip, nm, &xip, cr, 1); 675 ITIMES(ip); 676 } 677 678 if (error == 0) { 679 ip = xip; 680 *vpp = ITOV(ip); 681 if ((ip->i_type != VDIR) && 682 (ip->i_char & ISVTX) && 683 ((ip->i_perm & IEXEC) == 0) && 684 udfs_stickyhack) { 685 mutex_enter(&(*vpp)->v_lock); 686 (*vpp)->v_flag |= VISSWAP; 687 mutex_exit(&(*vpp)->v_lock); 688 } 689 ITIMES(ip); 690 /* 691 * If vnode is a device return special vnode instead. 692 */ 693 if (IS_DEVVP(*vpp)) { 694 struct vnode *newvp; 695 newvp = specvp(*vpp, (*vpp)->v_rdev, 696 (*vpp)->v_type, cr); 697 VN_RELE(*vpp); 698 if (newvp == NULL) { 699 error = ENOSYS; 700 } else { 701 *vpp = newvp; 702 } 703 } 704 } 705 out: 706 return (error); 707 } 708 709 /* ARGSUSED */ 710 static int32_t 711 udf_create( 712 struct vnode *dvp, 713 char *name, 714 struct vattr *vap, 715 enum vcexcl excl, 716 int32_t mode, 717 struct vnode **vpp, 718 struct cred *cr, 719 int32_t flag, 720 caller_context_t *ct, 721 vsecattr_t *vsecp) 722 { 723 int32_t error; 724 struct ud_inode *ip = VTOI(dvp), *xip; 725 726 ud_printf("udf_create\n"); 727 728 if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0) 729 vap->va_mode &= ~VSVTX; 730 731 if (*name == '\0') { 732 /* 733 * Null component name refers to the directory itself. 
734 */ 735 VN_HOLD(dvp); 736 ITIMES(ip); 737 error = EEXIST; 738 } else { 739 xip = NULL; 740 rw_enter(&ip->i_rwlock, RW_WRITER); 741 error = ud_direnter(ip, name, DE_CREATE, 742 (struct ud_inode *)0, (struct ud_inode *)0, 743 vap, &xip, cr, ct); 744 rw_exit(&ip->i_rwlock); 745 ITIMES(ip); 746 ip = xip; 747 } 748 #ifdef __lock_lint 749 rw_enter(&ip->i_contents, RW_WRITER); 750 #else 751 if (ip != NULL) { 752 rw_enter(&ip->i_contents, RW_WRITER); 753 } 754 #endif 755 756 /* 757 * If the file already exists and this is a non-exclusive create, 758 * check permissions and allow access for non-directories. 759 * Read-only create of an existing directory is also allowed. 760 * We fail an exclusive create of anything which already exists. 761 */ 762 if (error == EEXIST) { 763 if (excl == NONEXCL) { 764 if ((ip->i_type == VDIR) && (mode & VWRITE)) { 765 error = EISDIR; 766 } else if (mode) { 767 error = ud_iaccess(ip, 768 UD_UPERM2DPERM(mode), cr, 0); 769 } else { 770 error = 0; 771 } 772 } 773 if (error) { 774 rw_exit(&ip->i_contents); 775 VN_RELE(ITOV(ip)); 776 goto out; 777 } else if ((ip->i_type == VREG) && 778 (vap->va_mask & AT_SIZE) && vap->va_size == 0) { 779 /* 780 * Truncate regular files, if requested by caller. 781 * Grab i_rwlock to make sure no one else is 782 * currently writing to the file (we promised 783 * bmap we would do this). 784 * Must get the locks in the correct order. 
785 */ 786 if (ip->i_size == 0) { 787 ip->i_flag |= ICHG | IUPD; 788 } else { 789 rw_exit(&ip->i_contents); 790 rw_enter(&ip->i_rwlock, RW_WRITER); 791 rw_enter(&ip->i_contents, RW_WRITER); 792 (void) ud_itrunc(ip, 0, 0, cr); 793 rw_exit(&ip->i_rwlock); 794 } 795 vnevent_create(ITOV(ip), ct); 796 } 797 } 798 799 if (error == 0) { 800 *vpp = ITOV(ip); 801 ITIMES(ip); 802 } 803 #ifdef __lock_lint 804 rw_exit(&ip->i_contents); 805 #else 806 if (ip != NULL) { 807 rw_exit(&ip->i_contents); 808 } 809 #endif 810 if (error) { 811 goto out; 812 } 813 814 /* 815 * If vnode is a device return special vnode instead. 816 */ 817 if (!error && IS_DEVVP(*vpp)) { 818 struct vnode *newvp; 819 820 newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 821 VN_RELE(*vpp); 822 if (newvp == NULL) { 823 error = ENOSYS; 824 goto out; 825 } 826 *vpp = newvp; 827 } 828 out: 829 return (error); 830 } 831 832 /* ARGSUSED */ 833 static int32_t 834 udf_remove( 835 struct vnode *vp, 836 char *nm, 837 struct cred *cr, 838 caller_context_t *ct, 839 int flags) 840 { 841 int32_t error; 842 struct ud_inode *ip = VTOI(vp); 843 844 ud_printf("udf_remove\n"); 845 846 rw_enter(&ip->i_rwlock, RW_WRITER); 847 error = ud_dirremove(ip, nm, 848 (struct ud_inode *)0, (struct vnode *)0, DR_REMOVE, cr, ct); 849 rw_exit(&ip->i_rwlock); 850 ITIMES(ip); 851 852 return (error); 853 } 854 855 /* ARGSUSED */ 856 static int32_t 857 udf_link( 858 struct vnode *tdvp, 859 struct vnode *svp, 860 char *tnm, 861 struct cred *cr, 862 caller_context_t *ct, 863 int flags) 864 { 865 int32_t error; 866 struct vnode *realvp; 867 struct ud_inode *sip; 868 struct ud_inode *tdp; 869 870 ud_printf("udf_link\n"); 871 if (VOP_REALVP(svp, &realvp, ct) == 0) { 872 svp = realvp; 873 } 874 875 /* 876 * Do not allow links to directories 877 */ 878 if (svp->v_type == VDIR) { 879 return (EPERM); 880 } 881 882 sip = VTOI(svp); 883 884 if (sip->i_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0) 885 return (EPERM); 886 887 tdp = 
VTOI(tdvp); 888 889 rw_enter(&tdp->i_rwlock, RW_WRITER); 890 error = ud_direnter(tdp, tnm, DE_LINK, (struct ud_inode *)0, 891 sip, (struct vattr *)0, (struct ud_inode **)0, cr, ct); 892 rw_exit(&tdp->i_rwlock); 893 ITIMES(sip); 894 ITIMES(tdp); 895 896 if (error == 0) { 897 vnevent_link(svp, ct); 898 } 899 900 return (error); 901 } 902 903 /* ARGSUSED */ 904 static int32_t 905 udf_rename( 906 struct vnode *sdvp, 907 char *snm, 908 struct vnode *tdvp, 909 char *tnm, 910 struct cred *cr, 911 caller_context_t *ct, 912 int flags) 913 { 914 int32_t error = 0; 915 struct udf_vfs *udf_vfsp; 916 struct ud_inode *sip; /* source inode */ 917 struct ud_inode *tip; /* target inode */ 918 struct ud_inode *sdp, *tdp; /* source and target parent inode */ 919 struct vnode *realvp; 920 921 ud_printf("udf_rename\n"); 922 923 if (VOP_REALVP(tdvp, &realvp, ct) == 0) { 924 tdvp = realvp; 925 } 926 927 sdp = VTOI(sdvp); 928 tdp = VTOI(tdvp); 929 930 udf_vfsp = sdp->i_udf; 931 932 mutex_enter(&udf_vfsp->udf_rename_lck); 933 /* 934 * Look up inode of file we're supposed to rename. 935 */ 936 if (error = ud_dirlook(sdp, snm, &sip, cr, 0)) { 937 mutex_exit(&udf_vfsp->udf_rename_lck); 938 return (error); 939 } 940 /* 941 * be sure this is not a directory with another file system mounted 942 * over it. If it is just give up the locks, and return with 943 * EBUSY 944 */ 945 if (vn_mountedvfs(ITOV(sip)) != NULL) { 946 error = EBUSY; 947 goto errout; 948 } 949 /* 950 * Make sure we can delete the source entry. This requires 951 * write permission on the containing directory. If that 952 * directory is "sticky" it further requires (except for 953 * privileged users) that the user own the directory or the 954 * source entry, or else have permission to write the source 955 * entry. 
956 */ 957 rw_enter(&sdp->i_contents, RW_READER); 958 rw_enter(&sip->i_contents, RW_READER); 959 if ((error = ud_iaccess(sdp, IWRITE, cr, 0)) != 0 || 960 (error = ud_sticky_remove_access(sdp, sip, cr)) != 0) { 961 rw_exit(&sip->i_contents); 962 rw_exit(&sdp->i_contents); 963 ITIMES(sip); 964 goto errout; 965 } 966 967 /* 968 * Check for renaming '.' or '..' or alias of '.' 969 */ 970 if ((strcmp(snm, ".") == 0) || 971 (strcmp(snm, "..") == 0) || 972 (sdp == sip)) { 973 error = EINVAL; 974 rw_exit(&sip->i_contents); 975 rw_exit(&sdp->i_contents); 976 goto errout; 977 } 978 979 rw_exit(&sip->i_contents); 980 rw_exit(&sdp->i_contents); 981 982 if (ud_dirlook(tdp, tnm, &tip, cr, 0) == 0) { 983 vnevent_pre_rename_dest(ITOV(tip), tdvp, tnm, ct); 984 VN_RELE(ITOV(tip)); 985 } 986 987 /* Notify the target dir. if not the same as the source dir. */ 988 if (sdvp != tdvp) 989 vnevent_pre_rename_dest_dir(tdvp, ITOV(sip), tnm, ct); 990 991 vnevent_pre_rename_src(ITOV(sip), sdvp, snm, ct); 992 993 /* 994 * Link source to the target. 995 */ 996 rw_enter(&tdp->i_rwlock, RW_WRITER); 997 if (error = ud_direnter(tdp, tnm, DE_RENAME, sdp, sip, 998 (struct vattr *)0, (struct ud_inode **)0, cr, ct)) { 999 /* 1000 * ESAME isn't really an error; it indicates that the 1001 * operation should not be done because the source and target 1002 * are the same file, but that no error should be reported. 1003 */ 1004 if (error == ESAME) { 1005 error = 0; 1006 } 1007 rw_exit(&tdp->i_rwlock); 1008 goto errout; 1009 } 1010 rw_exit(&tdp->i_rwlock); 1011 1012 rw_enter(&sdp->i_rwlock, RW_WRITER); 1013 /* 1014 * Unlink the source. 1015 * Remove the source entry. ud_dirremove() checks that the entry 1016 * still reflects sip, and returns an error if it doesn't. 1017 * If the entry has changed just forget about it. Release 1018 * the source inode. 
1019 */ 1020 if ((error = ud_dirremove(sdp, snm, sip, (struct vnode *)0, 1021 DR_RENAME, cr, ct)) == ENOENT) { 1022 error = 0; 1023 } 1024 rw_exit(&sdp->i_rwlock); 1025 1026 if (error == 0) { 1027 vnevent_rename_src(ITOV(sip), sdvp, snm, ct); 1028 /* 1029 * vnevent_rename_dest and vnevent_rename_dest_dir are called 1030 * in ud_direnter(). 1031 */ 1032 } 1033 1034 errout: 1035 ITIMES(sdp); 1036 ITIMES(tdp); 1037 VN_RELE(ITOV(sip)); 1038 mutex_exit(&udf_vfsp->udf_rename_lck); 1039 1040 return (error); 1041 } 1042 1043 /* ARGSUSED */ 1044 static int32_t 1045 udf_mkdir( 1046 struct vnode *dvp, 1047 char *dirname, 1048 struct vattr *vap, 1049 struct vnode **vpp, 1050 struct cred *cr, 1051 caller_context_t *ct, 1052 int flags, 1053 vsecattr_t *vsecp) 1054 { 1055 int32_t error; 1056 struct ud_inode *ip; 1057 struct ud_inode *xip; 1058 1059 ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE)); 1060 1061 ud_printf("udf_mkdir\n"); 1062 1063 ip = VTOI(dvp); 1064 rw_enter(&ip->i_rwlock, RW_WRITER); 1065 error = ud_direnter(ip, dirname, DE_MKDIR, 1066 (struct ud_inode *)0, (struct ud_inode *)0, vap, &xip, cr, ct); 1067 rw_exit(&ip->i_rwlock); 1068 ITIMES(ip); 1069 if (error == 0) { 1070 ip = xip; 1071 *vpp = ITOV(ip); 1072 ITIMES(ip); 1073 } else if (error == EEXIST) { 1074 ITIMES(xip); 1075 VN_RELE(ITOV(xip)); 1076 } 1077 1078 return (error); 1079 } 1080 1081 /* ARGSUSED */ 1082 static int32_t 1083 udf_rmdir( 1084 struct vnode *vp, 1085 char *nm, 1086 struct vnode *cdir, 1087 struct cred *cr, 1088 caller_context_t *ct, 1089 int flags) 1090 { 1091 int32_t error; 1092 struct ud_inode *ip = VTOI(vp); 1093 1094 ud_printf("udf_rmdir\n"); 1095 1096 rw_enter(&ip->i_rwlock, RW_WRITER); 1097 error = ud_dirremove(ip, nm, (struct ud_inode *)0, cdir, DR_RMDIR, 1098 cr, ct); 1099 rw_exit(&ip->i_rwlock); 1100 ITIMES(ip); 1101 1102 return (error); 1103 } 1104 1105 /* ARGSUSED */ 1106 static int32_t 1107 udf_readdir( 1108 struct vnode *vp, 1109 struct uio *uiop, 1110 struct cred 
*cr, 1111 int32_t *eofp, 1112 caller_context_t *ct, 1113 int flags) 1114 { 1115 struct ud_inode *ip; 1116 struct dirent64 *nd; 1117 struct udf_vfs *udf_vfsp; 1118 int32_t error = 0, len, outcount = 0; 1119 uint32_t dirsiz, offset; 1120 uint32_t bufsize, ndlen, dummy; 1121 caddr_t outbuf; 1122 caddr_t outb, end_outb; 1123 struct iovec *iovp; 1124 1125 uint8_t *dname; 1126 int32_t length; 1127 1128 uint8_t *buf = NULL; 1129 1130 struct fbuf *fbp = NULL; 1131 struct file_id *fid; 1132 uint8_t *name; 1133 1134 1135 ud_printf("udf_readdir\n"); 1136 1137 ip = VTOI(vp); 1138 udf_vfsp = ip->i_udf; 1139 1140 dirsiz = ip->i_size; 1141 if ((uiop->uio_offset >= dirsiz) || 1142 (ip->i_nlink <= 0)) { 1143 if (eofp) { 1144 *eofp = 1; 1145 } 1146 return (0); 1147 } 1148 1149 offset = uiop->uio_offset; 1150 iovp = uiop->uio_iov; 1151 bufsize = iovp->iov_len; 1152 1153 outb = outbuf = (char *)kmem_alloc((uint32_t)bufsize, KM_SLEEP); 1154 end_outb = outb + bufsize; 1155 nd = (struct dirent64 *)outbuf; 1156 1157 dname = (uint8_t *)kmem_zalloc(1024, KM_SLEEP); 1158 buf = (uint8_t *)kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP); 1159 1160 if (offset == 0) { 1161 len = DIRENT64_RECLEN(1); 1162 if (((caddr_t)nd + len) >= end_outb) { 1163 error = EINVAL; 1164 goto end; 1165 } 1166 nd->d_ino = ip->i_icb_lbano; 1167 nd->d_reclen = (uint16_t)len; 1168 nd->d_off = 0x10; 1169 nd->d_name[0] = '.'; 1170 bzero(&nd->d_name[1], DIRENT64_NAMELEN(len) - 1); 1171 nd = (struct dirent64 *)((char *)nd + nd->d_reclen); 1172 outcount++; 1173 } else if (offset == 0x10) { 1174 offset = 0; 1175 } 1176 1177 while (offset < dirsiz) { 1178 error = ud_get_next_fid(ip, &fbp, 1179 offset, &fid, &name, buf); 1180 if (error != 0) { 1181 break; 1182 } 1183 1184 if ((fid->fid_flags & FID_DELETED) == 0) { 1185 if (fid->fid_flags & FID_PARENT) { 1186 1187 len = DIRENT64_RECLEN(2); 1188 if (((caddr_t)nd + len) >= end_outb) { 1189 error = EINVAL; 1190 break; 1191 } 1192 1193 nd->d_ino = ip->i_icb_lbano; 1194 nd->d_reclen = 
(uint16_t)len; 1195 nd->d_off = offset + FID_LEN(fid); 1196 nd->d_name[0] = '.'; 1197 nd->d_name[1] = '.'; 1198 bzero(&nd->d_name[2], 1199 DIRENT64_NAMELEN(len) - 2); 1200 nd = (struct dirent64 *) 1201 ((char *)nd + nd->d_reclen); 1202 } else { 1203 if ((error = ud_uncompress(fid->fid_idlen, 1204 &length, name, dname)) != 0) { 1205 break; 1206 } 1207 if (length == 0) { 1208 offset += FID_LEN(fid); 1209 continue; 1210 } 1211 len = DIRENT64_RECLEN(length); 1212 if (((caddr_t)nd + len) >= end_outb) { 1213 if (!outcount) { 1214 error = EINVAL; 1215 } 1216 break; 1217 } 1218 (void) strncpy(nd->d_name, 1219 (caddr_t)dname, length); 1220 bzero(&nd->d_name[length], 1221 DIRENT64_NAMELEN(len) - length); 1222 nd->d_ino = ud_xlate_to_daddr(udf_vfsp, 1223 SWAP_16(fid->fid_icb.lad_ext_prn), 1224 SWAP_32(fid->fid_icb.lad_ext_loc), 1, 1225 &dummy); 1226 nd->d_reclen = (uint16_t)len; 1227 nd->d_off = offset + FID_LEN(fid); 1228 nd = (struct dirent64 *) 1229 ((char *)nd + nd->d_reclen); 1230 } 1231 outcount++; 1232 } 1233 1234 offset += FID_LEN(fid); 1235 } 1236 1237 end: 1238 if (fbp != NULL) { 1239 fbrelse(fbp, S_OTHER); 1240 } 1241 ndlen = ((char *)nd - outbuf); 1242 /* 1243 * In case of error do not call uiomove. 1244 * Return the error to the caller. 
1245 */ 1246 if ((error == 0) && (ndlen != 0)) { 1247 error = uiomove(outbuf, (long)ndlen, UIO_READ, uiop); 1248 uiop->uio_offset = offset; 1249 } 1250 kmem_free((caddr_t)buf, udf_vfsp->udf_lbsize); 1251 kmem_free((caddr_t)dname, 1024); 1252 kmem_free(outbuf, (uint32_t)bufsize); 1253 if (eofp && error == 0) { 1254 *eofp = (uiop->uio_offset >= dirsiz); 1255 } 1256 return (error); 1257 } 1258 1259 /* ARGSUSED */ 1260 static int32_t 1261 udf_symlink( 1262 struct vnode *dvp, 1263 char *linkname, 1264 struct vattr *vap, 1265 char *target, 1266 struct cred *cr, 1267 caller_context_t *ct, 1268 int flags) 1269 { 1270 int32_t error = 0, outlen; 1271 uint32_t ioflag = 0; 1272 struct ud_inode *ip, *dip = VTOI(dvp); 1273 1274 struct path_comp *pc; 1275 int8_t *dname = NULL, *uname = NULL, *sp; 1276 1277 ud_printf("udf_symlink\n"); 1278 1279 ip = (struct ud_inode *)0; 1280 vap->va_type = VLNK; 1281 vap->va_rdev = 0; 1282 1283 rw_enter(&dip->i_rwlock, RW_WRITER); 1284 error = ud_direnter(dip, linkname, DE_CREATE, 1285 (struct ud_inode *)0, (struct ud_inode *)0, vap, &ip, cr, ct); 1286 rw_exit(&dip->i_rwlock); 1287 if (error == 0) { 1288 dname = kmem_zalloc(1024, KM_SLEEP); 1289 uname = kmem_zalloc(PAGESIZE, KM_SLEEP); 1290 1291 pc = (struct path_comp *)uname; 1292 /* 1293 * If the first character in target is "/" 1294 * then skip it and create entry for it 1295 */ 1296 if (*target == '/') { 1297 pc->pc_type = 2; 1298 pc->pc_len = 0; 1299 pc = (struct path_comp *)(((char *)pc) + 4); 1300 while (*target == '/') { 1301 target++; 1302 } 1303 } 1304 1305 while (*target != NULL) { 1306 sp = target; 1307 while ((*target != '/') && (*target != '\0')) { 1308 target ++; 1309 } 1310 /* 1311 * We got the next component of the 1312 * path name. Create path_comp of 1313 * appropriate type 1314 */ 1315 if (((target - sp) == 1) && (*sp == '.')) { 1316 /* 1317 * Dot entry. 
1318 */ 1319 pc->pc_type = 4; 1320 pc = (struct path_comp *)(((char *)pc) + 4); 1321 } else if (((target - sp) == 2) && 1322 (*sp == '.') && ((*(sp + 1)) == '.')) { 1323 /* 1324 * DotDot entry. 1325 */ 1326 pc->pc_type = 3; 1327 pc = (struct path_comp *)(((char *)pc) + 4); 1328 } else { 1329 /* 1330 * convert the user given name 1331 * into appropriate form to be put 1332 * on the media 1333 */ 1334 outlen = 1024; /* set to size of dname */ 1335 if (error = ud_compress(target - sp, &outlen, 1336 (uint8_t *)sp, (uint8_t *)dname)) { 1337 break; 1338 } 1339 pc->pc_type = 5; 1340 /* LINTED */ 1341 pc->pc_len = outlen; 1342 dname[outlen] = '\0'; 1343 (void) strcpy((char *)pc->pc_id, dname); 1344 pc = (struct path_comp *) 1345 (((char *)pc) + 4 + outlen); 1346 } 1347 while (*target == '/') { 1348 target++; 1349 } 1350 if (*target == NULL) { 1351 break; 1352 } 1353 } 1354 1355 rw_enter(&ip->i_contents, RW_WRITER); 1356 if (error == 0) { 1357 ioflag = FWRITE; 1358 if (curthread->t_flag & T_DONTPEND) { 1359 ioflag |= FDSYNC; 1360 } 1361 error = ud_rdwri(UIO_WRITE, ioflag, ip, 1362 uname, ((int8_t *)pc) - uname, 1363 (offset_t)0, UIO_SYSSPACE, (int32_t *)0, cr); 1364 } 1365 if (error) { 1366 ud_idrop(ip); 1367 rw_exit(&ip->i_contents); 1368 rw_enter(&dip->i_rwlock, RW_WRITER); 1369 (void) ud_dirremove(dip, linkname, (struct ud_inode *)0, 1370 (struct vnode *)0, DR_REMOVE, cr, ct); 1371 rw_exit(&dip->i_rwlock); 1372 goto update_inode; 1373 } 1374 rw_exit(&ip->i_contents); 1375 } 1376 1377 if ((error == 0) || (error == EEXIST)) { 1378 VN_RELE(ITOV(ip)); 1379 } 1380 1381 update_inode: 1382 ITIMES(VTOI(dvp)); 1383 if (uname != NULL) { 1384 kmem_free(uname, PAGESIZE); 1385 } 1386 if (dname != NULL) { 1387 kmem_free(dname, 1024); 1388 } 1389 1390 return (error); 1391 } 1392 1393 /* ARGSUSED */ 1394 static int32_t 1395 udf_readlink( 1396 struct vnode *vp, 1397 struct uio *uiop, 1398 struct cred *cr, 1399 caller_context_t *ct) 1400 { 1401 int32_t error = 0, off, id_len, size, len; 
1402 int8_t *dname = NULL, *uname = NULL; 1403 struct ud_inode *ip; 1404 struct fbuf *fbp = NULL; 1405 struct path_comp *pc; 1406 1407 ud_printf("udf_readlink\n"); 1408 1409 if (vp->v_type != VLNK) { 1410 return (EINVAL); 1411 } 1412 1413 ip = VTOI(vp); 1414 size = ip->i_size; 1415 if (size > PAGESIZE) { 1416 return (EIO); 1417 } 1418 1419 if (size == 0) { 1420 return (0); 1421 } 1422 1423 dname = kmem_zalloc(1024, KM_SLEEP); 1424 uname = kmem_zalloc(PAGESIZE, KM_SLEEP); 1425 1426 rw_enter(&ip->i_contents, RW_READER); 1427 1428 if ((error = fbread(vp, 0, size, S_READ, &fbp)) != 0) { 1429 goto end; 1430 } 1431 1432 off = 0; 1433 1434 while (off < size) { 1435 pc = (struct path_comp *)(fbp->fb_addr + off); 1436 switch (pc->pc_type) { 1437 case 1 : 1438 (void) strcpy(uname, ip->i_udf->udf_fsmnt); 1439 (void) strcat(uname, "/"); 1440 break; 1441 case 2 : 1442 if (pc->pc_len != 0) { 1443 goto end; 1444 } 1445 uname[0] = '/'; 1446 uname[1] = '\0'; 1447 break; 1448 case 3 : 1449 (void) strcat(uname, "../"); 1450 break; 1451 case 4 : 1452 (void) strcat(uname, "./"); 1453 break; 1454 case 5 : 1455 if ((error = ud_uncompress(pc->pc_len, &id_len, 1456 pc->pc_id, (uint8_t *)dname)) != 0) { 1457 break; 1458 } 1459 dname[id_len] = '\0'; 1460 (void) strcat(uname, dname); 1461 (void) strcat(uname, "/"); 1462 break; 1463 default : 1464 error = EINVAL; 1465 goto end; 1466 } 1467 off += 4 + pc->pc_len; 1468 } 1469 len = strlen(uname) - 1; 1470 if (uname[len] == '/') { 1471 if (len == 0) { 1472 /* 1473 * special case link to / 1474 */ 1475 len = 1; 1476 } else { 1477 uname[len] = '\0'; 1478 } 1479 } 1480 1481 error = uiomove(uname, len, UIO_READ, uiop); 1482 1483 ITIMES(ip); 1484 1485 end: 1486 if (fbp != NULL) { 1487 fbrelse(fbp, S_OTHER); 1488 } 1489 rw_exit(&ip->i_contents); 1490 if (uname != NULL) { 1491 kmem_free(uname, PAGESIZE); 1492 } 1493 if (dname != NULL) { 1494 kmem_free(dname, 1024); 1495 } 1496 return (error); 1497 } 1498 1499 /* ARGSUSED */ 1500 static int32_t 1501 
udf_fsync( 1502 struct vnode *vp, 1503 int32_t syncflag, 1504 struct cred *cr, 1505 caller_context_t *ct) 1506 { 1507 int32_t error = 0; 1508 struct ud_inode *ip = VTOI(vp); 1509 1510 ud_printf("udf_fsync\n"); 1511 1512 rw_enter(&ip->i_contents, RW_WRITER); 1513 if (!(IS_SWAPVP(vp))) { 1514 error = ud_syncip(ip, 0, I_SYNC); /* Do synchronous writes */ 1515 } 1516 if (error == 0) { 1517 error = ud_sync_indir(ip); 1518 } 1519 ITIMES(ip); /* XXX: is this necessary ??? */ 1520 rw_exit(&ip->i_contents); 1521 1522 return (error); 1523 } 1524 1525 /* ARGSUSED */ 1526 static void 1527 udf_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct) 1528 { 1529 ud_printf("udf_iinactive\n"); 1530 1531 ud_iinactive(VTOI(vp), cr); 1532 } 1533 1534 /* ARGSUSED */ 1535 static int32_t 1536 udf_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct) 1537 { 1538 struct udf_fid *udfidp; 1539 struct ud_inode *ip = VTOI(vp); 1540 1541 ud_printf("udf_fid\n"); 1542 1543 if (fidp->fid_len < (sizeof (struct udf_fid) - sizeof (uint16_t))) { 1544 fidp->fid_len = sizeof (struct udf_fid) - sizeof (uint16_t); 1545 return (ENOSPC); 1546 } 1547 1548 udfidp = (struct udf_fid *)fidp; 1549 bzero((char *)udfidp, sizeof (struct udf_fid)); 1550 rw_enter(&ip->i_contents, RW_READER); 1551 udfidp->udfid_len = sizeof (struct udf_fid) - sizeof (uint16_t); 1552 udfidp->udfid_uinq_lo = ip->i_uniqid & 0xffffffff; 1553 udfidp->udfid_prn = ip->i_icb_prn; 1554 udfidp->udfid_icb_lbn = ip->i_icb_block; 1555 rw_exit(&ip->i_contents); 1556 1557 return (0); 1558 } 1559 1560 /* ARGSUSED2 */ 1561 static int 1562 udf_rwlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp) 1563 { 1564 struct ud_inode *ip = VTOI(vp); 1565 1566 ud_printf("udf_rwlock\n"); 1567 1568 if (write_lock) { 1569 rw_enter(&ip->i_rwlock, RW_WRITER); 1570 } else { 1571 rw_enter(&ip->i_rwlock, RW_READER); 1572 } 1573 #ifdef __lock_lint 1574 rw_exit(&ip->i_rwlock); 1575 #endif 1576 return (write_lock); 1577 } 1578 1579 /* ARGSUSED 
*/ 1580 static void 1581 udf_rwunlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp) 1582 { 1583 struct ud_inode *ip = VTOI(vp); 1584 1585 ud_printf("udf_rwunlock\n"); 1586 1587 #ifdef __lock_lint 1588 rw_enter(&ip->i_rwlock, RW_WRITER); 1589 #endif 1590 1591 rw_exit(&ip->i_rwlock); 1592 1593 } 1594 1595 /* ARGSUSED */ 1596 static int32_t 1597 udf_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct) 1598 { 1599 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 1600 } 1601 1602 static int32_t 1603 udf_frlock( 1604 struct vnode *vp, 1605 int32_t cmd, 1606 struct flock64 *bfp, 1607 int32_t flag, 1608 offset_t offset, 1609 struct flk_callback *flk_cbp, 1610 cred_t *cr, 1611 caller_context_t *ct) 1612 { 1613 struct ud_inode *ip = VTOI(vp); 1614 1615 ud_printf("udf_frlock\n"); 1616 1617 /* 1618 * If file is being mapped, disallow frlock. 1619 * XXX I am not holding tlock while checking i_mapcnt because the 1620 * current locking strategy drops all locks before calling fs_frlock. 1621 * So, mapcnt could change before we enter fs_frlock making is 1622 * meaningless to have held tlock in the first place. 
/*
 * Free storage space in a file.  Only F_FREESP is supported (EINVAL for
 * any other cmd).  The flock64 offsets are normalised with convoff() and
 * the work is done by ud_freesp(); when that succeeds and the freed range
 * starts at offset 0 a truncate vnode event is posted.
 * Returns 0 or an errno value.
 */
/*ARGSUSED6*/
static int32_t
udf_space(
	struct vnode *vp,
	int32_t cmd,
	struct flock64 *bfp,
	int32_t flag,
	offset_t offset,
	cred_t *cr,
	caller_context_t *ct)
{
	int32_t error = 0;

	ud_printf("udf_space\n");

	if (cmd != F_FREESP) {
		error = EINVAL;
	} else if ((error = convoff(vp, bfp, 0, offset)) == 0) {
		error = ud_freesp(vp, bfp, flag, cr);

		if (error == 0 && bfp->l_start == 0)
			vnevent_truncate(vp, ct);
	}

	return (error);
}

/*
 * Return the pages backing the range <off, off + len) of vp.
 *
 * vp, off, len	- file and byte range requested
 * protp	- if non-NULL, receives the protections the pages may be
 *		  mapped with (PROT_ALL, minus PROT_WRITE for read faults
 *		  on a file with holes)
 * plarr, plsz	- NULL-terminated page list to fill and its size in bytes;
 *		  plarr == NULL requests asynchronous fault-ahead only
 * seg, addr	- segment and address the fault occurred at
 * rw		- access type
 *
 * Returns 0 on success, ENOSYS if the vnode is marked VNOMAP, EFAULT if
 * the range extends beyond the page containing EOF and seg is not segkmap,
 * or the error from ud_bmap_write()/ud_getpage_miss().  On error every
 * page already placed in plarr is unlocked and plarr[0] is set to NULL.
 *
 * i_contents is taken here only when the calling thread does not already
 * own it (dolock).
 */
/* ARGSUSED */
static int32_t
udf_getpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	uint32_t *protp,
	struct page **plarr,
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr,
	caller_context_t *ct)
{
	struct ud_inode *ip = VTOI(vp);
	int32_t error, has_holes, beyond_eof, seqmode, dolock;
	int32_t pgsize = PAGESIZE;
	struct udf_vfs *udf_vfsp = ip->i_udf;
	page_t **pl;
	u_offset_t pgoff, eoff, uoff;
	krw_t rwtype;
	caddr_t pgaddr;

	ud_printf("udf_getpage\n");

	uoff = (u_offset_t)off; /* type conversion */
	if (protp) {
		*protp = PROT_ALL;
	}
	if (vp->v_flag & VNOMAP) {
		return (ENOSYS);
	}
	/* Sequential if this request starts where the previous one ended */
	seqmode = ip->i_nextr == uoff && rw != S_CREATE;

	rwtype = RW_READER;
	dolock = (rw_owner(&ip->i_contents) != curthread);
retrylock:
#ifdef	__lock_lint
	rw_enter(&ip->i_contents, rwtype);
#else
	if (dolock) {
		rw_enter(&ip->i_contents, rwtype);
	}
#endif

	/*
	 * We may be getting called as a side effect of a bmap using
	 * fbread() when the blocks might be being allocated and the
	 * size has not yet been up'ed.  In this case we want to be
	 * able to return zero pages if we get back UDF_HOLE from
	 * calling bmap for a non write case here.  We also might have
	 * to read some frags from the disk into a page if we are
	 * extending the number of frags for a given lbn in bmap().
	 */
	beyond_eof = uoff + len > ip->i_size + PAGEOFFSET;
	if (beyond_eof && seg != segkmap) {
#ifdef	__lock_lint
		rw_exit(&ip->i_contents);
#else
		if (dolock) {
			rw_exit(&ip->i_contents);
		}
#endif
		return (EFAULT);
	}

	/*
	 * Must hold i_contents lock throughout the call to pvn_getpages
	 * since locked pages are returned from each call to ud_getapage.
	 * Must *not* return locked pages and then try for contents lock
	 * due to lock ordering requirements (inode > page)
	 */

	has_holes = ud_bmap_has_holes(ip);

	if ((rw == S_WRITE || rw == S_CREATE) && (has_holes || beyond_eof)) {
		int32_t	blk_size, count;
		u_offset_t offset;

		/*
		 * We must acquire the RW_WRITER lock in order to
		 * call bmap_write().
		 */
		if (dolock && rwtype == RW_READER) {
			rwtype = RW_WRITER;

			/*
			 * If the upgrade fails, drop the lock and start
			 * over; rwtype is now RW_WRITER so the retry
			 * takes the lock as writer directly.
			 */
			if (!rw_tryupgrade(&ip->i_contents)) {

				rw_exit(&ip->i_contents);

				goto retrylock;
			}
		}

		/*
		 * May be allocating disk blocks for holes here as
		 * a result of mmap faults. write(2) does the bmap_write
		 * in rdip/wrip, not here. We are not dealing with frags
		 * in this case.
		 */
		offset = uoff;
		while ((offset < uoff + len) &&
		    (offset < ip->i_size)) {
			/*
			 * Allocate one logical block at a time, trimming
			 * the final request so it stops at i_size.
			 */

			blk_size = udf_vfsp->udf_lbsize;
			if ((offset + blk_size) > ip->i_size) {
				count = ip->i_size - offset;
			} else {
				count = blk_size;
			}
			error = ud_bmap_write(ip, offset, count, 0, cr);
			if (error) {
				goto update_inode;
			}
			offset += count; /* XXX - make this contig */
		}
	}

	/*
	 * Can be a reader from now on.
	 */
#ifdef	__lock_lint
	if (rwtype == RW_WRITER) {
		rw_downgrade(&ip->i_contents);
	}
#else
	if (dolock && rwtype == RW_WRITER) {
		rw_downgrade(&ip->i_contents);
	}
#endif

	/*
	 * We remove PROT_WRITE in cases when the file has UDF holes
	 * because we don't want to call bmap_read() to check each
	 * page if it is backed with a disk block.
	 */
	if (protp && has_holes && rw != S_WRITE && rw != S_CREATE) {
		*protp &= ~PROT_WRITE;
	}

	error = 0;

	/*
	 * The loop looks up pages in the range <off, off + len).
	 * For each page, we first check if we should initiate an asynchronous
	 * read ahead before we call page_lookup (we may sleep in page_lookup
	 * for a previously initiated disk read).
	 */
	eoff = (uoff + len);
	for (pgoff = uoff, pgaddr = addr, pl = plarr;
	    pgoff < eoff; /* empty */) {
		page_t	*pp;
		u_offset_t	nextrio;
		se_t	se;

		se = ((rw == S_CREATE) ? SE_EXCL : SE_SHARED);

		/*
		 * Handle async getpage (faultahead)
		 */
		if (plarr == NULL) {
			ip->i_nextrio = pgoff;
			ud_getpage_ra(vp, pgoff, seg, pgaddr);
			pgoff += pgsize;
			pgaddr += pgsize;
			continue;
		}

		/*
		 * Check if we should initiate read ahead of next cluster.
		 * We call page_exists only when we need to confirm that
		 * we have the current page before we initiate the read ahead.
		 */
		nextrio = ip->i_nextrio;
		if (seqmode &&
		    pgoff + RD_CLUSTSZ(ip) >= nextrio && pgoff <= nextrio &&
		    nextrio < ip->i_size && page_exists(vp, pgoff))
			ud_getpage_ra(vp, pgoff, seg, pgaddr);

		if ((pp = page_lookup(vp, pgoff, se)) != NULL) {

			/*
			 * We found the page in the page cache.
			 */
			*pl++ = pp;
			pgoff += pgsize;
			pgaddr += pgsize;
			len -= pgsize;
			plsz -= pgsize;
		} else {

			/*
			 * We have to create the page, or read it from disk.
			 */
			if (error = ud_getpage_miss(vp, pgoff, len,
			    seg, pgaddr, pl, plsz, rw, seqmode)) {
				goto error_out;
			}

			/*
			 * ud_getpage_miss() NULL-terminates what it put in
			 * pl; step over those entries.  If it added none,
			 * the loop simply retries page_lookup().
			 */
			while (*pl != NULL) {
				pl++;
				pgoff += pgsize;
				pgaddr += pgsize;
				len -= pgsize;
				plsz -= pgsize;
			}
		}
	}

	/*
	 * Return pages up to plsz if they are in the page cache.
	 * We cannot return pages if there is a chance that they are
	 * backed with a UDF hole and rw is S_WRITE or S_CREATE.
	 */
	if (plarr && !(has_holes && (rw == S_WRITE || rw == S_CREATE))) {

		ASSERT((protp == NULL) ||
		    !(has_holes && (*protp & PROT_WRITE)));

		eoff = pgoff + plsz;
		while (pgoff < eoff) {
			page_t		*pp;

			if ((pp = page_lookup_nowait(vp, pgoff,
			    SE_SHARED)) == NULL)
				break;

			*pl++ = pp;
			pgoff += pgsize;
			plsz -= pgsize;
		}
	}

	if (plarr)
		*pl = NULL;			/* Terminate page list */
	/* Remember where this request ended for sequential detection */
	ip->i_nextr = pgoff;

error_out:
	if (error && plarr) {
		/*
		 * Release any pages we have locked.
		 */
		while (pl > &plarr[0])
			page_unlock(*--pl);

		plarr[0] = NULL;
	}

update_inode:
#ifdef	__lock_lint
	rw_exit(&ip->i_contents);
#else
	if (dolock) {
		rw_exit(&ip->i_contents);
	}
#endif

	/*
	 * If the inode is not already marked for IACC (in rwip() for read)
	 * and the inode is not marked for no access time update (in rwip()
	 * for write) then update the inode access time and mod time now.
	 */
	mutex_enter(&ip->i_tlock);
	if ((ip->i_flag & (IACC | INOACC)) == 0) {
		if ((rw != S_OTHER) && (ip->i_type != VDIR)) {
			ip->i_flag |= IACC;
		}
		if (rw == S_WRITE) {
			ip->i_flag |= IUPD;
		}
		ITIMES_NOLOCK(ip);
	}
	mutex_exit(&ip->i_tlock);

	return (error);
}

/*
 * When non-zero, udf_putpage() delays and clusters simple asynchronous
 * page pushes instead of issuing each one immediately.
 */
int32_t ud_delay = 1;

/*
 * Write back pages of vp in the range <off, off + len).
 *
 * Plain asynchronous requests (B_ASYNC with at most B_DONTNEED/B_FREE, a
 * non-zero len and ud_delay set) are not written at once: the range is
 * remembered in i_delayoff/i_delaylen and merged with following contiguous
 * requests.  The previously remembered range is pushed with ud_putpages()
 * once it reaches WR_CLUSTSZ(ip) or a non-contiguous request arrives.
 * All other requests go straight to ud_putpages().
 *
 * Returns 0, EINVAL if the vnode has no holds, ENOSYS if it is marked
 * VNOMAP, or the error from ud_putpages().
 */
/* ARGSUSED */
static int32_t
udf_putpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	int32_t flags,
	struct cred *cr,
	caller_context_t *ct)
{
	struct ud_inode *ip;
	int32_t error = 0;

	ud_printf("udf_putpage\n");

	ip = VTOI(vp);
#ifdef	__lock_lint
	rw_enter(&ip->i_contents, RW_WRITER);
#endif

	if (vp->v_count == 0) {
		cmn_err(CE_WARN, "ud_putpage : bad v_count");
		error = EINVAL;
		goto out;
	}

	if (vp->v_flag & VNOMAP) {
		error = ENOSYS;
		goto out;
	}

	if (flags & B_ASYNC) {
		if (ud_delay && len &&
		    (flags & ~(B_ASYNC|B_DONTNEED|B_FREE)) == 0) {
			mutex_enter(&ip->i_tlock);

			/*
			 * If nobody stalled, start a new cluster.
			 */
			if (ip->i_delaylen == 0) {
				ip->i_delayoff = off;
				ip->i_delaylen = len;
				mutex_exit(&ip->i_tlock);
				goto out;
			}

			/*
			 * If we have a full cluster or they are not contig,
			 * then push last cluster and start over.
			 */
			if (ip->i_delaylen >= WR_CLUSTSZ(ip) ||
			    ip->i_delayoff + ip->i_delaylen != off) {
				u_offset_t doff;
				size_t dlen;

				doff = ip->i_delayoff;
				dlen = ip->i_delaylen;
				ip->i_delayoff = off;
				ip->i_delaylen = len;
				mutex_exit(&ip->i_tlock);
				error = ud_putpages(vp, doff, dlen, flags, cr);
				/* LMXXX - flags are new val, not old */
				goto out;
			}

			/*
			 * There is something there, it's not full, and
			 * it is contig.
			 */
			ip->i_delaylen += len;
			mutex_exit(&ip->i_tlock);
			goto out;
		}

		/*
		 * Must have weird flags or we are not clustering.
		 */
	}

	error = ud_putpages(vp, off, len, flags, cr);

out:
#ifdef	__lock_lint
	rw_exit(&ip->i_contents);
#endif
	return (error);
}
1993 */ 1994 if (ip->i_delaylen >= WR_CLUSTSZ(ip) || 1995 ip->i_delayoff + ip->i_delaylen != off) { 1996 u_offset_t doff; 1997 size_t dlen; 1998 1999 doff = ip->i_delayoff; 2000 dlen = ip->i_delaylen; 2001 ip->i_delayoff = off; 2002 ip->i_delaylen = len; 2003 mutex_exit(&ip->i_tlock); 2004 error = ud_putpages(vp, doff, dlen, flags, cr); 2005 /* LMXXX - flags are new val, not old */ 2006 goto out; 2007 } 2008 2009 /* 2010 * There is something there, it's not full, and 2011 * it is contig. 2012 */ 2013 ip->i_delaylen += len; 2014 mutex_exit(&ip->i_tlock); 2015 goto out; 2016 } 2017 2018 /* 2019 * Must have weird flags or we are not clustering. 2020 */ 2021 } 2022 2023 error = ud_putpages(vp, off, len, flags, cr); 2024 2025 out: 2026 #ifdef __lock_lint 2027 rw_exit(&ip->i_contents); 2028 #endif 2029 return (error); 2030 } 2031 2032 /* ARGSUSED */ 2033 static int32_t 2034 udf_map( 2035 struct vnode *vp, 2036 offset_t off, 2037 struct as *as, 2038 caddr_t *addrp, 2039 size_t len, 2040 uint8_t prot, 2041 uint8_t maxprot, 2042 uint32_t flags, 2043 struct cred *cr, 2044 caller_context_t *ct) 2045 { 2046 struct segvn_crargs vn_a; 2047 int32_t error = 0; 2048 2049 ud_printf("udf_map\n"); 2050 2051 if (vp->v_flag & VNOMAP) { 2052 error = ENOSYS; 2053 goto end; 2054 } 2055 2056 if ((off < (offset_t)0) || 2057 ((off + len) < (offset_t)0)) { 2058 error = EINVAL; 2059 goto end; 2060 } 2061 2062 if (vp->v_type != VREG) { 2063 error = ENODEV; 2064 goto end; 2065 } 2066 2067 /* 2068 * If file is being locked, disallow mapping. 
2069 */ 2070 if (vn_has_mandatory_locks(vp, VTOI(vp)->i_char)) { 2071 error = EAGAIN; 2072 goto end; 2073 } 2074 2075 as_rangelock(as); 2076 error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 2077 if (error != 0) { 2078 as_rangeunlock(as); 2079 goto end; 2080 } 2081 2082 vn_a.vp = vp; 2083 vn_a.offset = off; 2084 vn_a.type = flags & MAP_TYPE; 2085 vn_a.prot = prot; 2086 vn_a.maxprot = maxprot; 2087 vn_a.cred = cr; 2088 vn_a.amp = NULL; 2089 vn_a.flags = flags & ~MAP_TYPE; 2090 vn_a.szc = 0; 2091 vn_a.lgrp_mem_policy_flags = 0; 2092 2093 error = as_map(as, *addrp, len, segvn_create, (caddr_t)&vn_a); 2094 as_rangeunlock(as); 2095 2096 end: 2097 return (error); 2098 } 2099 2100 /* ARGSUSED */ 2101 static int32_t 2102 udf_addmap(struct vnode *vp, 2103 offset_t off, 2104 struct as *as, 2105 caddr_t addr, 2106 size_t len, 2107 uint8_t prot, 2108 uint8_t maxprot, 2109 uint32_t flags, 2110 struct cred *cr, 2111 caller_context_t *ct) 2112 { 2113 struct ud_inode *ip = VTOI(vp); 2114 2115 ud_printf("udf_addmap\n"); 2116 2117 if (vp->v_flag & VNOMAP) { 2118 return (ENOSYS); 2119 } 2120 2121 mutex_enter(&ip->i_tlock); 2122 ip->i_mapcnt += btopr(len); 2123 mutex_exit(&ip->i_tlock); 2124 2125 return (0); 2126 } 2127 2128 /* ARGSUSED */ 2129 static int32_t 2130 udf_delmap( 2131 struct vnode *vp, offset_t off, 2132 struct as *as, 2133 caddr_t addr, 2134 size_t len, 2135 uint32_t prot, 2136 uint32_t maxprot, 2137 uint32_t flags, 2138 struct cred *cr, 2139 caller_context_t *ct) 2140 { 2141 struct ud_inode *ip = VTOI(vp); 2142 2143 ud_printf("udf_delmap\n"); 2144 2145 if (vp->v_flag & VNOMAP) { 2146 return (ENOSYS); 2147 } 2148 2149 mutex_enter(&ip->i_tlock); 2150 ip->i_mapcnt -= btopr(len); /* Count released mappings */ 2151 ASSERT(ip->i_mapcnt >= 0); 2152 mutex_exit(&ip->i_tlock); 2153 2154 return (0); 2155 } 2156 2157 /* ARGSUSED */ 2158 static int32_t 2159 udf_l_pathconf( 2160 struct vnode *vp, 2161 int32_t cmd, 2162 ulong_t *valp, 2163 struct cred *cr, 2164 
caller_context_t *ct) 2165 { 2166 int32_t error = 0; 2167 2168 ud_printf("udf_l_pathconf\n"); 2169 2170 if (cmd == _PC_FILESIZEBITS) { 2171 /* 2172 * udf supports 64 bits as file size 2173 * but there are several other restrictions 2174 * it only supports 32-bit block numbers and 2175 * daddr32_t is only and int32_t so taking these 2176 * into account we can stay just as where ufs is 2177 */ 2178 *valp = 41; 2179 } else if (cmd == _PC_TIMESTAMP_RESOLUTION) { 2180 /* nanosecond timestamp resolution */ 2181 *valp = 1L; 2182 } else { 2183 error = fs_pathconf(vp, cmd, valp, cr, ct); 2184 } 2185 2186 return (error); 2187 } 2188 2189 uint32_t ud_pageio_reads = 0, ud_pageio_writes = 0; 2190 #ifndef __lint 2191 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_reads)) 2192 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_writes)) 2193 #endif 2194 /* 2195 * Assumption is that there will not be a pageio request 2196 * to a enbedded file 2197 */ 2198 /* ARGSUSED */ 2199 static int32_t 2200 udf_pageio( 2201 struct vnode *vp, 2202 struct page *pp, 2203 u_offset_t io_off, 2204 size_t io_len, 2205 int32_t flags, 2206 struct cred *cr, 2207 caller_context_t *ct) 2208 { 2209 daddr_t bn; 2210 struct buf *bp; 2211 struct ud_inode *ip = VTOI(vp); 2212 int32_t dolock, error = 0, contig, multi_io; 2213 size_t done_len = 0, cur_len = 0; 2214 page_t *npp = NULL, *opp = NULL, *cpp = pp; 2215 2216 if (pp == NULL) { 2217 return (EINVAL); 2218 } 2219 2220 dolock = (rw_owner(&ip->i_contents) != curthread); 2221 2222 /* 2223 * We need a better check. Ideally, we would use another 2224 * vnodeops so that hlocked and forcibly unmounted file 2225 * systems would return EIO where appropriate and w/o the 2226 * need for these checks. 
2227 */ 2228 if (ip->i_udf == NULL) { 2229 return (EIO); 2230 } 2231 2232 #ifdef __lock_lint 2233 rw_enter(&ip->i_contents, RW_READER); 2234 #else 2235 if (dolock) { 2236 rw_enter(&ip->i_contents, RW_READER); 2237 } 2238 #endif 2239 2240 /* 2241 * Break the io request into chunks, one for each contiguous 2242 * stretch of disk blocks in the target file. 2243 */ 2244 while (done_len < io_len) { 2245 ASSERT(cpp); 2246 bp = NULL; 2247 contig = 0; 2248 if (error = ud_bmap_read(ip, (u_offset_t)(io_off + done_len), 2249 &bn, &contig)) { 2250 break; 2251 } 2252 2253 if (bn == UDF_HOLE) { /* No holey swapfiles */ 2254 cmn_err(CE_WARN, "SWAP file has HOLES"); 2255 error = EINVAL; 2256 break; 2257 } 2258 2259 cur_len = MIN(io_len - done_len, contig); 2260 2261 /* 2262 * Check if more than one I/O is 2263 * required to complete the given 2264 * I/O operation 2265 */ 2266 if (ip->i_udf->udf_lbsize < PAGESIZE) { 2267 if (cur_len >= PAGESIZE) { 2268 multi_io = 0; 2269 cur_len &= PAGEMASK; 2270 } else { 2271 multi_io = 1; 2272 cur_len = MIN(io_len - done_len, PAGESIZE); 2273 } 2274 } 2275 page_list_break(&cpp, &npp, btop(cur_len)); 2276 2277 bp = pageio_setup(cpp, cur_len, ip->i_devvp, flags); 2278 ASSERT(bp != NULL); 2279 2280 bp->b_edev = ip->i_dev; 2281 bp->b_dev = cmpdev(ip->i_dev); 2282 bp->b_blkno = bn; 2283 bp->b_un.b_addr = (caddr_t)0; 2284 bp->b_file = vp; 2285 bp->b_offset = (offset_t)(io_off + done_len); 2286 2287 /* 2288 * ub.ub_pageios.value.ul++; 2289 */ 2290 if (multi_io == 0) { 2291 (void) bdev_strategy(bp); 2292 } else { 2293 error = ud_multi_strat(ip, cpp, bp, 2294 (u_offset_t)(io_off + done_len)); 2295 if (error != 0) { 2296 pageio_done(bp); 2297 break; 2298 } 2299 } 2300 if (flags & B_READ) { 2301 ud_pageio_reads++; 2302 } else { 2303 ud_pageio_writes++; 2304 } 2305 2306 /* 2307 * If the request is not B_ASYNC, wait for i/o to complete 2308 * and re-assemble the page list to return to the caller. 
2309 * If it is B_ASYNC we leave the page list in pieces and 2310 * cleanup() will dispose of them. 2311 */ 2312 if ((flags & B_ASYNC) == 0) { 2313 error = biowait(bp); 2314 pageio_done(bp); 2315 if (error) { 2316 break; 2317 } 2318 page_list_concat(&opp, &cpp); 2319 } 2320 cpp = npp; 2321 npp = NULL; 2322 done_len += cur_len; 2323 } 2324 2325 ASSERT(error || (cpp == NULL && npp == NULL && done_len == io_len)); 2326 if (error) { 2327 if (flags & B_ASYNC) { 2328 /* Cleanup unprocessed parts of list */ 2329 page_list_concat(&cpp, &npp); 2330 if (flags & B_READ) { 2331 pvn_read_done(cpp, B_ERROR); 2332 } else { 2333 pvn_write_done(cpp, B_ERROR); 2334 } 2335 } else { 2336 /* Re-assemble list and let caller clean up */ 2337 page_list_concat(&opp, &cpp); 2338 page_list_concat(&opp, &npp); 2339 } 2340 } 2341 2342 #ifdef __lock_lint 2343 rw_exit(&ip->i_contents); 2344 #else 2345 if (dolock) { 2346 rw_exit(&ip->i_contents); 2347 } 2348 #endif 2349 return (error); 2350 } 2351 2352 2353 2354 2355 /* -------------------- local functions --------------------------- */ 2356 2357 2358 2359 int32_t 2360 ud_rdwri(enum uio_rw rw, int32_t ioflag, 2361 struct ud_inode *ip, caddr_t base, int32_t len, 2362 offset_t offset, enum uio_seg seg, int32_t *aresid, struct cred *cr) 2363 { 2364 int32_t error; 2365 struct uio auio; 2366 struct iovec aiov; 2367 2368 ud_printf("ud_rdwri\n"); 2369 2370 bzero((caddr_t)&auio, sizeof (uio_t)); 2371 bzero((caddr_t)&aiov, sizeof (iovec_t)); 2372 2373 aiov.iov_base = base; 2374 aiov.iov_len = len; 2375 auio.uio_iov = &aiov; 2376 auio.uio_iovcnt = 1; 2377 auio.uio_loffset = offset; 2378 auio.uio_segflg = (int16_t)seg; 2379 auio.uio_resid = len; 2380 2381 if (rw == UIO_WRITE) { 2382 auio.uio_fmode = FWRITE; 2383 auio.uio_extflg = UIO_COPY_DEFAULT; 2384 auio.uio_llimit = curproc->p_fsz_ctl; 2385 error = ud_wrip(ip, &auio, ioflag, cr); 2386 } else { 2387 auio.uio_fmode = FREAD; 2388 auio.uio_extflg = UIO_COPY_CACHED; 2389 auio.uio_llimit = MAXOFFSET_T; 2390 
error = ud_rdip(ip, &auio, ioflag, cr); 2391 } 2392 2393 if (aresid) { 2394 *aresid = auio.uio_resid; 2395 } else if (auio.uio_resid) { 2396 error = EIO; 2397 } 2398 return (error); 2399 } 2400 2401 /* 2402 * Free behind hacks. The pager is busted. 2403 * XXX - need to pass the information down to writedone() in a flag like B_SEQ 2404 * or B_FREE_IF_TIGHT_ON_MEMORY. 2405 */ 2406 int32_t ud_freebehind = 1; 2407 int32_t ud_smallfile = 32 * 1024; 2408 2409 /* ARGSUSED */ 2410 int32_t 2411 ud_getpage_miss(struct vnode *vp, u_offset_t off, 2412 size_t len, struct seg *seg, caddr_t addr, page_t *pl[], 2413 size_t plsz, enum seg_rw rw, int32_t seq) 2414 { 2415 struct ud_inode *ip = VTOI(vp); 2416 int32_t err = 0; 2417 size_t io_len; 2418 u_offset_t io_off; 2419 u_offset_t pgoff; 2420 page_t *pp; 2421 2422 pl[0] = NULL; 2423 2424 /* 2425 * Figure out whether the page can be created, or must be 2426 * read from the disk 2427 */ 2428 if (rw == S_CREATE) { 2429 if ((pp = page_create_va(vp, off, 2430 PAGESIZE, PG_WAIT, seg, addr)) == NULL) { 2431 cmn_err(CE_WARN, "ud_getpage_miss: page_create"); 2432 return (EINVAL); 2433 } 2434 io_len = PAGESIZE; 2435 } else { 2436 pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 2437 &io_len, off, PAGESIZE, 0); 2438 2439 /* 2440 * Some other thread has entered the page. 2441 * ud_getpage will retry page_lookup. 2442 */ 2443 if (pp == NULL) { 2444 return (0); 2445 } 2446 2447 /* 2448 * Fill the page with as much data as we can from the file. 2449 */ 2450 err = ud_page_fill(ip, pp, off, B_READ, &pgoff); 2451 if (err) { 2452 pvn_read_done(pp, B_ERROR); 2453 return (err); 2454 } 2455 2456 /* 2457 * XXX ??? ufs has io_len instead of pgoff below 2458 */ 2459 ip->i_nextrio = off + ((pgoff + PAGESIZE - 1) & PAGEMASK); 2460 2461 /* 2462 * If the file access is sequential, initiate read ahead 2463 * of the next cluster. 
2464 */ 2465 if (seq && ip->i_nextrio < ip->i_size) { 2466 ud_getpage_ra(vp, off, seg, addr); 2467 } 2468 } 2469 2470 outmiss: 2471 pvn_plist_init(pp, pl, plsz, (offset_t)off, io_len, rw); 2472 return (err); 2473 } 2474 2475 /* ARGSUSED */ 2476 void 2477 ud_getpage_ra(struct vnode *vp, 2478 u_offset_t off, struct seg *seg, caddr_t addr) 2479 { 2480 page_t *pp; 2481 size_t io_len; 2482 struct ud_inode *ip = VTOI(vp); 2483 u_offset_t io_off = ip->i_nextrio, pgoff; 2484 caddr_t addr2 = addr + (io_off - off); 2485 daddr_t bn; 2486 int32_t contig = 0; 2487 2488 /* 2489 * Is this test needed? 2490 */ 2491 2492 if (addr2 >= seg->s_base + seg->s_size) { 2493 return; 2494 } 2495 2496 contig = 0; 2497 if (ud_bmap_read(ip, io_off, &bn, &contig) != 0 || bn == UDF_HOLE) { 2498 return; 2499 } 2500 2501 pp = pvn_read_kluster(vp, io_off, seg, addr2, 2502 &io_off, &io_len, io_off, PAGESIZE, 1); 2503 2504 /* 2505 * Some other thread has entered the page. 2506 * So no read head done here (ie we will have to and wait 2507 * for the read when needed). 
/*
 * Fill page pp with the file data of inode ip starting at offset off.
 *
 * ip, pp, off	- inode, target page and file offset of the page
 * bflgs	- buf flags for the I/O; without B_ASYNC the call waits for
 *		  the I/O to finish
 * pg_off	- receives the number of bytes dealt with ("contig"); it is
 *		  stored on every return path
 *
 * Embedded files (ICB_FLAG_ONE_AD) are copied out of the file entry block
 * read with ud_bread(); all other files are read from the device, in
 * several pieces via ud_multi_strat() when the page is not backed by one
 * contiguous run.
 *
 * Returns 0 or the error from ud_bmap_read(), ud_multi_strat() or
 * biowait().
 */
int
ud_page_fill(struct ud_inode *ip, page_t *pp, u_offset_t off,
	uint32_t bflgs, u_offset_t *pg_off)
{
	daddr_t bn;
	struct buf *bp;
	caddr_t kaddr, caddr;
	int32_t error = 0, contig = 0, multi_io = 0;
	int32_t lbsize = ip->i_udf->udf_lbsize;
	int32_t lbmask = ip->i_udf->udf_lbmask;
	uint64_t isize;

	/* File size rounded up to a logical block boundary */
	isize = (ip->i_size + lbmask) & (~lbmask);
	if (ip->i_desc_type == ICB_FLAG_ONE_AD) {

		/*
		 * Embedded file read file_entry
		 * from buffer cache and copy the required
		 * portions
		 */
		bp = ud_bread(ip->i_dev,
		    ip->i_icb_lbano << ip->i_udf->udf_l2d_shift, lbsize);
		/*
		 * NOTE(review): a failed or short read is not reported;
		 * error stays 0 and the page is left as it was - confirm
		 * this is intended.
		 */
		if ((bp->b_error == 0) &&
		    (bp->b_resid == 0)) {

			caddr = bp->b_un.b_addr + ip->i_data_off;

			/*
			 * mapin to kvm
			 */
			kaddr = (caddr_t)ppmapin(pp,
			    PROT_READ | PROT_WRITE, (caddr_t)-1);
			(void) kcopy(caddr, kaddr, ip->i_size);

			/*
			 * mapout of kvm
			 */
			ppmapout(kaddr);
		}
		brelse(bp);
		contig = ip->i_size;
	} else {

		/*
		 * Get the continuous size and block number
		 * at offset "off"
		 */
		if (error = ud_bmap_read(ip, off, &bn, &contig))
			goto out;
		/* Limit to one page and round up to whole logical blocks */
		contig = MIN(contig, PAGESIZE);
		contig = (contig + lbmask) & (~lbmask);

		/*
		 * Zero part of the page which we are not
		 * going to read from the disk.
		 */

		if (bn == UDF_HOLE) {

			/*
			 * This is a HOLE. Just zero out
			 * the page
			 */
			if (((off + contig) == isize) ||
			    (contig == PAGESIZE)) {
				pagezero(pp->p_prev, 0, PAGESIZE);
				goto out;
			}
		}

		if (contig < PAGESIZE) {
			uint64_t count;

			/*
			 * Either the run ends exactly at the (rounded)
			 * end of file, in which case the rest of the page
			 * is zeroed, or more runs are needed for this page.
			 */
			count = isize - off;
			if (contig != count) {
				multi_io = 1;
				contig = (int32_t)(MIN(count, PAGESIZE));
			} else {
				pagezero(pp->p_prev, contig, PAGESIZE - contig);
			}
		}

		/*
		 * Get a bp and initialize it
		 */
		bp = pageio_setup(pp, contig, ip->i_devvp, bflgs);
		ASSERT(bp != NULL);

		bp->b_edev = ip->i_dev;
		bp->b_dev = cmpdev(ip->i_dev);
		bp->b_blkno = bn;
		bp->b_un.b_addr = 0;
		bp->b_file = ip->i_vnode;

		/*
		 * Start I/O
		 */
		if (multi_io == 0) {

			/*
			 * Single I/O is sufficient for this page
			 */
			(void) bdev_strategy(bp);
		} else {

			/*
			 * We need to do the I/O in
			 * piece's
			 */
			error = ud_multi_strat(ip, pp, bp, off);
			/*
			 * NOTE(review): unlike udf_pageio(), bp is not
			 * released with pageio_done() on this error path -
			 * confirm whether the buf is leaked here.
			 */
			if (error != 0) {
				goto out;
			}
		}
		if ((bflgs & B_ASYNC) == 0) {

			/*
			 * Wait for i/o to complete.
			 */

			error = biowait(bp);
			pageio_done(bp);
			if (error) {
				goto out;
			}
		}
	}
	/* Do not report more than what lies before end of file */
	if ((off + contig) >= ip->i_size) {
		contig = ip->i_size - off;
	}

out:
	*pg_off = contig;
	return (error);
}

/*
 * Write back (and, depending on flags, free or invalidate) the cached
 * pages of vp in <off, off + len).  A len of 0 means every page from off
 * to the end of the file, and also resets the delayed-write cluster kept
 * in i_delayoff/i_delaylen.
 *
 * i_contents is taken as reader for the duration unless the calling
 * thread already owns it.  When the whole file was pushed successfully
 * the IMODTIME flag is cleared.
 *
 * Returns 0, EINVAL if the vnode has no holds, or the error from
 * pvn_vplist_dirty()/ud_putapage().
 */
int32_t
ud_putpages(struct vnode *vp, offset_t off,
	size_t len, int32_t flags, struct cred *cr)
{
	struct ud_inode *ip;
	page_t *pp;
	u_offset_t io_off;
	size_t io_len;
	u_offset_t eoff;
	int32_t err = 0;
	int32_t dolock;

	ud_printf("ud_putpages\n");

	if (vp->v_count == 0) {
		cmn_err(CE_WARN, "ud_putpages: bad v_count");
		return (EINVAL);
	}

	ip = VTOI(vp);

	/*
	 * Acquire the readers/write inode lock before locking
	 * any pages in this inode.
	 * The inode lock is held during i/o.
	 */
	if (len == 0) {
		mutex_enter(&ip->i_tlock);
		ip->i_delayoff = ip->i_delaylen = 0;
		mutex_exit(&ip->i_tlock);
	}
#ifdef	__lock_lint
	rw_enter(&ip->i_contents, RW_READER);
#else
	dolock = (rw_owner(&ip->i_contents) != curthread);
	if (dolock) {
		rw_enter(&ip->i_contents, RW_READER);
	}
#endif

	/* Nothing cached for this vnode, so nothing to push */
	if (!vn_has_cached_data(vp)) {
#ifdef	__lock_lint
		rw_exit(&ip->i_contents);
#else
		if (dolock) {
			rw_exit(&ip->i_contents);
		}
#endif
		return (0);
	}

	if (len == 0) {
		/*
		 * Search the entire vp list for pages >= off.
		 */
		err = pvn_vplist_dirty(vp, (u_offset_t)off, ud_putapage,
		    flags, cr);
	} else {
		/*
		 * Loop over all offsets in the range looking for
		 * pages to deal with.
		 */
		if ((eoff = blkroundup(ip->i_udf, ip->i_size)) != 0) {
			eoff = MIN(off + len, eoff);
		} else {
			eoff = off + len;
		}

		for (io_off = off; io_off < eoff; io_off += io_len) {
			/*
			 * If we are not invalidating, synchronously
			 * freeing or writing pages, use the routine
			 * page_lookup_nowait() to prevent reclaiming
			 * them from the free list.
			 */
			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
				pp = page_lookup(vp, io_off,
				    (flags & (B_INVAL | B_FREE)) ?
				    SE_EXCL : SE_SHARED);
			} else {
				pp = page_lookup_nowait(vp, io_off,
				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
			}

			/* Skip pages that are absent or not dirty */
			if (pp == NULL || pvn_getdirty(pp, flags) == 0) {
				io_len = PAGESIZE;
			} else {

				err = ud_putapage(vp, pp,
				    &io_off, &io_len, flags, cr);
				if (err != 0) {
					break;
				}
				/*
				 * "io_off" and "io_len" are returned as
				 * the range of pages we actually wrote.
				 * This allows us to skip ahead more quickly
				 * since several pages may've been dealt
				 * with by this iteration of the loop.
				 */
			}
		}
	}
	if (err == 0 && off == 0 && (len == 0 || len >= ip->i_size)) {
		/*
		 * We have just sync'ed back all the pages on
		 * the inode, turn off the IMODTIME flag.
		 */
		mutex_enter(&ip->i_tlock);
		ip->i_flag &= ~IMODTIME;
		mutex_exit(&ip->i_tlock);
	}
#ifdef	__lock_lint
	rw_exit(&ip->i_contents);
#else
	if (dolock) {
		rw_exit(&ip->i_contents);
	}
#endif
	return (err);
}
2823 */ 2824 off = pp->p_offset & ~(offset_t)lbmask; 2825 /* block align it */ 2826 2827 2828 if (ip->i_desc_type == ICB_FLAG_ONE_AD) { 2829 ASSERT(ip->i_size <= ip->i_max_emb); 2830 2831 pp = pvn_write_kluster(vp, pp, &io_off, 2832 &io_len, off, PAGESIZE, flags); 2833 if (io_len == 0) { 2834 io_len = PAGESIZE; 2835 } 2836 2837 bp = ud_bread(ip->i_dev, 2838 ip->i_icb_lbano << udf_vfsp->udf_l2d_shift, 2839 udf_vfsp->udf_lbsize); 2840 fe = (struct file_entry *)bp->b_un.b_addr; 2841 if ((bp->b_flags & B_ERROR) || 2842 (ud_verify_tag_and_desc(&fe->fe_tag, UD_FILE_ENTRY, 2843 ip->i_icb_block, 2844 1, udf_vfsp->udf_lbsize) != 0)) { 2845 if (pp != NULL) 2846 pvn_write_done(pp, B_ERROR | B_WRITE | flags); 2847 if (bp->b_flags & B_ERROR) { 2848 error = EIO; 2849 } else { 2850 error = EINVAL; 2851 } 2852 brelse(bp); 2853 return (error); 2854 } 2855 if ((bp->b_error == 0) && 2856 (bp->b_resid == 0)) { 2857 2858 caddr = bp->b_un.b_addr + ip->i_data_off; 2859 kaddr = (caddr_t)ppmapin(pp, 2860 PROT_READ | PROT_WRITE, (caddr_t)-1); 2861 (void) kcopy(kaddr, caddr, ip->i_size); 2862 ppmapout(kaddr); 2863 } 2864 crc_len = offsetof(struct file_entry, fe_spec) + 2865 SWAP_32(fe->fe_len_ear); 2866 crc_len += ip->i_size; 2867 ud_make_tag(ip->i_udf, &fe->fe_tag, 2868 UD_FILE_ENTRY, ip->i_icb_block, crc_len); 2869 2870 bwrite(bp); 2871 2872 if (flags & B_ASYNC) { 2873 pvn_write_done(pp, flags); 2874 } 2875 contig = ip->i_size; 2876 } else { 2877 2878 if (error = ud_bmap_read(ip, off, &bn, &contig)) { 2879 goto out; 2880 } 2881 contig = MIN(contig, PAGESIZE); 2882 contig = (contig + lbmask) & (~lbmask); 2883 2884 if (contig < PAGESIZE) { 2885 uint64_t count; 2886 2887 count = isize - off; 2888 if (contig != count) { 2889 multi_io = 1; 2890 contig = (int32_t)(MIN(count, PAGESIZE)); 2891 } 2892 } 2893 2894 if ((off + contig) > isize) { 2895 contig = isize - off; 2896 } 2897 2898 if (contig > PAGESIZE) { 2899 if (contig & PAGEOFFSET) { 2900 contig &= PAGEMASK; 2901 } 2902 } 2903 2904 pp = 
pvn_write_kluster(vp, pp, &io_off, 2905 &io_len, off, contig, flags); 2906 if (io_len == 0) { 2907 io_len = PAGESIZE; 2908 } 2909 2910 bp = pageio_setup(pp, contig, ip->i_devvp, B_WRITE | flags); 2911 ASSERT(bp != NULL); 2912 2913 bp->b_edev = ip->i_dev; 2914 bp->b_dev = cmpdev(ip->i_dev); 2915 bp->b_blkno = bn; 2916 bp->b_un.b_addr = 0; 2917 bp->b_file = vp; 2918 bp->b_offset = (offset_t)off; 2919 2920 2921 /* 2922 * write throttle 2923 */ 2924 ASSERT(bp->b_iodone == NULL); 2925 bp->b_iodone = ud_iodone; 2926 mutex_enter(&ip->i_tlock); 2927 ip->i_writes += bp->b_bcount; 2928 mutex_exit(&ip->i_tlock); 2929 2930 if (multi_io == 0) { 2931 2932 (void) bdev_strategy(bp); 2933 } else { 2934 error = ud_multi_strat(ip, pp, bp, off); 2935 if (error != 0) { 2936 goto out; 2937 } 2938 } 2939 2940 if ((flags & B_ASYNC) == 0) { 2941 /* 2942 * Wait for i/o to complete. 2943 */ 2944 error = biowait(bp); 2945 pageio_done(bp); 2946 } 2947 } 2948 2949 if ((flags & B_ASYNC) == 0) { 2950 pvn_write_done(pp, ((error) ? 
B_ERROR : 0) | B_WRITE | flags); 2951 } 2952 2953 pp = NULL; 2954 2955 out: 2956 if (error != 0 && pp != NULL) { 2957 pvn_write_done(pp, B_ERROR | B_WRITE | flags); 2958 } 2959 2960 if (offp) { 2961 *offp = io_off; 2962 } 2963 if (lenp) { 2964 *lenp = io_len; 2965 } 2966 2967 return (error); 2968 } 2969 2970 2971 int32_t 2972 ud_iodone(struct buf *bp) 2973 { 2974 struct ud_inode *ip; 2975 2976 ASSERT((bp->b_pages->p_vnode != NULL) && !(bp->b_flags & B_READ)); 2977 2978 bp->b_iodone = NULL; 2979 2980 ip = VTOI(bp->b_pages->p_vnode); 2981 2982 mutex_enter(&ip->i_tlock); 2983 if (ip->i_writes >= ud_LW) { 2984 if ((ip->i_writes -= bp->b_bcount) <= ud_LW) { 2985 if (ud_WRITES) { 2986 cv_broadcast(&ip->i_wrcv); /* wake all up */ 2987 } 2988 } 2989 } else { 2990 ip->i_writes -= bp->b_bcount; 2991 } 2992 mutex_exit(&ip->i_tlock); 2993 iodone(bp); 2994 return (0); 2995 } 2996 2997 /* ARGSUSED3 */ 2998 int32_t 2999 ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr) 3000 { 3001 struct vnode *vp; 3002 struct udf_vfs *udf_vfsp; 3003 krw_t rwtype; 3004 caddr_t base; 3005 uint32_t flags; 3006 int32_t error, n, on, mapon, dofree; 3007 u_offset_t off; 3008 long oresid = uio->uio_resid; 3009 3010 ASSERT(RW_LOCK_HELD(&ip->i_contents)); 3011 if ((ip->i_type != VREG) && 3012 (ip->i_type != VDIR) && 3013 (ip->i_type != VLNK)) { 3014 return (EIO); 3015 } 3016 3017 if (uio->uio_loffset > MAXOFFSET_T) { 3018 return (0); 3019 } 3020 3021 if ((uio->uio_loffset < (offset_t)0) || 3022 ((uio->uio_loffset + uio->uio_resid) < 0)) { 3023 return (EINVAL); 3024 } 3025 if (uio->uio_resid == 0) { 3026 return (0); 3027 } 3028 3029 vp = ITOV(ip); 3030 udf_vfsp = ip->i_udf; 3031 mutex_enter(&ip->i_tlock); 3032 ip->i_flag |= IACC; 3033 mutex_exit(&ip->i_tlock); 3034 3035 rwtype = (rw_write_held(&ip->i_contents)?RW_WRITER:RW_READER); 3036 3037 do { 3038 offset_t diff; 3039 u_offset_t uoff = uio->uio_loffset; 3040 off = uoff & (offset_t)MAXBMASK; 3041 mapon = (int)(uoff & 
(offset_t)MAXBOFFSET); 3042 on = (int)blkoff(udf_vfsp, uoff); 3043 n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid); 3044 3045 diff = ip->i_size - uoff; 3046 3047 if (diff <= (offset_t)0) { 3048 error = 0; 3049 goto out; 3050 } 3051 if (diff < (offset_t)n) { 3052 n = (int)diff; 3053 } 3054 dofree = ud_freebehind && 3055 ip->i_nextr == (off & PAGEMASK) && 3056 off > ud_smallfile; 3057 3058 #ifndef __lock_lint 3059 if (rwtype == RW_READER) { 3060 rw_exit(&ip->i_contents); 3061 } 3062 #endif 3063 3064 base = segmap_getmapflt(segkmap, vp, (off + mapon), 3065 (uint32_t)n, 1, S_READ); 3066 error = uiomove(base + mapon, (long)n, UIO_READ, uio); 3067 3068 flags = 0; 3069 if (!error) { 3070 /* 3071 * If read a whole block, or read to eof, 3072 * won't need this buffer again soon. 3073 */ 3074 if (n + on == MAXBSIZE && ud_freebehind && dofree && 3075 freemem < lotsfree + pages_before_pager) { 3076 flags = SM_FREE | SM_DONTNEED |SM_ASYNC; 3077 } 3078 /* 3079 * In POSIX SYNC (FSYNC and FDSYNC) read mode, 3080 * we want to make sure that the page which has 3081 * been read, is written on disk if it is dirty. 3082 * And corresponding indirect blocks should also 3083 * be flushed out. 3084 */ 3085 if ((ioflag & FRSYNC) && (ioflag & (FSYNC|FDSYNC))) { 3086 flags &= ~SM_ASYNC; 3087 flags |= SM_WRITE; 3088 } 3089 error = segmap_release(segkmap, base, flags); 3090 } else { 3091 (void) segmap_release(segkmap, base, flags); 3092 } 3093 3094 #ifndef __lock_lint 3095 if (rwtype == RW_READER) { 3096 rw_enter(&ip->i_contents, rwtype); 3097 } 3098 #endif 3099 } while (error == 0 && uio->uio_resid > 0 && n != 0); 3100 out: 3101 /* 3102 * Inode is updated according to this table if FRSYNC is set. 
3103 * 3104 * FSYNC FDSYNC(posix.4) 3105 * -------------------------- 3106 * always IATTCHG|IBDWRITE 3107 */ 3108 if (ioflag & FRSYNC) { 3109 if ((ioflag & FSYNC) || 3110 ((ioflag & FDSYNC) && 3111 (ip->i_flag & (IATTCHG|IBDWRITE)))) { 3112 rw_exit(&ip->i_contents); 3113 rw_enter(&ip->i_contents, RW_WRITER); 3114 ud_iupdat(ip, 1); 3115 } 3116 } 3117 /* 3118 * If we've already done a partial read, terminate 3119 * the read but return no error. 3120 */ 3121 if (oresid != uio->uio_resid) { 3122 error = 0; 3123 } 3124 ITIMES(ip); 3125 3126 return (error); 3127 } 3128 3129 int32_t 3130 ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr) 3131 { 3132 caddr_t base; 3133 struct vnode *vp; 3134 struct udf_vfs *udf_vfsp; 3135 uint32_t flags; 3136 int32_t error = 0, iupdat_flag, n, on, mapon, i_size_changed = 0; 3137 int32_t pagecreate, newpage; 3138 uint64_t old_i_size; 3139 u_offset_t off; 3140 long start_resid = uio->uio_resid, premove_resid; 3141 rlim64_t limit = uio->uio_limit; 3142 3143 3144 ASSERT(RW_WRITE_HELD(&ip->i_contents)); 3145 if ((ip->i_type != VREG) && 3146 (ip->i_type != VDIR) && 3147 (ip->i_type != VLNK)) { 3148 return (EIO); 3149 } 3150 3151 if (uio->uio_loffset >= MAXOFFSET_T) { 3152 return (EFBIG); 3153 } 3154 /* 3155 * see udf_l_pathconf 3156 */ 3157 if (limit > (((uint64_t)1 << 40) - 1)) { 3158 limit = ((uint64_t)1 << 40) - 1; 3159 } 3160 if (uio->uio_loffset >= limit) { 3161 proc_t *p = ttoproc(curthread); 3162 3163 mutex_enter(&p->p_lock); 3164 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls, 3165 p, RCA_UNSAFE_SIGINFO); 3166 mutex_exit(&p->p_lock); 3167 return (EFBIG); 3168 } 3169 if ((uio->uio_loffset < (offset_t)0) || 3170 ((uio->uio_loffset + uio->uio_resid) < 0)) { 3171 return (EINVAL); 3172 } 3173 if (uio->uio_resid == 0) { 3174 return (0); 3175 } 3176 3177 mutex_enter(&ip->i_tlock); 3178 ip->i_flag |= INOACC; 3179 3180 if (ioflag & (FSYNC | FDSYNC)) { 3181 ip->i_flag |= ISYNC; 3182 iupdat_flag = 1; 3183 } 
3184 mutex_exit(&ip->i_tlock); 3185 3186 udf_vfsp = ip->i_udf; 3187 vp = ITOV(ip); 3188 3189 do { 3190 u_offset_t uoff = uio->uio_loffset; 3191 off = uoff & (offset_t)MAXBMASK; 3192 mapon = (int)(uoff & (offset_t)MAXBOFFSET); 3193 on = (int)blkoff(udf_vfsp, uoff); 3194 n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid); 3195 3196 if (ip->i_type == VREG && uoff + n >= limit) { 3197 if (uoff >= limit) { 3198 error = EFBIG; 3199 goto out; 3200 } 3201 n = (int)(limit - (rlim64_t)uoff); 3202 } 3203 if (uoff + n > ip->i_size) { 3204 /* 3205 * We are extending the length of the file. 3206 * bmap is used so that we are sure that 3207 * if we need to allocate new blocks, that it 3208 * is done here before we up the file size. 3209 */ 3210 error = ud_bmap_write(ip, uoff, 3211 (int)(on + n), mapon == 0, cr); 3212 if (error) { 3213 break; 3214 } 3215 i_size_changed = 1; 3216 old_i_size = ip->i_size; 3217 ip->i_size = uoff + n; 3218 /* 3219 * If we are writing from the beginning of 3220 * the mapping, we can just create the 3221 * pages without having to read them. 3222 */ 3223 pagecreate = (mapon == 0); 3224 } else if (n == MAXBSIZE) { 3225 /* 3226 * Going to do a whole mappings worth, 3227 * so we can just create the pages w/o 3228 * having to read them in. But before 3229 * we do that, we need to make sure any 3230 * needed blocks are allocated first. 3231 */ 3232 error = ud_bmap_write(ip, uoff, 3233 (int)(on + n), 1, cr); 3234 if (error) { 3235 break; 3236 } 3237 pagecreate = 1; 3238 } else { 3239 pagecreate = 0; 3240 } 3241 3242 rw_exit(&ip->i_contents); 3243 3244 /* 3245 * Touch the page and fault it in if it is not in 3246 * core before segmap_getmapflt can lock it. This 3247 * is to avoid the deadlock if the buffer is mapped 3248 * to the same file through mmap which we want to 3249 * write to. 
3250 */ 3251 uio_prefaultpages((long)n, uio); 3252 3253 base = segmap_getmapflt(segkmap, vp, (off + mapon), 3254 (uint32_t)n, !pagecreate, S_WRITE); 3255 3256 /* 3257 * segmap_pagecreate() returns 1 if it calls 3258 * page_create_va() to allocate any pages. 3259 */ 3260 newpage = 0; 3261 if (pagecreate) { 3262 newpage = segmap_pagecreate(segkmap, base, 3263 (size_t)n, 0); 3264 } 3265 3266 premove_resid = uio->uio_resid; 3267 error = uiomove(base + mapon, (long)n, UIO_WRITE, uio); 3268 3269 if (pagecreate && 3270 uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) { 3271 /* 3272 * We created pages w/o initializing them completely, 3273 * thus we need to zero the part that wasn't set up. 3274 * This happens on most EOF write cases and if 3275 * we had some sort of error during the uiomove. 3276 */ 3277 int nzero, nmoved; 3278 3279 nmoved = (int)(uio->uio_loffset - (off + mapon)); 3280 ASSERT(nmoved >= 0 && nmoved <= n); 3281 nzero = roundup(on + n, PAGESIZE) - nmoved; 3282 ASSERT(nzero > 0 && mapon + nmoved + nzero <= MAXBSIZE); 3283 (void) kzero(base + mapon + nmoved, (uint32_t)nzero); 3284 } 3285 3286 /* 3287 * Unlock the pages allocated by page_create_va() 3288 * in segmap_pagecreate() 3289 */ 3290 if (newpage) { 3291 segmap_pageunlock(segkmap, base, (size_t)n, S_WRITE); 3292 } 3293 3294 if (error) { 3295 /* 3296 * If we failed on a write, we may have already 3297 * allocated file blocks as well as pages. It's 3298 * hard to undo the block allocation, but we must 3299 * be sure to invalidate any pages that may have 3300 * been allocated. 3301 */ 3302 (void) segmap_release(segkmap, base, SM_INVAL); 3303 } else { 3304 flags = 0; 3305 /* 3306 * Force write back for synchronous write cases. 3307 */ 3308 if ((ioflag & (FSYNC|FDSYNC)) || ip->i_type == VDIR) { 3309 /* 3310 * If the sticky bit is set but the 3311 * execute bit is not set, we do a 3312 * synchronous write back and free 3313 * the page when done. 
We set up swap 3314 * files to be handled this way to 3315 * prevent servers from keeping around 3316 * the client's swap pages too long. 3317 * XXX - there ought to be a better way. 3318 */ 3319 if (IS_SWAPVP(vp)) { 3320 flags = SM_WRITE | SM_FREE | 3321 SM_DONTNEED; 3322 iupdat_flag = 0; 3323 } else { 3324 flags = SM_WRITE; 3325 } 3326 } else if (((mapon + n) == MAXBSIZE) || 3327 IS_SWAPVP(vp)) { 3328 /* 3329 * Have written a whole block. 3330 * Start an asynchronous write and 3331 * mark the buffer to indicate that 3332 * it won't be needed again soon. 3333 */ 3334 flags = SM_WRITE |SM_ASYNC | SM_DONTNEED; 3335 } 3336 error = segmap_release(segkmap, base, flags); 3337 3338 /* 3339 * If the operation failed and is synchronous, 3340 * then we need to unwind what uiomove() last 3341 * did so we can potentially return an error to 3342 * the caller. If this write operation was 3343 * done in two pieces and the first succeeded, 3344 * then we won't return an error for the second 3345 * piece that failed. However, we only want to 3346 * return a resid value that reflects what was 3347 * really done. 3348 * 3349 * Failures for non-synchronous operations can 3350 * be ignored since the page subsystem will 3351 * retry the operation until it succeeds or the 3352 * file system is unmounted. 3353 */ 3354 if (error) { 3355 if ((ioflag & (FSYNC | FDSYNC)) || 3356 ip->i_type == VDIR) { 3357 uio->uio_resid = premove_resid; 3358 } else { 3359 error = 0; 3360 } 3361 } 3362 } 3363 3364 /* 3365 * Re-acquire contents lock. 3366 */ 3367 rw_enter(&ip->i_contents, RW_WRITER); 3368 /* 3369 * If the uiomove() failed or if a synchronous 3370 * page push failed, fix up i_size. 3371 */ 3372 if (error) { 3373 if (i_size_changed) { 3374 /* 3375 * The uiomove failed, and we 3376 * allocated blocks,so get rid 3377 * of them. 3378 */ 3379 (void) ud_itrunc(ip, old_i_size, 0, cr); 3380 } 3381 } else { 3382 /* 3383 * XXX - Can this be out of the loop? 
3384 */ 3385 ip->i_flag |= IUPD | ICHG; 3386 if (i_size_changed) { 3387 ip->i_flag |= IATTCHG; 3388 } 3389 if ((ip->i_perm & (IEXEC | (IEXEC >> 5) | 3390 (IEXEC >> 10))) != 0 && 3391 (ip->i_char & (ISUID | ISGID)) != 0 && 3392 secpolicy_vnode_setid_retain(cr, 3393 (ip->i_char & ISUID) != 0 && ip->i_uid == 0) != 0) { 3394 /* 3395 * Clear Set-UID & Set-GID bits on 3396 * successful write if not privileged 3397 * and at least one of the execute bits 3398 * is set. If we always clear Set-GID, 3399 * mandatory file and record locking is 3400 * unuseable. 3401 */ 3402 ip->i_char &= ~(ISUID | ISGID); 3403 } 3404 } 3405 } while (error == 0 && uio->uio_resid > 0 && n != 0); 3406 3407 out: 3408 /* 3409 * Inode is updated according to this table - 3410 * 3411 * FSYNC FDSYNC(posix.4) 3412 * -------------------------- 3413 * always@ IATTCHG|IBDWRITE 3414 * 3415 * @ - If we are doing synchronous write the only time we should 3416 * not be sync'ing the ip here is if we have the stickyhack 3417 * activated, the file is marked with the sticky bit and 3418 * no exec bit, the file length has not been changed and 3419 * no new blocks have been allocated during this write. 3420 */ 3421 if ((ip->i_flag & ISYNC) != 0) { 3422 /* 3423 * we have eliminated nosync 3424 */ 3425 if ((ip->i_flag & (IATTCHG|IBDWRITE)) || 3426 ((ioflag & FSYNC) && iupdat_flag)) { 3427 ud_iupdat(ip, 1); 3428 } 3429 } 3430 3431 /* 3432 * If we've already done a partial-write, terminate 3433 * the write but return no error. 
3434 */ 3435 if (start_resid != uio->uio_resid) { 3436 error = 0; 3437 } 3438 ip->i_flag &= ~(INOACC | ISYNC); 3439 ITIMES_NOLOCK(ip); 3440 3441 return (error); 3442 } 3443 3444 int32_t 3445 ud_multi_strat(struct ud_inode *ip, 3446 page_t *pp, struct buf *bp, u_offset_t start) 3447 { 3448 daddr_t bn; 3449 int32_t error = 0, io_count, contig, alloc_sz, i; 3450 uint32_t io_off; 3451 mio_master_t *mm = NULL; 3452 mio_slave_t *ms = NULL; 3453 struct buf *rbp; 3454 3455 ASSERT(!(start & PAGEOFFSET)); 3456 3457 /* 3458 * Figure out how many buffers to allocate 3459 */ 3460 io_count = 0; 3461 for (io_off = 0; io_off < bp->b_bcount; io_off += contig) { 3462 contig = 0; 3463 if (error = ud_bmap_read(ip, (u_offset_t)(start + io_off), 3464 &bn, &contig)) { 3465 goto end; 3466 } 3467 if (contig == 0) { 3468 goto end; 3469 } 3470 contig = MIN(contig, PAGESIZE - io_off); 3471 if (bn != UDF_HOLE) { 3472 io_count ++; 3473 } else { 3474 /* 3475 * HOLE 3476 */ 3477 if (bp->b_flags & B_READ) { 3478 3479 /* 3480 * This is a hole and is read 3481 * it should be filled with 0's 3482 */ 3483 pagezero(pp, io_off, contig); 3484 } 3485 } 3486 } 3487 3488 3489 if (io_count != 0) { 3490 3491 /* 3492 * Allocate memory for all the 3493 * required number of buffers 3494 */ 3495 alloc_sz = sizeof (mio_master_t) + 3496 (sizeof (mio_slave_t) * io_count); 3497 mm = (mio_master_t *)kmem_zalloc(alloc_sz, KM_SLEEP); 3498 if (mm == NULL) { 3499 error = ENOMEM; 3500 goto end; 3501 } 3502 3503 /* 3504 * initialize master 3505 */ 3506 mutex_init(&mm->mm_mutex, NULL, MUTEX_DEFAULT, NULL); 3507 mm->mm_size = alloc_sz; 3508 mm->mm_bp = bp; 3509 mm->mm_resid = 0; 3510 mm->mm_error = 0; 3511 mm->mm_index = master_index++; 3512 3513 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t)); 3514 3515 /* 3516 * Initialize buffers 3517 */ 3518 io_count = 0; 3519 for (io_off = 0; io_off < bp->b_bcount; io_off += contig) { 3520 contig = 0; 3521 if (error = ud_bmap_read(ip, 3522 (u_offset_t)(start + io_off), 3523 
&bn, &contig)) { 3524 goto end; 3525 } 3526 ASSERT(contig); 3527 if ((io_off + contig) > bp->b_bcount) { 3528 contig = bp->b_bcount - io_off; 3529 } 3530 if (bn != UDF_HOLE) { 3531 /* 3532 * Clone the buffer 3533 * and prepare to start I/O 3534 */ 3535 ms->ms_ptr = mm; 3536 bioinit(&ms->ms_buf); 3537 rbp = bioclone(bp, io_off, (size_t)contig, 3538 bp->b_edev, bn, ud_slave_done, 3539 &ms->ms_buf, KM_NOSLEEP); 3540 ASSERT(rbp == &ms->ms_buf); 3541 mm->mm_resid += contig; 3542 io_count++; 3543 ms ++; 3544 } 3545 } 3546 3547 /* 3548 * Start I/O's 3549 */ 3550 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t)); 3551 for (i = 0; i < io_count; i++) { 3552 (void) bdev_strategy(&ms->ms_buf); 3553 ms ++; 3554 } 3555 } 3556 3557 end: 3558 if (error != 0) { 3559 bp->b_flags |= B_ERROR; 3560 bp->b_error = error; 3561 if (mm != NULL) { 3562 mutex_destroy(&mm->mm_mutex); 3563 kmem_free(mm, mm->mm_size); 3564 } 3565 } 3566 return (error); 3567 } 3568 3569 int32_t 3570 ud_slave_done(struct buf *bp) 3571 { 3572 mio_master_t *mm; 3573 int32_t resid; 3574 3575 ASSERT(SEMA_HELD(&bp->b_sem)); 3576 ASSERT((bp->b_flags & B_DONE) == 0); 3577 3578 mm = ((mio_slave_t *)bp)->ms_ptr; 3579 3580 /* 3581 * Propagate error and byte count info from slave struct to 3582 * the master struct 3583 */ 3584 mutex_enter(&mm->mm_mutex); 3585 if (bp->b_flags & B_ERROR) { 3586 3587 /* 3588 * If multiple slave buffers get 3589 * error we forget the old errors 3590 * this is ok because we any way 3591 * cannot return multiple errors 3592 */ 3593 mm->mm_error = bp->b_error; 3594 } 3595 mm->mm_resid -= bp->b_bcount; 3596 resid = mm->mm_resid; 3597 mutex_exit(&mm->mm_mutex); 3598 3599 /* 3600 * free up the resources allocated to cloned buffers. 
3601 */ 3602 bp_mapout(bp); 3603 biofini(bp); 3604 3605 if (resid == 0) { 3606 3607 /* 3608 * This is the last I/O operation 3609 * clean up and return the original buffer 3610 */ 3611 if (mm->mm_error) { 3612 mm->mm_bp->b_flags |= B_ERROR; 3613 mm->mm_bp->b_error = mm->mm_error; 3614 } 3615 biodone(mm->mm_bp); 3616 mutex_destroy(&mm->mm_mutex); 3617 kmem_free(mm, mm->mm_size); 3618 } 3619 return (0); 3620 }