1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 27 * All rights reserved. 
28 */ 29 30 #include <sys/param.h> 31 #include <sys/types.h> 32 #include <sys/systm.h> 33 #include <sys/cred.h> 34 #include <sys/buf.h> 35 #include <sys/vfs.h> 36 #include <sys/vnode.h> 37 #include <sys/uio.h> 38 #include <sys/stat.h> 39 #include <sys/errno.h> 40 #include <sys/sysmacros.h> 41 #include <sys/statvfs.h> 42 #include <sys/kmem.h> 43 #include <sys/kstat.h> 44 #include <sys/dirent.h> 45 #include <sys/cmn_err.h> 46 #include <sys/debug.h> 47 #include <sys/vtrace.h> 48 #include <sys/mode.h> 49 #include <sys/acl.h> 50 #include <sys/nbmlock.h> 51 #include <sys/policy.h> 52 #include <sys/sdt.h> 53 54 #include <rpc/types.h> 55 #include <rpc/auth.h> 56 #include <rpc/svc.h> 57 58 #include <nfs/nfs.h> 59 #include <nfs/export.h> 60 #include <nfs/nfs_cmd.h> 61 62 #include <vm/hat.h> 63 #include <vm/as.h> 64 #include <vm/seg.h> 65 #include <vm/seg_map.h> 66 #include <vm/seg_kmem.h> 67 68 #include <sys/strsubr.h> 69 70 /* 71 * These are the interface routines for the server side of the 72 * Network File System. See the NFS version 2 protocol specification 73 * for a description of this interface. 74 */ 75 76 static int sattr_to_vattr(struct nfssattr *, struct vattr *); 77 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *, 78 cred_t *); 79 80 /* 81 * Some "over the wire" UNIX file types. These are encoded 82 * into the mode. This needs to be fixed in the next rev. 83 */ 84 #define IFMT 0170000 /* type of file */ 85 #define IFCHR 0020000 /* character special */ 86 #define IFBLK 0060000 /* block special */ 87 #define IFSOCK 0140000 /* socket */ 88 89 u_longlong_t nfs2_srv_caller_id; 90 91 /* 92 * Get file attributes. 93 * Returns the current attributes of the file with the given fhandle. 
94 */ 95 /* ARGSUSED */ 96 void 97 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi, 98 struct svc_req *req, cred_t *cr) 99 { 100 int error; 101 vnode_t *vp; 102 struct vattr va; 103 104 vp = nfs_fhtovp(fhp, exi); 105 if (vp == NULL) { 106 ns->ns_status = NFSERR_STALE; 107 return; 108 } 109 110 /* 111 * Do the getattr. 112 */ 113 va.va_mask = AT_ALL; /* we want all the attributes */ 114 115 error = rfs4_delegated_getattr(vp, &va, 0, cr); 116 117 /* check for overflows */ 118 if (!error) { 119 /* Lie about the object type for a referral */ 120 if (vn_is_nfs_reparse(vp, cr)) 121 va.va_type = VLNK; 122 123 acl_perm(vp, exi, &va, cr); 124 error = vattr_to_nattr(&va, &ns->ns_attr); 125 } 126 127 VN_RELE(vp); 128 129 ns->ns_status = puterrno(error); 130 } 131 void * 132 rfs_getattr_getfh(fhandle_t *fhp) 133 { 134 return (fhp); 135 } 136 137 /* 138 * Set file attributes. 139 * Sets the attributes of the file with the given fhandle. Returns 140 * the new attributes. 141 */ 142 void 143 rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns, 144 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 145 { 146 int error; 147 int flag; 148 int in_crit = 0; 149 vnode_t *vp; 150 struct vattr va; 151 struct vattr bva; 152 struct flock64 bf; 153 caller_context_t ct; 154 155 156 vp = nfs_fhtovp(&args->saa_fh, exi); 157 if (vp == NULL) { 158 ns->ns_status = NFSERR_STALE; 159 return; 160 } 161 162 if (rdonly(exi, req) || vn_is_readonly(vp)) { 163 VN_RELE(vp); 164 ns->ns_status = NFSERR_ROFS; 165 return; 166 } 167 168 error = sattr_to_vattr(&args->saa_sa, &va); 169 if (error) { 170 VN_RELE(vp); 171 ns->ns_status = puterrno(error); 172 return; 173 } 174 175 /* 176 * If the client is requesting a change to the mtime, 177 * but the nanosecond field is set to 1 billion, then 178 * this is a flag to the server that it should set the 179 * atime and mtime fields to the server's current time. 
180 * The 1 billion number actually came from the client 181 * as 1 million, but the units in the over the wire 182 * request are microseconds instead of nanoseconds. 183 * 184 * This is an overload of the protocol and should be 185 * documented in the NFS Version 2 protocol specification. 186 */ 187 if (va.va_mask & AT_MTIME) { 188 if (va.va_mtime.tv_nsec == 1000000000) { 189 gethrestime(&va.va_mtime); 190 va.va_atime = va.va_mtime; 191 va.va_mask |= AT_ATIME; 192 flag = 0; 193 } else 194 flag = ATTR_UTIME; 195 } else 196 flag = 0; 197 198 /* 199 * If the filesystem is exported with nosuid, then mask off 200 * the setuid and setgid bits. 201 */ 202 if ((va.va_mask & AT_MODE) && vp->v_type == VREG && 203 (exi->exi_export.ex_flags & EX_NOSUID)) 204 va.va_mode &= ~(VSUID | VSGID); 205 206 ct.cc_sysid = 0; 207 ct.cc_pid = 0; 208 ct.cc_caller_id = nfs2_srv_caller_id; 209 ct.cc_flags = CC_DONTBLOCK; 210 211 /* 212 * We need to specially handle size changes because it is 213 * possible for the client to create a file with modes 214 * which indicate read-only, but with the file opened for 215 * writing. If the client then tries to set the size of 216 * the file, then the normal access checking done in 217 * VOP_SETATTR would prevent the client from doing so, 218 * although it should be legal for it to do so. To get 219 * around this, we do the access checking for ourselves 220 * and then use VOP_SPACE which doesn't do the access 221 * checking which VOP_SETATTR does. VOP_SPACE can only 222 * operate on VREG files, let VOP_SETATTR handle the other 223 * extremely rare cases. 224 * Also the client should not be allowed to change the 225 * size of the file if there is a conflicting non-blocking 226 * mandatory lock in the region of change. 
227 */ 228 if (vp->v_type == VREG && va.va_mask & AT_SIZE) { 229 if (nbl_need_check(vp)) { 230 nbl_start_crit(vp, RW_READER); 231 in_crit = 1; 232 } 233 234 bva.va_mask = AT_UID | AT_SIZE; 235 236 error = VOP_GETATTR(vp, &bva, 0, cr, &ct); 237 238 if (error) { 239 if (in_crit) 240 nbl_end_crit(vp); 241 VN_RELE(vp); 242 ns->ns_status = puterrno(error); 243 return; 244 } 245 246 if (in_crit) { 247 u_offset_t offset; 248 ssize_t length; 249 250 if (va.va_size < bva.va_size) { 251 offset = va.va_size; 252 length = bva.va_size - va.va_size; 253 } else { 254 offset = bva.va_size; 255 length = va.va_size - bva.va_size; 256 } 257 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0, 258 NULL)) { 259 error = EACCES; 260 } 261 } 262 263 if (crgetuid(cr) == bva.va_uid && !error && 264 va.va_size != bva.va_size) { 265 va.va_mask &= ~AT_SIZE; 266 bf.l_type = F_WRLCK; 267 bf.l_whence = 0; 268 bf.l_start = (off64_t)va.va_size; 269 bf.l_len = 0; 270 bf.l_sysid = 0; 271 bf.l_pid = 0; 272 273 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE, 274 (offset_t)va.va_size, cr, &ct); 275 } 276 if (in_crit) 277 nbl_end_crit(vp); 278 } else 279 error = 0; 280 281 /* 282 * Do the setattr. 283 */ 284 if (!error && va.va_mask) { 285 error = VOP_SETATTR(vp, &va, flag, cr, &ct); 286 } 287 288 /* 289 * check if the monitor on either vop_space or vop_setattr detected 290 * a delegation conflict and if so, mark the thread flag as 291 * wouldblock so that the response is dropped and the client will 292 * try again. 293 */ 294 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 295 VN_RELE(vp); 296 curthread->t_flag |= T_WOULDBLOCK; 297 return; 298 } 299 300 if (!error) { 301 va.va_mask = AT_ALL; /* get everything */ 302 303 error = rfs4_delegated_getattr(vp, &va, 0, cr); 304 305 /* check for overflows */ 306 if (!error) { 307 acl_perm(vp, exi, &va, cr); 308 error = vattr_to_nattr(&va, &ns->ns_attr); 309 } 310 } 311 312 ct.cc_flags = 0; 313 314 /* 315 * Force modified metadata out to stable storage. 
316 */ 317 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct); 318 319 VN_RELE(vp); 320 321 ns->ns_status = puterrno(error); 322 } 323 void * 324 rfs_setattr_getfh(struct nfssaargs *args) 325 { 326 return (&args->saa_fh); 327 } 328 329 /* Change and release @exip and @vpp only in success */ 330 int 331 rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip) 332 { 333 struct exportinfo *exi; 334 vnode_t *vp; 335 fid_t fid; 336 int error; 337 338 vp = *vpp; 339 340 /* traverse() releases argument in success */ 341 VN_HOLD(*vpp); 342 343 if ((error = traverse(&vp)) != 0) { 344 VN_RELE(*vpp); 345 return (error); 346 } 347 348 bzero(&fid, sizeof (fid)); 349 fid.fid_len = MAXFIDSZ; 350 error = VOP_FID(vp, &fid, NULL); 351 if (error) { 352 VN_RELE(vp); 353 return (error); 354 } 355 356 exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid); 357 if (exi == NULL || 358 (exi->exi_export.ex_flags & EX_NOHIDE) == 0) { 359 /* It is not error, just subdir is not exported 360 * or "nohide" is not set 361 */ 362 VN_RELE(vp); 363 } else { 364 /* go to submount */ 365 exi_rele(*exip); 366 *exip = exi; 367 368 VN_RELE(*vpp); 369 *vpp = vp; 370 } 371 return (0); 372 } 373 374 /* 375 * Directory lookup. 376 * Returns an fhandle and file attributes for file name in a directory. 377 */ 378 /* ARGSUSED */ 379 void 380 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr, 381 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 382 { 383 int error; 384 vnode_t *dvp; 385 vnode_t *vp; 386 struct vattr va; 387 fhandle_t *fhp = da->da_fhandle; 388 struct sec_ol sec = {0, 0}; 389 bool_t publicfh_flag = FALSE, auth_weak = FALSE; 390 char *name; 391 struct sockaddr *ca; 392 393 /* 394 * Trusted Extension doesn't support NFSv2. MOUNT 395 * will reject v2 clients. Need to prevent v2 client 396 * access via WebNFS here. 
397 */ 398 if (is_system_labeled() && req->rq_vers == 2) { 399 dr->dr_status = NFSERR_ACCES; 400 return; 401 } 402 403 /* 404 * Disallow NULL paths 405 */ 406 if (da->da_name == NULL || *da->da_name == '\0') { 407 dr->dr_status = NFSERR_ACCES; 408 return; 409 } 410 411 /* 412 * Allow lookups from the root - the default 413 * location of the public filehandle. 414 */ 415 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) { 416 dvp = rootdir; 417 VN_HOLD(dvp); 418 } else { 419 dvp = nfs_fhtovp(fhp, exi); 420 if (dvp == NULL) { 421 dr->dr_status = NFSERR_STALE; 422 return; 423 } 424 } 425 426 /* 427 * Not allow lookup beyond root. 428 * If the filehandle matches a filehandle of the exi, 429 * then the ".." refers beyond the root of an exported filesystem. 430 */ 431 if (strcmp(da->da_name, "..") == 0 && 432 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) { 433 VN_RELE(dvp); 434 dr->dr_status = NFSERR_NOENT; 435 return; 436 } 437 438 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 439 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND, 440 MAXPATHLEN); 441 442 if (name == NULL) { 443 dr->dr_status = NFSERR_ACCES; 444 return; 445 } 446 447 exi_hold(exi); 448 449 /* 450 * If the public filehandle is used then allow 451 * a multi-component lookup, i.e. evaluate 452 * a pathname and follow symbolic links if 453 * necessary. 454 * 455 * This may result in a vnode in another filesystem 456 * which is OK as long as the filesystem is exported. 457 */ 458 if (PUBLIC_FH2(fhp)) { 459 struct exportinfo *new; 460 461 publicfh_flag = TRUE; 462 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &new, 463 &sec); 464 465 if (error == 0) { 466 exi_rele(exi); 467 exi = new; 468 } 469 } else { 470 /* 471 * Do a normal single component lookup. 
472 */ 473 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr, 474 NULL, NULL, NULL); 475 } 476 477 if (name != da->da_name) 478 kmem_free(name, MAXPATHLEN); 479 480 if (error == 0 && vn_ismntpt(vp)) { 481 error = rfs_cross_mnt(&vp, &exi); 482 if (error) 483 VN_RELE(vp); 484 } 485 486 if (!error) { 487 va.va_mask = AT_ALL; /* we want everything */ 488 489 error = rfs4_delegated_getattr(vp, &va, 0, cr); 490 491 /* check for overflows */ 492 if (!error) { 493 acl_perm(vp, exi, &va, cr); 494 error = vattr_to_nattr(&va, &dr->dr_attr); 495 if (!error) { 496 if (sec.sec_flags & SEC_QUERY) 497 error = makefh_ol(&dr->dr_fhandle, exi, 498 sec.sec_index); 499 else { 500 error = makefh(&dr->dr_fhandle, vp, 501 exi); 502 if (!error && publicfh_flag && 503 !chk_clnt_sec(exi, req)) 504 auth_weak = TRUE; 505 } 506 } 507 } 508 VN_RELE(vp); 509 } 510 511 VN_RELE(dvp); 512 513 /* The passed argument exportinfo is released by the 514 * caller, comon_dispatch 515 */ 516 exi_rele(exi); 517 518 /* 519 * If it's public fh, no 0x81, and client's flavor is 520 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now. 521 * Then set RPC status to AUTH_TOOWEAK in common_dispatch. 522 */ 523 if (auth_weak) 524 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR; 525 else 526 dr->dr_status = puterrno(error); 527 } 528 void * 529 rfs_lookup_getfh(struct nfsdiropargs *da) 530 { 531 return (da->da_fhandle); 532 } 533 534 /* 535 * Read symbolic link. 536 * Returns the string in the symbolic link at the given fhandle. 
537 */ 538 /* ARGSUSED */ 539 void 540 rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi, 541 struct svc_req *req, cred_t *cr) 542 { 543 int error; 544 struct iovec iov; 545 struct uio uio; 546 vnode_t *vp; 547 struct vattr va; 548 struct sockaddr *ca; 549 char *name = NULL; 550 int is_referral = 0; 551 552 vp = nfs_fhtovp(fhp, exi); 553 if (vp == NULL) { 554 rl->rl_data = NULL; 555 rl->rl_status = NFSERR_STALE; 556 return; 557 } 558 559 va.va_mask = AT_MODE; 560 561 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 562 563 if (error) { 564 VN_RELE(vp); 565 rl->rl_data = NULL; 566 rl->rl_status = puterrno(error); 567 return; 568 } 569 570 if (MANDLOCK(vp, va.va_mode)) { 571 VN_RELE(vp); 572 rl->rl_data = NULL; 573 rl->rl_status = NFSERR_ACCES; 574 return; 575 } 576 577 /* We lied about the object type for a referral */ 578 if (vn_is_nfs_reparse(vp, cr)) 579 is_referral = 1; 580 581 /* 582 * XNFS and RFC1094 require us to return ENXIO if argument 583 * is not a link. BUGID 1138002. 584 */ 585 if (vp->v_type != VLNK && !is_referral) { 586 VN_RELE(vp); 587 rl->rl_data = NULL; 588 rl->rl_status = NFSERR_NXIO; 589 return; 590 } 591 592 /* 593 * Allocate data for pathname. This will be freed by rfs_rlfree. 
594 */ 595 rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP); 596 597 if (is_referral) { 598 char *s; 599 size_t strsz; 600 601 /* Get an artificial symlink based on a referral */ 602 s = build_symlink(vp, cr, &strsz); 603 global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++; 604 DTRACE_PROBE2(nfs2serv__func__referral__reflink, 605 vnode_t *, vp, char *, s); 606 if (s == NULL) 607 error = EINVAL; 608 else { 609 error = 0; 610 (void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN); 611 rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN); 612 kmem_free(s, strsz); 613 } 614 615 } else { 616 617 /* 618 * Set up io vector to read sym link data 619 */ 620 iov.iov_base = rl->rl_data; 621 iov.iov_len = NFS_MAXPATHLEN; 622 uio.uio_iov = &iov; 623 uio.uio_iovcnt = 1; 624 uio.uio_segflg = UIO_SYSSPACE; 625 uio.uio_extflg = UIO_COPY_CACHED; 626 uio.uio_loffset = (offset_t)0; 627 uio.uio_resid = NFS_MAXPATHLEN; 628 629 /* 630 * Do the readlink. 631 */ 632 error = VOP_READLINK(vp, &uio, cr, NULL); 633 634 rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid); 635 636 if (!error) 637 rl->rl_data[rl->rl_count] = '\0'; 638 639 } 640 641 642 VN_RELE(vp); 643 644 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 645 name = nfscmd_convname(ca, exi, rl->rl_data, 646 NFSCMD_CONV_OUTBOUND, MAXPATHLEN); 647 648 if (name != NULL && name != rl->rl_data) { 649 kmem_free(rl->rl_data, NFS_MAXPATHLEN); 650 rl->rl_data = name; 651 } 652 653 /* 654 * XNFS and RFC1094 require us to return ENXIO if argument 655 * is not a link. UFS returns EINVAL if this is the case, 656 * so we do the mapping here. BUGID 1138002. 
657 */ 658 if (error == EINVAL) 659 rl->rl_status = NFSERR_NXIO; 660 else 661 rl->rl_status = puterrno(error); 662 663 } 664 void * 665 rfs_readlink_getfh(fhandle_t *fhp) 666 { 667 return (fhp); 668 } 669 /* 670 * Free data allocated by rfs_readlink 671 */ 672 void 673 rfs_rlfree(struct nfsrdlnres *rl) 674 { 675 if (rl->rl_data != NULL) 676 kmem_free(rl->rl_data, NFS_MAXPATHLEN); 677 } 678 679 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *); 680 681 /* 682 * Read data. 683 * Returns some data read from the file at the given fhandle. 684 */ 685 /* ARGSUSED */ 686 void 687 rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr, 688 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 689 { 690 vnode_t *vp; 691 int error; 692 struct vattr va; 693 struct iovec iov; 694 struct uio uio; 695 mblk_t *mp; 696 int alloc_err = 0; 697 int in_crit = 0; 698 caller_context_t ct; 699 700 vp = nfs_fhtovp(&ra->ra_fhandle, exi); 701 if (vp == NULL) { 702 rr->rr_data = NULL; 703 rr->rr_status = NFSERR_STALE; 704 return; 705 } 706 707 if (vp->v_type != VREG) { 708 VN_RELE(vp); 709 rr->rr_data = NULL; 710 rr->rr_status = NFSERR_ISDIR; 711 return; 712 } 713 714 ct.cc_sysid = 0; 715 ct.cc_pid = 0; 716 ct.cc_caller_id = nfs2_srv_caller_id; 717 ct.cc_flags = CC_DONTBLOCK; 718 719 /* 720 * Enter the critical region before calling VOP_RWLOCK 721 * to avoid a deadlock with write requests. 
722 */ 723 if (nbl_need_check(vp)) { 724 nbl_start_crit(vp, RW_READER); 725 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count, 726 0, NULL)) { 727 nbl_end_crit(vp); 728 VN_RELE(vp); 729 rr->rr_data = NULL; 730 rr->rr_status = NFSERR_ACCES; 731 return; 732 } 733 in_crit = 1; 734 } 735 736 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct); 737 738 /* check if a monitor detected a delegation conflict */ 739 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 740 VN_RELE(vp); 741 /* mark as wouldblock so response is dropped */ 742 curthread->t_flag |= T_WOULDBLOCK; 743 744 rr->rr_data = NULL; 745 return; 746 } 747 748 va.va_mask = AT_ALL; 749 750 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 751 752 if (error) { 753 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 754 if (in_crit) 755 nbl_end_crit(vp); 756 757 VN_RELE(vp); 758 rr->rr_data = NULL; 759 rr->rr_status = puterrno(error); 760 761 return; 762 } 763 764 /* 765 * This is a kludge to allow reading of files created 766 * with no read permission. The owner of the file 767 * is always allowed to read it. 768 */ 769 if (crgetuid(cr) != va.va_uid) { 770 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct); 771 772 if (error) { 773 /* 774 * Exec is the same as read over the net because 775 * of demand loading. 
776 */ 777 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct); 778 } 779 if (error) { 780 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 781 if (in_crit) 782 nbl_end_crit(vp); 783 VN_RELE(vp); 784 rr->rr_data = NULL; 785 rr->rr_status = puterrno(error); 786 787 return; 788 } 789 } 790 791 if (MANDLOCK(vp, va.va_mode)) { 792 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 793 if (in_crit) 794 nbl_end_crit(vp); 795 796 VN_RELE(vp); 797 rr->rr_data = NULL; 798 rr->rr_status = NFSERR_ACCES; 799 800 return; 801 } 802 803 rr->rr_ok.rrok_wlist_len = 0; 804 rr->rr_ok.rrok_wlist = NULL; 805 806 if ((u_offset_t)ra->ra_offset >= va.va_size) { 807 rr->rr_count = 0; 808 rr->rr_data = NULL; 809 /* 810 * In this case, status is NFS_OK, but there is no data 811 * to encode. So set rr_mp to NULL. 812 */ 813 rr->rr_mp = NULL; 814 rr->rr_ok.rrok_wlist = ra->ra_wlist; 815 if (rr->rr_ok.rrok_wlist) 816 clist_zero_len(rr->rr_ok.rrok_wlist); 817 goto done; 818 } 819 820 if (ra->ra_wlist) { 821 mp = NULL; 822 rr->rr_mp = NULL; 823 (void) rdma_get_wchunk(req, &iov, ra->ra_wlist); 824 if (ra->ra_count > iov.iov_len) { 825 rr->rr_data = NULL; 826 rr->rr_status = NFSERR_INVAL; 827 goto done; 828 } 829 } else { 830 /* 831 * mp will contain the data to be sent out in the read reply. 832 * This will be freed after the reply has been sent out (by the 833 * driver). 834 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so 835 * that the call to xdrmblk_putmblk() never fails. 
836 */ 837 mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG, 838 &alloc_err); 839 ASSERT(mp != NULL); 840 ASSERT(alloc_err == 0); 841 842 rr->rr_mp = mp; 843 844 /* 845 * Set up io vector 846 */ 847 iov.iov_base = (caddr_t)mp->b_datap->db_base; 848 iov.iov_len = ra->ra_count; 849 } 850 851 uio.uio_iov = &iov; 852 uio.uio_iovcnt = 1; 853 uio.uio_segflg = UIO_SYSSPACE; 854 uio.uio_extflg = UIO_COPY_CACHED; 855 uio.uio_loffset = (offset_t)ra->ra_offset; 856 uio.uio_resid = ra->ra_count; 857 858 error = VOP_READ(vp, &uio, 0, cr, &ct); 859 860 if (error) { 861 if (mp) 862 freeb(mp); 863 864 /* 865 * check if a monitor detected a delegation conflict and 866 * mark as wouldblock so response is dropped 867 */ 868 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 869 curthread->t_flag |= T_WOULDBLOCK; 870 else 871 rr->rr_status = puterrno(error); 872 873 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 874 if (in_crit) 875 nbl_end_crit(vp); 876 877 VN_RELE(vp); 878 rr->rr_data = NULL; 879 880 return; 881 } 882 883 /* 884 * Get attributes again so we can send the latest access 885 * time to the client side for his cache. 
886 */ 887 va.va_mask = AT_ALL; 888 889 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 890 891 if (error) { 892 if (mp) 893 freeb(mp); 894 895 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 896 if (in_crit) 897 nbl_end_crit(vp); 898 899 VN_RELE(vp); 900 rr->rr_data = NULL; 901 rr->rr_status = puterrno(error); 902 903 return; 904 } 905 906 rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid); 907 908 if (mp) { 909 rr->rr_data = (char *)mp->b_datap->db_base; 910 } else { 911 if (ra->ra_wlist) { 912 rr->rr_data = (caddr_t)iov.iov_base; 913 if (!rdma_setup_read_data2(ra, rr)) { 914 rr->rr_data = NULL; 915 rr->rr_status = puterrno(NFSERR_INVAL); 916 } 917 } 918 } 919 done: 920 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 921 if (in_crit) 922 nbl_end_crit(vp); 923 924 acl_perm(vp, exi, &va, cr); 925 926 /* check for overflows */ 927 error = vattr_to_nattr(&va, &rr->rr_attr); 928 929 VN_RELE(vp); 930 931 rr->rr_status = puterrno(error); 932 } 933 934 /* 935 * Free data allocated by rfs_read 936 */ 937 void 938 rfs_rdfree(struct nfsrdresult *rr) 939 { 940 mblk_t *mp; 941 942 if (rr->rr_status == NFS_OK) { 943 mp = rr->rr_mp; 944 if (mp != NULL) 945 freeb(mp); 946 } 947 } 948 949 void * 950 rfs_read_getfh(struct nfsreadargs *ra) 951 { 952 return (&ra->ra_fhandle); 953 } 954 955 #define MAX_IOVECS 12 956 957 #ifdef DEBUG 958 static int rfs_write_sync_hits = 0; 959 static int rfs_write_sync_misses = 0; 960 #endif 961 962 /* 963 * Write data to file. 964 * Returns attributes of a file after writing some data to it. 965 * 966 * Any changes made here, especially in error handling might have 967 * to also be done in rfs_write (which clusters write requests). 
968 */ 969 void 970 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns, 971 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 972 { 973 int error; 974 vnode_t *vp; 975 rlim64_t rlimit; 976 struct vattr va; 977 struct uio uio; 978 struct iovec iov[MAX_IOVECS]; 979 mblk_t *m; 980 struct iovec *iovp; 981 int iovcnt; 982 cred_t *savecred; 983 int in_crit = 0; 984 caller_context_t ct; 985 986 vp = nfs_fhtovp(&wa->wa_fhandle, exi); 987 if (vp == NULL) { 988 ns->ns_status = NFSERR_STALE; 989 return; 990 } 991 992 if (rdonly(exi, req)) { 993 VN_RELE(vp); 994 ns->ns_status = NFSERR_ROFS; 995 return; 996 } 997 998 if (vp->v_type != VREG) { 999 VN_RELE(vp); 1000 ns->ns_status = NFSERR_ISDIR; 1001 return; 1002 } 1003 1004 ct.cc_sysid = 0; 1005 ct.cc_pid = 0; 1006 ct.cc_caller_id = nfs2_srv_caller_id; 1007 ct.cc_flags = CC_DONTBLOCK; 1008 1009 va.va_mask = AT_UID|AT_MODE; 1010 1011 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 1012 1013 if (error) { 1014 VN_RELE(vp); 1015 ns->ns_status = puterrno(error); 1016 1017 return; 1018 } 1019 1020 if (crgetuid(cr) != va.va_uid) { 1021 /* 1022 * This is a kludge to allow writes of files created 1023 * with read only permission. The owner of the file 1024 * is always allowed to write it. 1025 */ 1026 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct); 1027 1028 if (error) { 1029 VN_RELE(vp); 1030 ns->ns_status = puterrno(error); 1031 return; 1032 } 1033 } 1034 1035 /* 1036 * Can't access a mandatory lock file. This might cause 1037 * the NFS service thread to block forever waiting for a 1038 * lock to be released that will never be released. 1039 */ 1040 if (MANDLOCK(vp, va.va_mode)) { 1041 VN_RELE(vp); 1042 ns->ns_status = NFSERR_ACCES; 1043 return; 1044 } 1045 1046 /* 1047 * We have to enter the critical region before calling VOP_RWLOCK 1048 * to avoid a deadlock with ufs. 
1049 */ 1050 if (nbl_need_check(vp)) { 1051 nbl_start_crit(vp, RW_READER); 1052 in_crit = 1; 1053 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset, 1054 wa->wa_count, 0, NULL)) { 1055 error = EACCES; 1056 goto out; 1057 } 1058 } 1059 1060 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct); 1061 1062 /* check if a monitor detected a delegation conflict */ 1063 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 1064 VN_RELE(vp); 1065 /* mark as wouldblock so response is dropped */ 1066 curthread->t_flag |= T_WOULDBLOCK; 1067 return; 1068 } 1069 1070 if (wa->wa_data || wa->wa_rlist) { 1071 /* Do the RDMA thing if necessary */ 1072 if (wa->wa_rlist) { 1073 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3); 1074 iov[0].iov_len = wa->wa_count; 1075 } else { 1076 iov[0].iov_base = wa->wa_data; 1077 iov[0].iov_len = wa->wa_count; 1078 } 1079 uio.uio_iov = iov; 1080 uio.uio_iovcnt = 1; 1081 uio.uio_segflg = UIO_SYSSPACE; 1082 uio.uio_extflg = UIO_COPY_DEFAULT; 1083 uio.uio_loffset = (offset_t)wa->wa_offset; 1084 uio.uio_resid = wa->wa_count; 1085 /* 1086 * The limit is checked on the client. We 1087 * should allow any size writes here. 1088 */ 1089 uio.uio_llimit = curproc->p_fsz_ctl; 1090 rlimit = uio.uio_llimit - wa->wa_offset; 1091 if (rlimit < (rlim64_t)uio.uio_resid) 1092 uio.uio_resid = (uint_t)rlimit; 1093 1094 /* 1095 * for now we assume no append mode 1096 */ 1097 /* 1098 * We're changing creds because VM may fault and we need 1099 * the cred of the current thread to be used if quota 1100 * checking is enabled. 
1101 */ 1102 savecred = curthread->t_cred; 1103 curthread->t_cred = cr; 1104 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct); 1105 curthread->t_cred = savecred; 1106 } else { 1107 iovcnt = 0; 1108 for (m = wa->wa_mblk; m != NULL; m = m->b_cont) 1109 iovcnt++; 1110 if (iovcnt <= MAX_IOVECS) { 1111 #ifdef DEBUG 1112 rfs_write_sync_hits++; 1113 #endif 1114 iovp = iov; 1115 } else { 1116 #ifdef DEBUG 1117 rfs_write_sync_misses++; 1118 #endif 1119 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP); 1120 } 1121 mblk_to_iov(wa->wa_mblk, iovcnt, iovp); 1122 uio.uio_iov = iovp; 1123 uio.uio_iovcnt = iovcnt; 1124 uio.uio_segflg = UIO_SYSSPACE; 1125 uio.uio_extflg = UIO_COPY_DEFAULT; 1126 uio.uio_loffset = (offset_t)wa->wa_offset; 1127 uio.uio_resid = wa->wa_count; 1128 /* 1129 * The limit is checked on the client. We 1130 * should allow any size writes here. 1131 */ 1132 uio.uio_llimit = curproc->p_fsz_ctl; 1133 rlimit = uio.uio_llimit - wa->wa_offset; 1134 if (rlimit < (rlim64_t)uio.uio_resid) 1135 uio.uio_resid = (uint_t)rlimit; 1136 1137 /* 1138 * For now we assume no append mode. 1139 */ 1140 /* 1141 * We're changing creds because VM may fault and we need 1142 * the cred of the current thread to be used if quota 1143 * checking is enabled. 1144 */ 1145 savecred = curthread->t_cred; 1146 curthread->t_cred = cr; 1147 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct); 1148 curthread->t_cred = savecred; 1149 1150 if (iovp != iov) 1151 kmem_free(iovp, sizeof (*iovp) * iovcnt); 1152 } 1153 1154 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); 1155 1156 if (!error) { 1157 /* 1158 * Get attributes again so we send the latest mod 1159 * time to the client side for his cache. 
1160 */ 1161 va.va_mask = AT_ALL; /* now we want everything */ 1162 1163 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 1164 1165 /* check for overflows */ 1166 if (!error) { 1167 acl_perm(vp, exi, &va, cr); 1168 error = vattr_to_nattr(&va, &ns->ns_attr); 1169 } 1170 } 1171 1172 out: 1173 if (in_crit) 1174 nbl_end_crit(vp); 1175 VN_RELE(vp); 1176 1177 /* check if a monitor detected a delegation conflict */ 1178 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 1179 /* mark as wouldblock so response is dropped */ 1180 curthread->t_flag |= T_WOULDBLOCK; 1181 else 1182 ns->ns_status = puterrno(error); 1183 1184 } 1185 1186 struct rfs_async_write { 1187 struct nfswriteargs *wa; 1188 struct nfsattrstat *ns; 1189 struct svc_req *req; 1190 cred_t *cr; 1191 kthread_t *thread; 1192 struct rfs_async_write *list; 1193 }; 1194 1195 struct rfs_async_write_list { 1196 fhandle_t *fhp; 1197 kcondvar_t cv; 1198 struct rfs_async_write *list; 1199 struct rfs_async_write_list *next; 1200 }; 1201 1202 static struct rfs_async_write_list *rfs_async_write_head = NULL; 1203 static kmutex_t rfs_async_write_lock; 1204 static int rfs_write_async = 1; /* enables write clustering if == 1 */ 1205 1206 #define MAXCLIOVECS 42 1207 #define RFSWRITE_INITVAL (enum nfsstat) -1 1208 1209 #ifdef DEBUG 1210 static int rfs_write_hits = 0; 1211 static int rfs_write_misses = 0; 1212 #endif 1213 1214 /* 1215 * Write data to file. 1216 * Returns attributes of a file after writing some data to it. 
 */
/*
 * rfs_write: asynchronous (clustered) NFSv2 WRITE.
 *
 * Requests arriving for the same file handle while an earlier request
 * holds the file's write rwlock are gathered into a "cluster" and
 * serviced in one pass by the thread that owns the cluster; the other
 * threads block on the cluster cv until their status is filled in.
 * Falls back to rfs_write_sync() when async clustering is disabled.
 */
void
rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct rfs_async_write_list *lp;
	struct rfs_async_write_list *nlp;
	struct rfs_async_write *rp;
	struct rfs_async_write *nrp;
	struct rfs_async_write *trp;
	struct rfs_async_write *lrp;
	int data_written;
	int iovcnt;
	mblk_t *m;
	struct iovec *iovp;
	struct iovec *niovp;
	struct iovec iov[MAXCLIOVECS];	/* on-stack iovecs for the common case */
	int count;
	int rcount;
	uint_t off;
	uint_t len;
	struct rfs_async_write nrpsp;	/* this request's cluster entry (stack) */
	struct rfs_async_write_list nlpsp;	/* cluster head, if we start one */
	ushort_t t_flag;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;

	if (!rfs_write_async) {
		rfs_write_sync(wa, ns, exi, req, cr);
		return;
	}

	/*
	 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
	 * is considered an OK.
	 */
	ns->ns_status = RFSWRITE_INITVAL;

	nrp = &nrpsp;
	nrp->wa = wa;
	nrp->ns = ns;
	nrp->req = req;
	nrp->cr = cr;
	nrp->thread = curthread;

	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);

	/*
	 * Look to see if there is already a cluster started
	 * for this file.
	 */
	mutex_enter(&rfs_async_write_lock);
	for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
		if (bcmp(&wa->wa_fhandle, lp->fhp,
		    sizeof (fhandle_t)) == 0)
			break;
	}

	/*
	 * If lp is non-NULL, then there is already a cluster
	 * started.  We need to place ourselves in the cluster
	 * list in the right place as determined by starting
	 * offset.  Conflicts with non-blocking mandatory locked
	 * regions will be checked when the cluster is processed.
	 */
	if (lp != NULL) {
		/* Insert in offset order so contiguous requests coalesce. */
		rp = lp->list;
		trp = NULL;
		while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
			trp = rp;
			rp = rp->list;
		}
		nrp->list = rp;
		if (trp == NULL)
			lp->list = nrp;
		else
			trp->list = nrp;
		/* Sleep until the cluster owner fills in our status. */
		while (nrp->ns->ns_status == RFSWRITE_INITVAL)
			cv_wait(&lp->cv, &rfs_async_write_lock);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * No cluster started yet, start one and add ourselves
	 * to the list of clusters.
	 */
	nrp->list = NULL;

	nlp = &nlpsp;
	nlp->fhp = &wa->wa_fhandle;
	cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
	nlp->list = nrp;
	nlp->next = NULL;

	if (rfs_async_write_head == NULL) {
		rfs_async_write_head = nlp;
	} else {
		lp = rfs_async_write_head;
		while (lp->next != NULL)
			lp = lp->next;
		lp->next = nlp;
	}
	mutex_exit(&rfs_async_write_lock);

	/*
	 * Convert the file handle common to all of the requests
	 * in this cluster to a vnode.
	 */
	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		/* Unlink the cluster and fail every queued request as STALE. */
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_STALE;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Can only write regular files.  Attempts to write any
	 * other file types fail with EISDIR.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_ISDIR;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK, to avoid a
	 * deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Lock the file for writing.  This operation provides
	 * the delay which allows clusters to grow.
	 */
	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			if (rp->ns->ns_status == RFSWRITE_INITVAL) {
				rp->ns->ns_status = puterrno(error);
				rp->thread->t_flag |= T_WOULDBLOCK;
			}
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Disconnect this cluster from the list of clusters.
	 * The cluster that is being dealt with must be fixed
	 * in size after this point, so there is no reason
	 * to leave it on the list so that new requests can
	 * find it.
	 *
	 * The algorithm is that the first write request will
	 * create a cluster, convert the file handle to a
	 * vnode pointer, and then lock the file for writing.
	 * This request is not likely to be clustered with
	 * any others.  However, the next request will create
	 * a new cluster and be blocked in VOP_RWLOCK while
	 * the first request is being processed.  This delay
	 * will allow more requests to be clustered in this
	 * second cluster.
	 */
	mutex_enter(&rfs_async_write_lock);
	if (rfs_async_write_head == nlp)
		rfs_async_write_head = nlp->next;
	else {
		lp = rfs_async_write_head;
		while (lp->next != nlp)
			lp = lp->next;
		lp->next = nlp->next;
	}
	mutex_exit(&rfs_async_write_lock);

	/*
	 * Step through the list of requests in this cluster.
	 * We need to check permissions to make sure that all
	 * of the requests have sufficient permission to write
	 * the file.  A cluster can be composed of requests
	 * from different clients and different users on each
	 * client.
	 *
	 * As a side effect, we also calculate the size of the
	 * byte range that this cluster encompasses.
	 */
	rp = nlp->list;
	off = rp->wa->wa_offset;
	len = (uint_t)0;
	do {
		if (rdonly(exi, rp->req)) {
			rp->ns->ns_status = NFSERR_ROFS;
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}

		va.va_mask = AT_UID|AT_MODE;

		error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

		if (!error) {
			if (crgetuid(rp->cr) != va.va_uid) {
				/*
				 * This is a kludge to allow writes of files
				 * created with read only permission.  The
				 * owner of the file is always allowed to
				 * write it.
				 */
				error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
			}
			/* Mandatory-locked files are not writable over NFSv2. */
			if (!error && MANDLOCK(vp, va.va_mode))
				error = EACCES;
		}

		/*
		 * Check for a conflict with a nbmand-locked region.
		 */
		if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
		    rp->wa->wa_count, 0, NULL)) {
			error = EACCES;
		}

		if (error) {
			rp->ns->ns_status = puterrno(error);
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}
		/* Grow the cluster's byte range to cover this request. */
		if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
			len = rp->wa->wa_offset + rp->wa->wa_count - off;
	} while ((rp = rp->list) != NULL);

	/*
	 * Step through the cluster attempting to gather as many
	 * requests which are contiguous as possible.  These
	 * contiguous requests are handled via one call to VOP_WRITE
	 * instead of different calls to VOP_WRITE.  We also keep
	 * track of the fact that any data was written.
	 */
	rp = nlp->list;
	data_written = 0;
	do {
		/*
		 * Skip any requests which are already marked as having an
		 * error.
		 */
		if (rp->ns->ns_status != RFSWRITE_INITVAL) {
			rp = rp->list;
			continue;
		}

		/*
		 * Count the number of iovec's which are required
		 * to handle this set of requests.  One iovec is
		 * needed for each data buffer, whether addressed
		 * by wa_data or by the b_rptr pointers in the
		 * mblk chains.
		 */
		iovcnt = 0;
		lrp = rp;
		for (;;) {
			if (lrp->wa->wa_data || lrp->wa->wa_rlist)
				iovcnt++;
			else {
				m = lrp->wa->wa_mblk;
				while (m != NULL) {
					iovcnt++;
					m = m->b_cont;
				}
			}
			/*
			 * Stop extending the run at the end of the list, at
			 * a request already in error, or at a gap in offsets.
			 */
			if (lrp->list == NULL ||
			    lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
			    lrp->wa->wa_offset + lrp->wa->wa_count !=
			    lrp->list->wa->wa_offset) {
				lrp = lrp->list;
				break;
			}
			lrp = lrp->list;
		}

		if (iovcnt <= MAXCLIOVECS) {
#ifdef DEBUG
			rfs_write_hits++;
#endif
			niovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_misses++;
#endif
			niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
		}
		/*
		 * Put together the scatter/gather iovecs.
		 */
		iovp = niovp;
		trp = rp;
		count = 0;
		do {
			if (trp->wa->wa_data || trp->wa->wa_rlist) {
				if (trp->wa->wa_rlist) {
					iovp->iov_base =
					    (char *)((trp->wa->wa_rlist)->
					    u.c_daddr3);
					iovp->iov_len = trp->wa->wa_count;
				} else {
					iovp->iov_base = trp->wa->wa_data;
					iovp->iov_len = trp->wa->wa_count;
				}
				iovp++;
			} else {
				m = trp->wa->wa_mblk;
				rcount = trp->wa->wa_count;
				while (m != NULL) {
					iovp->iov_base = (caddr_t)m->b_rptr;
					iovp->iov_len = (m->b_wptr - m->b_rptr);
					/*
					 * Clamp the final iovec so the total
					 * never exceeds wa_count, even if the
					 * mblk chain holds more bytes.
					 */
					rcount -= iovp->iov_len;
					if (rcount < 0)
						iovp->iov_len += rcount;
					iovp++;
					if (rcount <= 0)
						break;
					m = m->b_cont;
				}
			}
			count += trp->wa->wa_count;
			trp = trp->list;
		} while (trp != lrp);

		uio.uio_iov = niovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)rp->wa->wa_offset;
		uio.uio_resid = count;
		/*
		 * The limit is checked on the client. We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - rp->wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */

		/*
		 * We're changing creds because VM may fault
		 * and we need the cred of the current
		 * thread to be used if quota * checking is
		 * enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
		curthread->t_cred = savecred;

		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			/* mark as wouldblock so response is dropped */
			curthread->t_flag |= T_WOULDBLOCK;

		if (niovp != iov)
			kmem_free(niovp, sizeof (*niovp) * iovcnt);

		if (!error) {
			data_written = 1;
			/*
			 * Get attributes again so we send the latest mod
			 * time to the client side for his cache.
			 */
			va.va_mask = AT_ALL;	/* now we want everything */

			error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

			if (!error)
				acl_perm(vp, exi, &va, rp->cr);
		}

		/*
		 * Fill in the status responses for each request
		 * which was just handled.  Also, copy the latest
		 * attributes in to the attribute responses if
		 * appropriate.
		 */
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		do {
			rp->thread->t_flag |= t_flag;
			/* check for overflows */
			if (!error) {
				error = vattr_to_nattr(&va, &rp->ns->ns_attr);
			}
			rp->ns->ns_status = puterrno(error);
			rp = rp->list;
		} while (rp != lrp);
	} while (rp != NULL);

	/*
	 * If any data was written at all, then we need to flush
	 * the data and metadata to stable storage.
1684 */ 1685 if (data_written) { 1686 error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct); 1687 1688 if (!error) { 1689 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct); 1690 } 1691 } 1692 1693 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); 1694 1695 if (in_crit) 1696 nbl_end_crit(vp); 1697 VN_RELE(vp); 1698 1699 t_flag = curthread->t_flag & T_WOULDBLOCK; 1700 mutex_enter(&rfs_async_write_lock); 1701 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1702 if (rp->ns->ns_status == RFSWRITE_INITVAL) { 1703 rp->ns->ns_status = puterrno(error); 1704 rp->thread->t_flag |= t_flag; 1705 } 1706 } 1707 cv_broadcast(&nlp->cv); 1708 mutex_exit(&rfs_async_write_lock); 1709 1710 } 1711 1712 void * 1713 rfs_write_getfh(struct nfswriteargs *wa) 1714 { 1715 return (&wa->wa_fhandle); 1716 } 1717 1718 /* 1719 * Create a file. 1720 * Creates a file with given attributes and returns those attributes 1721 * and an fhandle for the new file. 1722 */ 1723 void 1724 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr, 1725 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 1726 { 1727 int error; 1728 int lookuperr; 1729 int in_crit = 0; 1730 struct vattr va; 1731 vnode_t *vp; 1732 vnode_t *realvp; 1733 vnode_t *dvp; 1734 char *name = args->ca_da.da_name; 1735 vnode_t *tvp = NULL; 1736 int mode; 1737 int lookup_ok; 1738 bool_t trunc; 1739 struct sockaddr *ca; 1740 1741 /* 1742 * Disallow NULL paths 1743 */ 1744 if (name == NULL || *name == '\0') { 1745 dr->dr_status = NFSERR_ACCES; 1746 return; 1747 } 1748 1749 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 1750 if (dvp == NULL) { 1751 dr->dr_status = NFSERR_STALE; 1752 return; 1753 } 1754 1755 error = sattr_to_vattr(args->ca_sa, &va); 1756 if (error) { 1757 dr->dr_status = puterrno(error); 1758 return; 1759 } 1760 1761 /* 1762 * Must specify the mode. 
1763 */ 1764 if (!(va.va_mask & AT_MODE)) { 1765 VN_RELE(dvp); 1766 dr->dr_status = NFSERR_INVAL; 1767 return; 1768 } 1769 1770 /* 1771 * This is a completely gross hack to make mknod 1772 * work over the wire until we can wack the protocol 1773 */ 1774 if ((va.va_mode & IFMT) == IFCHR) { 1775 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV) 1776 va.va_type = VFIFO; /* xtra kludge for named pipe */ 1777 else { 1778 va.va_type = VCHR; 1779 /* 1780 * uncompress the received dev_t 1781 * if the top half is zero indicating a request 1782 * from an `older style' OS. 1783 */ 1784 if ((va.va_size & 0xffff0000) == 0) 1785 va.va_rdev = nfsv2_expdev(va.va_size); 1786 else 1787 va.va_rdev = (dev_t)va.va_size; 1788 } 1789 va.va_mask &= ~AT_SIZE; 1790 } else if ((va.va_mode & IFMT) == IFBLK) { 1791 va.va_type = VBLK; 1792 /* 1793 * uncompress the received dev_t 1794 * if the top half is zero indicating a request 1795 * from an `older style' OS. 1796 */ 1797 if ((va.va_size & 0xffff0000) == 0) 1798 va.va_rdev = nfsv2_expdev(va.va_size); 1799 else 1800 va.va_rdev = (dev_t)va.va_size; 1801 va.va_mask &= ~AT_SIZE; 1802 } else if ((va.va_mode & IFMT) == IFSOCK) { 1803 va.va_type = VSOCK; 1804 } else { 1805 va.va_type = VREG; 1806 } 1807 va.va_mode &= ~IFMT; 1808 va.va_mask |= AT_TYPE; 1809 1810 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 1811 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND, 1812 MAXPATHLEN); 1813 if (name == NULL) { 1814 dr->dr_status = puterrno(EINVAL); 1815 return; 1816 } 1817 1818 /* 1819 * Why was the choice made to use VWRITE as the mode to the 1820 * call to VOP_CREATE ? This results in a bug. When a client 1821 * opens a file that already exists and is RDONLY, the second 1822 * open fails with an EACESS because of the mode. 1823 * bug ID 1054648. 
1824 */ 1825 lookup_ok = 0; 1826 mode = VWRITE; 1827 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) { 1828 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1829 NULL, NULL, NULL); 1830 if (!error) { 1831 struct vattr at; 1832 1833 lookup_ok = 1; 1834 at.va_mask = AT_MODE; 1835 error = VOP_GETATTR(tvp, &at, 0, cr, NULL); 1836 if (!error) 1837 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD; 1838 VN_RELE(tvp); 1839 tvp = NULL; 1840 } 1841 } 1842 1843 if (!lookup_ok) { 1844 if (rdonly(exi, req)) { 1845 error = EROFS; 1846 } else if (va.va_type != VREG && va.va_type != VFIFO && 1847 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) { 1848 error = EPERM; 1849 } else { 1850 error = 0; 1851 } 1852 } 1853 1854 /* 1855 * If file size is being modified on an already existing file 1856 * make sure that there are no conflicting non-blocking mandatory 1857 * locks in the region being manipulated. Return EACCES if there 1858 * are conflicting locks. 1859 */ 1860 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) { 1861 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1862 NULL, NULL, NULL); 1863 1864 if (!lookuperr && 1865 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) { 1866 VN_RELE(tvp); 1867 curthread->t_flag |= T_WOULDBLOCK; 1868 goto out; 1869 } 1870 1871 if (!lookuperr && nbl_need_check(tvp)) { 1872 /* 1873 * The file exists. Now check if it has any 1874 * conflicting non-blocking mandatory locks 1875 * in the region being changed. 
1876 */ 1877 struct vattr bva; 1878 u_offset_t offset; 1879 ssize_t length; 1880 1881 nbl_start_crit(tvp, RW_READER); 1882 in_crit = 1; 1883 1884 bva.va_mask = AT_SIZE; 1885 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL); 1886 if (!error) { 1887 if (va.va_size < bva.va_size) { 1888 offset = va.va_size; 1889 length = bva.va_size - va.va_size; 1890 } else { 1891 offset = bva.va_size; 1892 length = va.va_size - bva.va_size; 1893 } 1894 if (length) { 1895 if (nbl_conflict(tvp, NBL_WRITE, 1896 offset, length, 0, NULL)) { 1897 error = EACCES; 1898 } 1899 } 1900 } 1901 if (error) { 1902 nbl_end_crit(tvp); 1903 VN_RELE(tvp); 1904 in_crit = 0; 1905 } 1906 } else if (tvp != NULL) { 1907 VN_RELE(tvp); 1908 } 1909 } 1910 1911 if (!error) { 1912 /* 1913 * If filesystem is shared with nosuid the remove any 1914 * setuid/setgid bits on create. 1915 */ 1916 if (va.va_type == VREG && 1917 exi->exi_export.ex_flags & EX_NOSUID) 1918 va.va_mode &= ~(VSUID | VSGID); 1919 1920 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0, 1921 NULL, NULL); 1922 1923 if (!error) { 1924 1925 if ((va.va_mask & AT_SIZE) && (va.va_size == 0)) 1926 trunc = TRUE; 1927 else 1928 trunc = FALSE; 1929 1930 if (rfs4_check_delegated(FWRITE, vp, trunc)) { 1931 VN_RELE(vp); 1932 curthread->t_flag |= T_WOULDBLOCK; 1933 goto out; 1934 } 1935 va.va_mask = AT_ALL; 1936 1937 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 1938 1939 /* check for overflows */ 1940 if (!error) { 1941 acl_perm(vp, exi, &va, cr); 1942 error = vattr_to_nattr(&va, &dr->dr_attr); 1943 if (!error) { 1944 error = makefh(&dr->dr_fhandle, vp, 1945 exi); 1946 } 1947 } 1948 /* 1949 * Force modified metadata out to stable storage. 
1950 * 1951 * if a underlying vp exists, pass it to VOP_FSYNC 1952 */ 1953 if (VOP_REALVP(vp, &realvp, NULL) == 0) 1954 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL); 1955 else 1956 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 1957 VN_RELE(vp); 1958 } 1959 1960 if (in_crit) { 1961 nbl_end_crit(tvp); 1962 VN_RELE(tvp); 1963 } 1964 } 1965 1966 /* 1967 * Force modified data and metadata out to stable storage. 1968 */ 1969 (void) VOP_FSYNC(dvp, 0, cr, NULL); 1970 1971 out: 1972 1973 VN_RELE(dvp); 1974 1975 dr->dr_status = puterrno(error); 1976 1977 if (name != args->ca_da.da_name) 1978 kmem_free(name, MAXPATHLEN); 1979 } 1980 void * 1981 rfs_create_getfh(struct nfscreatargs *args) 1982 { 1983 return (args->ca_da.da_fhandle); 1984 } 1985 1986 /* 1987 * Remove a file. 1988 * Remove named file from parent directory. 1989 */ 1990 void 1991 rfs_remove(struct nfsdiropargs *da, enum nfsstat *status, 1992 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 1993 { 1994 int error = 0; 1995 vnode_t *vp; 1996 vnode_t *targvp; 1997 int in_crit = 0; 1998 1999 /* 2000 * Disallow NULL paths 2001 */ 2002 if (da->da_name == NULL || *da->da_name == '\0') { 2003 *status = NFSERR_ACCES; 2004 return; 2005 } 2006 2007 vp = nfs_fhtovp(da->da_fhandle, exi); 2008 if (vp == NULL) { 2009 *status = NFSERR_STALE; 2010 return; 2011 } 2012 2013 if (rdonly(exi, req)) { 2014 VN_RELE(vp); 2015 *status = NFSERR_ROFS; 2016 return; 2017 } 2018 2019 /* 2020 * Check for a conflict with a non-blocking mandatory share reservation. 2021 */ 2022 error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0, 2023 NULL, cr, NULL, NULL, NULL); 2024 if (error != 0) { 2025 VN_RELE(vp); 2026 *status = puterrno(error); 2027 return; 2028 } 2029 2030 /* 2031 * If the file is delegated to an v4 client, then initiate 2032 * recall and drop this request (by setting T_WOULDBLOCK). 2033 * The client will eventually re-transmit the request and 2034 * (hopefully), by then, the v4 client will have returned 2035 * the delegation. 
 */

	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
		VN_RELE(vp);
		VN_RELE(targvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Guard against removal conflicting with an nbmand reservation. */
	if (nbl_need_check(targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(targvp);
	VN_RELE(targvp);
	VN_RELE(vp);

	*status = puterrno(error);

}

void *
rfs_remove_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}

/*
 * rename a file
 * Give a file (from) a new name (to).
 */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error = 0;
	vnode_t *fromvp;
	vnode_t *tovp;
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;
	vnode_t *targvp;
	int in_crit = 0;

	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The target directory's handle must resolve to the same export
	 * as the source; cross-export renames are refused with XDEV.
	 */
	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(exi, req)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	/* Keep the vnode's cached pathname in sync with the new name. */
	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
void *
rfs_rename_getfh(struct nfsrnmargs *args)
{
	return (args->rna_from.da_fhandle);
}

/*
 * Link to a file.
 * Create a file (to) which is a hard link to the given file (from).
 */
void
rfs_link(struct nfslinkargs *args, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *fromvp;
	vnode_t *tovp;
	struct exportinfo *to_exi;
	fhandle_t *fh;

	fromvp = nfs_fhtovp(args->la_from, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/* Cross-export links are refused with XDEV, as in rfs_rename. */
	fh = args->la_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}
	/*
	 * Disallow NULL paths
	 */
	if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(exi, req)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
2294 */ 2295 (void) VOP_FSYNC(tovp, 0, cr, NULL); 2296 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL); 2297 2298 VN_RELE(tovp); 2299 VN_RELE(fromvp); 2300 2301 *status = puterrno(error); 2302 2303 } 2304 void * 2305 rfs_link_getfh(struct nfslinkargs *args) 2306 { 2307 return (args->la_from); 2308 } 2309 2310 /* 2311 * Symbolicly link to a file. 2312 * Create a file (to) with the given attributes which is a symbolic link 2313 * to the given path name (to). 2314 */ 2315 void 2316 rfs_symlink(struct nfsslargs *args, enum nfsstat *status, 2317 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 2318 { 2319 int error; 2320 struct vattr va; 2321 vnode_t *vp; 2322 vnode_t *svp; 2323 int lerror; 2324 struct sockaddr *ca; 2325 char *name = NULL; 2326 2327 /* 2328 * Disallow NULL paths 2329 */ 2330 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') { 2331 *status = NFSERR_ACCES; 2332 return; 2333 } 2334 2335 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi); 2336 if (vp == NULL) { 2337 *status = NFSERR_STALE; 2338 return; 2339 } 2340 2341 if (rdonly(exi, req)) { 2342 VN_RELE(vp); 2343 *status = NFSERR_ROFS; 2344 return; 2345 } 2346 2347 error = sattr_to_vattr(args->sla_sa, &va); 2348 if (error) { 2349 VN_RELE(vp); 2350 *status = puterrno(error); 2351 return; 2352 } 2353 2354 if (!(va.va_mask & AT_MODE)) { 2355 VN_RELE(vp); 2356 *status = NFSERR_INVAL; 2357 return; 2358 } 2359 2360 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2361 name = nfscmd_convname(ca, exi, args->sla_tnm, 2362 NFSCMD_CONV_INBOUND, MAXPATHLEN); 2363 2364 if (name == NULL) { 2365 *status = NFSERR_ACCES; 2366 return; 2367 } 2368 2369 va.va_type = VLNK; 2370 va.va_mask |= AT_TYPE; 2371 2372 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0); 2373 2374 /* 2375 * Force new data and metadata out to stable storage. 
2376 */ 2377 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0, 2378 NULL, cr, NULL, NULL, NULL); 2379 2380 if (!lerror) { 2381 (void) VOP_FSYNC(svp, 0, cr, NULL); 2382 VN_RELE(svp); 2383 } 2384 2385 /* 2386 * Force modified data and metadata out to stable storage. 2387 */ 2388 (void) VOP_FSYNC(vp, 0, cr, NULL); 2389 2390 VN_RELE(vp); 2391 2392 *status = puterrno(error); 2393 if (name != args->sla_tnm) 2394 kmem_free(name, MAXPATHLEN); 2395 2396 } 2397 void * 2398 rfs_symlink_getfh(struct nfsslargs *args) 2399 { 2400 return (args->sla_from.da_fhandle); 2401 } 2402 2403 /* 2404 * Make a directory. 2405 * Create a directory with the given name, parent directory, and attributes. 2406 * Returns a file handle and attributes for the new directory. 2407 */ 2408 void 2409 rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr, 2410 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 2411 { 2412 int error; 2413 struct vattr va; 2414 vnode_t *dvp = NULL; 2415 vnode_t *vp; 2416 char *name = args->ca_da.da_name; 2417 2418 /* 2419 * Disallow NULL paths 2420 */ 2421 if (name == NULL || *name == '\0') { 2422 dr->dr_status = NFSERR_ACCES; 2423 return; 2424 } 2425 2426 vp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 2427 if (vp == NULL) { 2428 dr->dr_status = NFSERR_STALE; 2429 return; 2430 } 2431 2432 if (rdonly(exi, req)) { 2433 VN_RELE(vp); 2434 dr->dr_status = NFSERR_ROFS; 2435 return; 2436 } 2437 2438 error = sattr_to_vattr(args->ca_sa, &va); 2439 if (error) { 2440 VN_RELE(vp); 2441 dr->dr_status = puterrno(error); 2442 return; 2443 } 2444 2445 if (!(va.va_mask & AT_MODE)) { 2446 VN_RELE(vp); 2447 dr->dr_status = NFSERR_INVAL; 2448 return; 2449 } 2450 2451 va.va_type = VDIR; 2452 va.va_mask |= AT_TYPE; 2453 2454 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL); 2455 2456 if (!error) { 2457 /* 2458 * Attribtutes of the newly created directory should 2459 * be returned to the client. 
	 */
		va.va_mask = AT_ALL; /* We want everything */
		error = VOP_GETATTR(dvp, &va, 0, cr, NULL);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				error = makefh(&dr->dr_fhandle, dvp, exi);
			}
		}
		/*
		 * Force new data and metadata out to stable storage.
		 */
		(void) VOP_FSYNC(dvp, 0, cr, NULL);
		VN_RELE(dvp);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	dr->dr_status = puterrno(error);

}
void *
rfs_mkdir_getfh(struct nfscreatargs *args)
{
	return (args->ca_da.da_fhandle);
}

/*
 * Remove a directory.
 * Remove the given directory name from the given parent directory.
 */
void
rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;


	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(exi, req)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * VOP_RMDIR now takes a new third argument (the current
	 * directory of the process).  That's because someone
	 * wants to return EINVAL if one tries to remove ".".
	 * Of course, NFS servers have no idea what their
	 * clients' current directories are.  We fake it by
	 * supplying a vnode known to exist and illegal to
	 * remove.
	 */
	error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	/*
	 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
	 * if the directory is not empty.  A System V NFS server
	 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
	 * over the wire.
	 */
	if (error == EEXIST)
		*status = NFSERR_NOTEMPTY;
	else
		*status = puterrno(error);

}
void *
rfs_rmdir_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}

/*
 * rfs_readdir: NFSv2 READDIR.  Reads raw dirent data into a buffer
 * sized by the client's rda_count (capped at NFS_MAXDATA), then runs
 * it through the per-client charset conversion before replying.
 */
/* ARGSUSED */
void
rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	int iseof;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	char *ndata = NULL;
	struct sockaddr *ca;
	size_t nents;
	int ret;

	vp = nfs_fhtovp(&rda->rda_fh, exi);
	if (vp == NULL) {
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VDIR) {
		VN_RELE(vp);
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_NOTDIR;
		return;
	}

	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);

	if (error) {
		rd->rd_entries = NULL;
		goto bad;
	}

	/* A zero-length request is answered with an empty, non-EOF reply. */
	if (rda->rda_count == 0) {
		rd->rd_entries = NULL;
		rd->rd_size = 0;
		rd->rd_eof = FALSE;
		goto bad;
	}

	rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);

	/*
	 * Allocate data for entries.  This will be freed by rfs_rddirfree.
	 */
	rd->rd_bufsize = (uint_t)rda->rda_count;
	rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);

	/*
	 * Set up io vector to read directory data
	 */
	iov.iov_base = (caddr_t)rd->rd_entries;
	iov.iov_len = rda->rda_count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)rda->rda_offset;
	uio.uio_resid = rda->rda_count;

	/*
	 * read directory
	 */
	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);

	/*
	 * Clean up
	 */
	if (!error) {
		/*
		 * set size and eof
		 */
		if (uio.uio_resid == rda->rda_count) {
			/* nothing was read: treat as end-of-directory */
			rd->rd_size = 0;
			rd->rd_eof = TRUE;
		} else {
			rd->rd_size = (uint32_t)(rda->rda_count -
			    uio.uio_resid);
			rd->rd_eof = iseof ? TRUE : FALSE;
		}
	}

	/* Convert entry names to the client's character set, if configured. */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
	ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
	    rda->rda_count, &ndata);

	if (ret != 0) {
		size_t dropbytes;
		/*
		 * We had to drop one or more entries in order to fit
		 * during the character conversion.  We need to patch
		 * up the size and eof info.
		 */
		if (rd->rd_eof)
			rd->rd_eof = FALSE;
		dropbytes = nfscmd_dropped_entrysize(
		    (struct dirent64 *)rd->rd_entries, nents, ret);
		rd->rd_size -= dropbytes;
	}
	if (ndata == NULL) {
		ndata = (char *)rd->rd_entries;
	} else if (ndata != (char *)rd->rd_entries) {
		/* conversion produced a new buffer; swap it in */
		kmem_free(rd->rd_entries, rd->rd_bufsize);
		rd->rd_entries = (void *)ndata;
		rd->rd_bufsize = rda->rda_count;
	}

bad:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

#if 0 /* notyet */
	/*
	 * Don't do this.  It causes local disk writes when just
	 * reading the file and the overhead is deemed larger
	 * than the benefit.
	 */
	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
#endif

	VN_RELE(vp);

	rd->rd_status = puterrno(error);

}
void *
rfs_readdir_getfh(struct nfsrddirargs *rda)
{
	return (&rda->rda_fh);
}
/* Free the entry buffer allocated by rfs_readdir. */
void
rfs_rddirfree(struct nfsrddirres *rd)
{
	if (rd->rd_entries != NULL)
		kmem_free(rd->rd_entries, rd->rd_bufsize);
}

/*
 * rfs_statfs: NFSv2 STATFS — report filesystem block counts from
 * VFS_STATVFS to the client.
 */
/* ARGSUSED */
void
rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
	struct svc_req *req, cred_t *cr)
{
	int error;
	struct statvfs64 sb;
	vnode_t *vp;

	vp = nfs_fhtovp(fh, exi);
	if (vp == NULL) {
		fs->fs_status = NFSERR_STALE;
		return;
	}

	error = VFS_STATVFS(vp->v_vfsp, &sb);

	if (!error) {
		fs->fs_tsize = nfstsize();
		fs->fs_bsize = sb.f_frsize;
		fs->fs_blocks = sb.f_blocks;
		fs->fs_bfree = sb.f_bfree;
		fs->fs_bavail = sb.f_bavail;
	}

	VN_RELE(vp);

	fs->fs_status = puterrno(error);

}
void *
rfs_statfs_getfh(fhandle_t *fh)
{
	return (fh);
}

/*
 * sattr_to_vattr: translate on-the-wire NFSv2 settable attributes into
 * a vattr.  Fields whose wire value is all-ones ("don't change") are
 * left out of va_mask.  Returns EOVERFLOW on 32-bit kernels when a time
 * value will not fit in a time_t; 0 otherwise.
 */
static int
sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
{
	vap->va_mask = 0;

	/*
	 * There was a sign extension bug in some VFS based systems
	 * which stored the mode as a short.  When it would get
	 * assigned to a u_long, no sign extension would occur.
	 * It needed to, but this wasn't noticed because sa_mode
	 * would then get assigned back to the short, thus ignoring
	 * the upper 16 bits of sa_mode.
	 *
	 * To make this implementation work for both broken
	 * clients and good clients, we check for both versions
	 * of the mode.
	 */
	if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
	    sa->sa_mode != (uint32_t)-1) {
		vap->va_mask |= AT_MODE;
		vap->va_mode = sa->sa_mode;
	}
	if (sa->sa_uid != (uint32_t)-1) {
		vap->va_mask |= AT_UID;
		vap->va_uid = sa->sa_uid;
	}
	if (sa->sa_gid != (uint32_t)-1) {
		vap->va_mask |= AT_GID;
		vap->va_gid = sa->sa_gid;
	}
	if (sa->sa_size != (uint32_t)-1) {
		vap->va_mask |= AT_SIZE;
		vap->va_size = sa->sa_size;
	}
	if (sa->sa_atime.tv_sec != (int32_t)-1 &&
	    sa->sa_atime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_ATIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
		vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
	}
	if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
	    sa->sa_mtime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_MTIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
		vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
	}
	return (0);
}

/* Map vtype_t values to on-the-wire NFSv2 file types; 0 = unmappable. */
static enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};

/*
 * check the following fields for overflow: nodeid, size, and time.
 * There could be a problem when converting 64-bit LP64 fields
 * into 32-bit ones.  Return an error if there is an overflow.
 */
int
vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
{
	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
	na->na_type = vt_to_nf[vap->va_type];

	/*
	 * A mode/uid/gid of (unsigned short)-1 is the historical 16-bit
	 * "not set" sentinel; pass it through as the 32-bit all-ones
	 * wire sentinel rather than OR'ing in the file type bits.
	 */
	if (vap->va_mode == (unsigned short) -1)
		na->na_mode = (uint32_t)-1;
	else
		na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;

	if (vap->va_uid == (unsigned short)(-1))
		na->na_uid = (uint32_t)(-1);
	else if (vap->va_uid == UID_NOBODY)
		na->na_uid = (uint32_t)NFS_UID_NOBODY;
	else
		na->na_uid = vap->va_uid;

	if (vap->va_gid == (unsigned short)(-1))
		na->na_gid = (uint32_t)-1;
	else if (vap->va_gid == GID_NOBODY)
		na->na_gid = (uint32_t)NFS_GID_NOBODY;
	else
		na->na_gid = vap->va_gid;

	/*
	 * Do we need to check fsid for overflow?  It is 64-bit in the
	 * vattr, but are bigger than 32 bit values supported?
	 */
	na->na_fsid = vap->va_fsid;

	na->na_nodeid = vap->va_nodeid;

	/*
	 * Check to make sure that the nodeid is representable over the
	 * wire without losing bits.
	 */
	if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
		return (EFBIG);
	na->na_nlink = vap->va_nlink;

	/*
	 * Check for big files here, instead of at the caller.  See
	 * comments in cstat for large special file explanation.
	 */
	if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
		if ((vap->va_type == VREG) || (vap->va_type == VDIR))
			return (EFBIG);
		if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
			/* UNKNOWN_SIZE | OVERFLOW */
			na->na_size = MAXOFF32_T;
		} else
			na->na_size = vap->va_size;
	} else
		na->na_size = vap->va_size;

	/*
	 * If the vnode times overflow the 32-bit times that NFS2
	 * uses on the wire then return an error.
	 */
	if (!NFS_VAP_TIME_OK(vap)) {
		return (EOVERFLOW);
	}
	/* NFSv2 wire times carry microseconds, vattr carries nanoseconds. */
	na->na_atime.tv_sec = vap->va_atime.tv_sec;
	na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;

	na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
	na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;

	na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
	na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;

	/*
	 * If the dev_t will fit into 16 bits then compress
	 * it, otherwise leave it alone.  See comments in
	 * nfs_client.c.
	 */
	if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
	    getmajor(vap->va_rdev) <= SO4_MAXMAJ)
		na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
	else
		(void) cmpldev(&na->na_rdev, vap->va_rdev);

	na->na_blocks = vap->va_nblocks;
	na->na_blocksize = vap->va_blksize;

	/*
	 * This bit of ugliness is a *TEMPORARY* hack to preserve the
	 * over-the-wire protocols for named-pipe vnodes.  It remaps the
	 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
	 *
	 * BUYER BEWARE:
	 *  If you are porting the NFS to a non-Sun server, you probably
	 *  don't want to include the following block of code.  The
	 *  over-the-wire special file types will be changing with the
	 *  NFS Protocol Revision.
	 */
	if (vap->va_type == VFIFO)
		NA_SETFIFO(na);
	return (0);
}

/*
 * acl v2 support: returns approximate permission.
 *	default: returns minimal permission (more restrictive)
 *	aclok: returns maximal permission (less restrictive)
 * This routine changes the permissions that are already in *va.
 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
 * CLASS_OBJ is always the same as GROUP_OBJ entry.
2929 */ 2930 static void 2931 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr) 2932 { 2933 vsecattr_t vsa; 2934 int aclcnt; 2935 aclent_t *aclentp; 2936 mode_t mask_perm; 2937 mode_t grp_perm; 2938 mode_t other_perm; 2939 mode_t other_orig; 2940 int error; 2941 2942 /* dont care default acl */ 2943 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT); 2944 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL); 2945 2946 if (!error) { 2947 aclcnt = vsa.vsa_aclcnt; 2948 if (aclcnt > MIN_ACL_ENTRIES) { 2949 /* non-trivial ACL */ 2950 aclentp = vsa.vsa_aclentp; 2951 if (exi->exi_export.ex_flags & EX_ACLOK) { 2952 /* maximal permissions */ 2953 grp_perm = 0; 2954 other_perm = 0; 2955 for (; aclcnt > 0; aclcnt--, aclentp++) { 2956 switch (aclentp->a_type) { 2957 case USER_OBJ: 2958 break; 2959 case USER: 2960 grp_perm |= 2961 aclentp->a_perm << 3; 2962 other_perm |= aclentp->a_perm; 2963 break; 2964 case GROUP_OBJ: 2965 grp_perm |= 2966 aclentp->a_perm << 3; 2967 break; 2968 case GROUP: 2969 other_perm |= aclentp->a_perm; 2970 break; 2971 case OTHER_OBJ: 2972 other_orig = aclentp->a_perm; 2973 break; 2974 case CLASS_OBJ: 2975 mask_perm = aclentp->a_perm; 2976 break; 2977 default: 2978 break; 2979 } 2980 } 2981 grp_perm &= mask_perm << 3; 2982 other_perm &= mask_perm; 2983 other_perm |= other_orig; 2984 2985 } else { 2986 /* minimal permissions */ 2987 grp_perm = 070; 2988 other_perm = 07; 2989 for (; aclcnt > 0; aclcnt--, aclentp++) { 2990 switch (aclentp->a_type) { 2991 case USER_OBJ: 2992 break; 2993 case USER: 2994 case CLASS_OBJ: 2995 grp_perm &= 2996 aclentp->a_perm << 3; 2997 other_perm &= 2998 aclentp->a_perm; 2999 break; 3000 case GROUP_OBJ: 3001 grp_perm &= 3002 aclentp->a_perm << 3; 3003 break; 3004 case GROUP: 3005 other_perm &= 3006 aclentp->a_perm; 3007 break; 3008 case OTHER_OBJ: 3009 other_perm &= 3010 aclentp->a_perm; 3011 break; 3012 default: 3013 break; 3014 } 3015 } 3016 } 3017 /* copy to va */ 3018 va->va_mode &= ~077; 3019 va->va_mode |= 
grp_perm | other_perm; 3020 } 3021 if (vsa.vsa_aclcnt) 3022 kmem_free(vsa.vsa_aclentp, 3023 vsa.vsa_aclcnt * sizeof (aclent_t)); 3024 } 3025 } 3026 3027 void 3028 rfs_srvrinit(void) 3029 { 3030 mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL); 3031 nfs2_srv_caller_id = fs_new_caller_id(); 3032 } 3033 3034 void 3035 rfs_srvrfini(void) 3036 { 3037 mutex_destroy(&rfs_async_write_lock); 3038 } 3039 3040 static int 3041 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr) 3042 { 3043 struct clist *wcl; 3044 int wlist_len; 3045 uint32_t count = rr->rr_count; 3046 3047 wcl = ra->ra_wlist; 3048 3049 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) { 3050 return (FALSE); 3051 } 3052 3053 wcl = ra->ra_wlist; 3054 rr->rr_ok.rrok_wlist_len = wlist_len; 3055 rr->rr_ok.rrok_wlist = wcl; 3056 3057 return (TRUE); 3058 }