1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 27 * All rights reserved. 
28 */ 29 30 #include <sys/param.h> 31 #include <sys/types.h> 32 #include <sys/systm.h> 33 #include <sys/cred.h> 34 #include <sys/buf.h> 35 #include <sys/vfs.h> 36 #include <sys/vnode.h> 37 #include <sys/uio.h> 38 #include <sys/stat.h> 39 #include <sys/errno.h> 40 #include <sys/sysmacros.h> 41 #include <sys/statvfs.h> 42 #include <sys/kmem.h> 43 #include <sys/kstat.h> 44 #include <sys/dirent.h> 45 #include <sys/cmn_err.h> 46 #include <sys/debug.h> 47 #include <sys/vtrace.h> 48 #include <sys/mode.h> 49 #include <sys/acl.h> 50 #include <sys/nbmlock.h> 51 #include <sys/policy.h> 52 #include <sys/sdt.h> 53 54 #include <rpc/types.h> 55 #include <rpc/auth.h> 56 #include <rpc/svc.h> 57 58 #include <nfs/nfs.h> 59 #include <nfs/export.h> 60 #include <nfs/nfs_cmd.h> 61 62 #include <vm/hat.h> 63 #include <vm/as.h> 64 #include <vm/seg.h> 65 #include <vm/seg_map.h> 66 #include <vm/seg_kmem.h> 67 68 #include <sys/strsubr.h> 69 70 /* 71 * These are the interface routines for the server side of the 72 * Network File System. See the NFS version 2 protocol specification 73 * for a description of this interface. 74 */ 75 76 static int sattr_to_vattr(struct nfssattr *, struct vattr *); 77 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *, 78 cred_t *); 79 80 /* 81 * Some "over the wire" UNIX file types. These are encoded 82 * into the mode. This needs to be fixed in the next rev. 83 */ 84 #define IFMT 0170000 /* type of file */ 85 #define IFCHR 0020000 /* character special */ 86 #define IFBLK 0060000 /* block special */ 87 #define IFSOCK 0140000 /* socket */ 88 89 u_longlong_t nfs2_srv_caller_id; 90 91 /* 92 * Get file attributes. 93 * Returns the current attributes of the file with the given fhandle. 
94 */ 95 /* ARGSUSED */ 96 void 97 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi, 98 struct svc_req *req, cred_t *cr) 99 { 100 int error; 101 vnode_t *vp; 102 struct vattr va; 103 104 vp = nfs_fhtovp(fhp, exi); 105 if (vp == NULL) { 106 ns->ns_status = NFSERR_STALE; 107 return; 108 } 109 110 /* 111 * Do the getattr. 112 */ 113 va.va_mask = AT_ALL; /* we want all the attributes */ 114 115 error = rfs4_delegated_getattr(vp, &va, 0, cr); 116 117 /* check for overflows */ 118 if (!error) { 119 /* Lie about the object type for a referral */ 120 if (vn_is_nfs_reparse(vp, cr)) 121 va.va_type = VLNK; 122 123 acl_perm(vp, exi, &va, cr); 124 error = vattr_to_nattr(&va, &ns->ns_attr); 125 } 126 127 VN_RELE(vp); 128 129 ns->ns_status = puterrno(error); 130 } 131 void * 132 rfs_getattr_getfh(fhandle_t *fhp) 133 { 134 return (fhp); 135 } 136 137 /* 138 * Set file attributes. 139 * Sets the attributes of the file with the given fhandle. Returns 140 * the new attributes. 141 */ 142 void 143 rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns, 144 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 145 { 146 int error; 147 int flag; 148 int in_crit = 0; 149 vnode_t *vp; 150 struct vattr va; 151 struct vattr bva; 152 struct flock64 bf; 153 caller_context_t ct; 154 155 156 vp = nfs_fhtovp(&args->saa_fh, exi); 157 if (vp == NULL) { 158 ns->ns_status = NFSERR_STALE; 159 return; 160 } 161 162 if (rdonly(exi, req) || vn_is_readonly(vp)) { 163 VN_RELE(vp); 164 ns->ns_status = NFSERR_ROFS; 165 return; 166 } 167 168 error = sattr_to_vattr(&args->saa_sa, &va); 169 if (error) { 170 VN_RELE(vp); 171 ns->ns_status = puterrno(error); 172 return; 173 } 174 175 /* 176 * If the client is requesting a change to the mtime, 177 * but the nanosecond field is set to 1 billion, then 178 * this is a flag to the server that it should set the 179 * atime and mtime fields to the server's current time. 
180 * The 1 billion number actually came from the client 181 * as 1 million, but the units in the over the wire 182 * request are microseconds instead of nanoseconds. 183 * 184 * This is an overload of the protocol and should be 185 * documented in the NFS Version 2 protocol specification. 186 */ 187 if (va.va_mask & AT_MTIME) { 188 if (va.va_mtime.tv_nsec == 1000000000) { 189 gethrestime(&va.va_mtime); 190 va.va_atime = va.va_mtime; 191 va.va_mask |= AT_ATIME; 192 flag = 0; 193 } else 194 flag = ATTR_UTIME; 195 } else 196 flag = 0; 197 198 /* 199 * If the filesystem is exported with nosuid, then mask off 200 * the setuid and setgid bits. 201 */ 202 if ((va.va_mask & AT_MODE) && vp->v_type == VREG && 203 (exi->exi_export.ex_flags & EX_NOSUID)) 204 va.va_mode &= ~(VSUID | VSGID); 205 206 ct.cc_sysid = 0; 207 ct.cc_pid = 0; 208 ct.cc_caller_id = nfs2_srv_caller_id; 209 ct.cc_flags = CC_DONTBLOCK; 210 211 /* 212 * We need to specially handle size changes because it is 213 * possible for the client to create a file with modes 214 * which indicate read-only, but with the file opened for 215 * writing. If the client then tries to set the size of 216 * the file, then the normal access checking done in 217 * VOP_SETATTR would prevent the client from doing so, 218 * although it should be legal for it to do so. To get 219 * around this, we do the access checking for ourselves 220 * and then use VOP_SPACE which doesn't do the access 221 * checking which VOP_SETATTR does. VOP_SPACE can only 222 * operate on VREG files, let VOP_SETATTR handle the other 223 * extremely rare cases. 224 * Also the client should not be allowed to change the 225 * size of the file if there is a conflicting non-blocking 226 * mandatory lock in the region of change. 
227 */ 228 if (vp->v_type == VREG && va.va_mask & AT_SIZE) { 229 if (nbl_need_check(vp)) { 230 nbl_start_crit(vp, RW_READER); 231 in_crit = 1; 232 } 233 234 bva.va_mask = AT_UID | AT_SIZE; 235 236 error = VOP_GETATTR(vp, &bva, 0, cr, &ct); 237 238 if (error) { 239 if (in_crit) 240 nbl_end_crit(vp); 241 VN_RELE(vp); 242 ns->ns_status = puterrno(error); 243 return; 244 } 245 246 if (in_crit) { 247 u_offset_t offset; 248 ssize_t length; 249 250 if (va.va_size < bva.va_size) { 251 offset = va.va_size; 252 length = bva.va_size - va.va_size; 253 } else { 254 offset = bva.va_size; 255 length = va.va_size - bva.va_size; 256 } 257 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0, 258 NULL)) { 259 error = EACCES; 260 } 261 } 262 263 if (crgetuid(cr) == bva.va_uid && !error && 264 va.va_size != bva.va_size) { 265 va.va_mask &= ~AT_SIZE; 266 bf.l_type = F_WRLCK; 267 bf.l_whence = 0; 268 bf.l_start = (off64_t)va.va_size; 269 bf.l_len = 0; 270 bf.l_sysid = 0; 271 bf.l_pid = 0; 272 273 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE, 274 (offset_t)va.va_size, cr, &ct); 275 } 276 if (in_crit) 277 nbl_end_crit(vp); 278 } else 279 error = 0; 280 281 /* 282 * Do the setattr. 283 */ 284 if (!error && va.va_mask) { 285 error = VOP_SETATTR(vp, &va, flag, cr, &ct); 286 } 287 288 /* 289 * check if the monitor on either vop_space or vop_setattr detected 290 * a delegation conflict and if so, mark the thread flag as 291 * wouldblock so that the response is dropped and the client will 292 * try again. 293 */ 294 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 295 VN_RELE(vp); 296 curthread->t_flag |= T_WOULDBLOCK; 297 return; 298 } 299 300 if (!error) { 301 va.va_mask = AT_ALL; /* get everything */ 302 303 error = rfs4_delegated_getattr(vp, &va, 0, cr); 304 305 /* check for overflows */ 306 if (!error) { 307 acl_perm(vp, exi, &va, cr); 308 error = vattr_to_nattr(&va, &ns->ns_attr); 309 } 310 } 311 312 ct.cc_flags = 0; 313 314 /* 315 * Force modified metadata out to stable storage. 
316 */ 317 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct); 318 319 VN_RELE(vp); 320 321 ns->ns_status = puterrno(error); 322 } 323 void * 324 rfs_setattr_getfh(struct nfssaargs *args) 325 { 326 return (&args->saa_fh); 327 } 328 329 /* 330 * Directory lookup. 331 * Returns an fhandle and file attributes for file name in a directory. 332 */ 333 /* ARGSUSED */ 334 void 335 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr, 336 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 337 { 338 int error; 339 vnode_t *dvp; 340 vnode_t *vp; 341 struct vattr va; 342 fhandle_t *fhp = da->da_fhandle; 343 struct sec_ol sec = {0, 0}; 344 bool_t publicfh_flag = FALSE, auth_weak = FALSE; 345 char *name; 346 struct sockaddr *ca; 347 348 /* 349 * Trusted Extension doesn't support NFSv2. MOUNT 350 * will reject v2 clients. Need to prevent v2 client 351 * access via WebNFS here. 352 */ 353 if (is_system_labeled() && req->rq_vers == 2) { 354 dr->dr_status = NFSERR_ACCES; 355 return; 356 } 357 358 /* 359 * Disallow NULL paths 360 */ 361 if (da->da_name == NULL || *da->da_name == '\0') { 362 dr->dr_status = NFSERR_ACCES; 363 return; 364 } 365 366 /* 367 * Allow lookups from the root - the default 368 * location of the public filehandle. 369 */ 370 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) { 371 dvp = rootdir; 372 VN_HOLD(dvp); 373 } else { 374 dvp = nfs_fhtovp(fhp, exi); 375 if (dvp == NULL) { 376 dr->dr_status = NFSERR_STALE; 377 return; 378 } 379 } 380 381 /* 382 * Not allow lookup beyond root. 383 * If the filehandle matches a filehandle of the exi, 384 * then the ".." refers beyond the root of an exported filesystem. 
385 */ 386 if (strcmp(da->da_name, "..") == 0 && 387 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) { 388 VN_RELE(dvp); 389 dr->dr_status = NFSERR_NOENT; 390 return; 391 } 392 393 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 394 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND, 395 MAXPATHLEN); 396 397 if (name == NULL) { 398 dr->dr_status = NFSERR_ACCES; 399 return; 400 } 401 402 /* 403 * If the public filehandle is used then allow 404 * a multi-component lookup, i.e. evaluate 405 * a pathname and follow symbolic links if 406 * necessary. 407 * 408 * This may result in a vnode in another filesystem 409 * which is OK as long as the filesystem is exported. 410 */ 411 if (PUBLIC_FH2(fhp)) { 412 publicfh_flag = TRUE; 413 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi, 414 &sec); 415 } else { 416 /* 417 * Do a normal single component lookup. 418 */ 419 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr, 420 NULL, NULL, NULL); 421 } 422 423 if (name != da->da_name) 424 kmem_free(name, MAXPATHLEN); 425 426 427 if (!error) { 428 va.va_mask = AT_ALL; /* we want everything */ 429 430 error = rfs4_delegated_getattr(vp, &va, 0, cr); 431 432 /* check for overflows */ 433 if (!error) { 434 acl_perm(vp, exi, &va, cr); 435 error = vattr_to_nattr(&va, &dr->dr_attr); 436 if (!error) { 437 if (sec.sec_flags & SEC_QUERY) 438 error = makefh_ol(&dr->dr_fhandle, exi, 439 sec.sec_index); 440 else { 441 error = makefh(&dr->dr_fhandle, vp, 442 exi); 443 if (!error && publicfh_flag && 444 !chk_clnt_sec(exi, req)) 445 auth_weak = TRUE; 446 } 447 } 448 } 449 VN_RELE(vp); 450 } 451 452 VN_RELE(dvp); 453 454 /* 455 * If publicfh_flag is true then we have called rfs_publicfh_mclookup 456 * and have obtained a new exportinfo in exi which needs to be 457 * released. Note the the original exportinfo pointed to by exi 458 * will be released by the caller, comon_dispatch. 
459 */ 460 if (publicfh_flag && exi != NULL) 461 exi_rele(exi); 462 463 /* 464 * If it's public fh, no 0x81, and client's flavor is 465 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now. 466 * Then set RPC status to AUTH_TOOWEAK in common_dispatch. 467 */ 468 if (auth_weak) 469 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR; 470 else 471 dr->dr_status = puterrno(error); 472 } 473 void * 474 rfs_lookup_getfh(struct nfsdiropargs *da) 475 { 476 return (da->da_fhandle); 477 } 478 479 /* 480 * Read symbolic link. 481 * Returns the string in the symbolic link at the given fhandle. 482 */ 483 /* ARGSUSED */ 484 void 485 rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi, 486 struct svc_req *req, cred_t *cr) 487 { 488 int error; 489 struct iovec iov; 490 struct uio uio; 491 vnode_t *vp; 492 struct vattr va; 493 struct sockaddr *ca; 494 char *name = NULL; 495 int is_referral = 0; 496 497 vp = nfs_fhtovp(fhp, exi); 498 if (vp == NULL) { 499 rl->rl_data = NULL; 500 rl->rl_status = NFSERR_STALE; 501 return; 502 } 503 504 va.va_mask = AT_MODE; 505 506 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 507 508 if (error) { 509 VN_RELE(vp); 510 rl->rl_data = NULL; 511 rl->rl_status = puterrno(error); 512 return; 513 } 514 515 if (MANDLOCK(vp, va.va_mode)) { 516 VN_RELE(vp); 517 rl->rl_data = NULL; 518 rl->rl_status = NFSERR_ACCES; 519 return; 520 } 521 522 /* We lied about the object type for a referral */ 523 if (vn_is_nfs_reparse(vp, cr)) 524 is_referral = 1; 525 526 /* 527 * XNFS and RFC1094 require us to return ENXIO if argument 528 * is not a link. BUGID 1138002. 529 */ 530 if (vp->v_type != VLNK && !is_referral) { 531 VN_RELE(vp); 532 rl->rl_data = NULL; 533 rl->rl_status = NFSERR_NXIO; 534 return; 535 } 536 537 /* 538 * Allocate data for pathname. This will be freed by rfs_rlfree. 
539 */ 540 rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP); 541 542 if (is_referral) { 543 char *s; 544 size_t strsz; 545 546 /* Get an artificial symlink based on a referral */ 547 s = build_symlink(vp, cr, &strsz); 548 global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++; 549 DTRACE_PROBE2(nfs2serv__func__referral__reflink, 550 vnode_t *, vp, char *, s); 551 if (s == NULL) 552 error = EINVAL; 553 else { 554 error = 0; 555 (void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN); 556 rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN); 557 kmem_free(s, strsz); 558 } 559 560 } else { 561 562 /* 563 * Set up io vector to read sym link data 564 */ 565 iov.iov_base = rl->rl_data; 566 iov.iov_len = NFS_MAXPATHLEN; 567 uio.uio_iov = &iov; 568 uio.uio_iovcnt = 1; 569 uio.uio_segflg = UIO_SYSSPACE; 570 uio.uio_extflg = UIO_COPY_CACHED; 571 uio.uio_loffset = (offset_t)0; 572 uio.uio_resid = NFS_MAXPATHLEN; 573 574 /* 575 * Do the readlink. 576 */ 577 error = VOP_READLINK(vp, &uio, cr, NULL); 578 579 rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid); 580 581 if (!error) 582 rl->rl_data[rl->rl_count] = '\0'; 583 584 } 585 586 587 VN_RELE(vp); 588 589 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 590 name = nfscmd_convname(ca, exi, rl->rl_data, 591 NFSCMD_CONV_OUTBOUND, MAXPATHLEN); 592 593 if (name != NULL && name != rl->rl_data) { 594 kmem_free(rl->rl_data, NFS_MAXPATHLEN); 595 rl->rl_data = name; 596 } 597 598 /* 599 * XNFS and RFC1094 require us to return ENXIO if argument 600 * is not a link. UFS returns EINVAL if this is the case, 601 * so we do the mapping here. BUGID 1138002. 
602 */ 603 if (error == EINVAL) 604 rl->rl_status = NFSERR_NXIO; 605 else 606 rl->rl_status = puterrno(error); 607 608 } 609 void * 610 rfs_readlink_getfh(fhandle_t *fhp) 611 { 612 return (fhp); 613 } 614 /* 615 * Free data allocated by rfs_readlink 616 */ 617 void 618 rfs_rlfree(struct nfsrdlnres *rl) 619 { 620 if (rl->rl_data != NULL) 621 kmem_free(rl->rl_data, NFS_MAXPATHLEN); 622 } 623 624 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *); 625 626 /* 627 * Read data. 628 * Returns some data read from the file at the given fhandle. 629 */ 630 /* ARGSUSED */ 631 void 632 rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr, 633 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 634 { 635 vnode_t *vp; 636 int error; 637 struct vattr va; 638 struct iovec iov; 639 struct uio uio; 640 mblk_t *mp; 641 int alloc_err = 0; 642 int in_crit = 0; 643 caller_context_t ct; 644 645 vp = nfs_fhtovp(&ra->ra_fhandle, exi); 646 if (vp == NULL) { 647 rr->rr_data = NULL; 648 rr->rr_status = NFSERR_STALE; 649 return; 650 } 651 652 if (vp->v_type != VREG) { 653 VN_RELE(vp); 654 rr->rr_data = NULL; 655 rr->rr_status = NFSERR_ISDIR; 656 return; 657 } 658 659 ct.cc_sysid = 0; 660 ct.cc_pid = 0; 661 ct.cc_caller_id = nfs2_srv_caller_id; 662 ct.cc_flags = CC_DONTBLOCK; 663 664 /* 665 * Enter the critical region before calling VOP_RWLOCK 666 * to avoid a deadlock with write requests. 
667 */ 668 if (nbl_need_check(vp)) { 669 nbl_start_crit(vp, RW_READER); 670 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count, 671 0, NULL)) { 672 nbl_end_crit(vp); 673 VN_RELE(vp); 674 rr->rr_data = NULL; 675 rr->rr_status = NFSERR_ACCES; 676 return; 677 } 678 in_crit = 1; 679 } 680 681 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct); 682 683 /* check if a monitor detected a delegation conflict */ 684 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 685 VN_RELE(vp); 686 /* mark as wouldblock so response is dropped */ 687 curthread->t_flag |= T_WOULDBLOCK; 688 689 rr->rr_data = NULL; 690 return; 691 } 692 693 va.va_mask = AT_ALL; 694 695 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 696 697 if (error) { 698 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 699 if (in_crit) 700 nbl_end_crit(vp); 701 702 VN_RELE(vp); 703 rr->rr_data = NULL; 704 rr->rr_status = puterrno(error); 705 706 return; 707 } 708 709 /* 710 * This is a kludge to allow reading of files created 711 * with no read permission. The owner of the file 712 * is always allowed to read it. 713 */ 714 if (crgetuid(cr) != va.va_uid) { 715 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct); 716 717 if (error) { 718 /* 719 * Exec is the same as read over the net because 720 * of demand loading. 
721 */ 722 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct); 723 } 724 if (error) { 725 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 726 if (in_crit) 727 nbl_end_crit(vp); 728 VN_RELE(vp); 729 rr->rr_data = NULL; 730 rr->rr_status = puterrno(error); 731 732 return; 733 } 734 } 735 736 if (MANDLOCK(vp, va.va_mode)) { 737 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 738 if (in_crit) 739 nbl_end_crit(vp); 740 741 VN_RELE(vp); 742 rr->rr_data = NULL; 743 rr->rr_status = NFSERR_ACCES; 744 745 return; 746 } 747 748 rr->rr_ok.rrok_wlist_len = 0; 749 rr->rr_ok.rrok_wlist = NULL; 750 751 if ((u_offset_t)ra->ra_offset >= va.va_size) { 752 rr->rr_count = 0; 753 rr->rr_data = NULL; 754 /* 755 * In this case, status is NFS_OK, but there is no data 756 * to encode. So set rr_mp to NULL. 757 */ 758 rr->rr_mp = NULL; 759 rr->rr_ok.rrok_wlist = ra->ra_wlist; 760 if (rr->rr_ok.rrok_wlist) 761 clist_zero_len(rr->rr_ok.rrok_wlist); 762 goto done; 763 } 764 765 if (ra->ra_wlist) { 766 mp = NULL; 767 rr->rr_mp = NULL; 768 (void) rdma_get_wchunk(req, &iov, ra->ra_wlist); 769 if (ra->ra_count > iov.iov_len) { 770 rr->rr_data = NULL; 771 rr->rr_status = NFSERR_INVAL; 772 goto done; 773 } 774 } else { 775 /* 776 * mp will contain the data to be sent out in the read reply. 777 * This will be freed after the reply has been sent out (by the 778 * driver). 779 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so 780 * that the call to xdrmblk_putmblk() never fails. 
781 */ 782 mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG, 783 &alloc_err); 784 ASSERT(mp != NULL); 785 ASSERT(alloc_err == 0); 786 787 rr->rr_mp = mp; 788 789 /* 790 * Set up io vector 791 */ 792 iov.iov_base = (caddr_t)mp->b_datap->db_base; 793 iov.iov_len = ra->ra_count; 794 } 795 796 uio.uio_iov = &iov; 797 uio.uio_iovcnt = 1; 798 uio.uio_segflg = UIO_SYSSPACE; 799 uio.uio_extflg = UIO_COPY_CACHED; 800 uio.uio_loffset = (offset_t)ra->ra_offset; 801 uio.uio_resid = ra->ra_count; 802 803 error = VOP_READ(vp, &uio, 0, cr, &ct); 804 805 if (error) { 806 if (mp) 807 freeb(mp); 808 809 /* 810 * check if a monitor detected a delegation conflict and 811 * mark as wouldblock so response is dropped 812 */ 813 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 814 curthread->t_flag |= T_WOULDBLOCK; 815 else 816 rr->rr_status = puterrno(error); 817 818 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 819 if (in_crit) 820 nbl_end_crit(vp); 821 822 VN_RELE(vp); 823 rr->rr_data = NULL; 824 825 return; 826 } 827 828 /* 829 * Get attributes again so we can send the latest access 830 * time to the client side for his cache. 
831 */ 832 va.va_mask = AT_ALL; 833 834 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 835 836 if (error) { 837 if (mp) 838 freeb(mp); 839 840 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 841 if (in_crit) 842 nbl_end_crit(vp); 843 844 VN_RELE(vp); 845 rr->rr_data = NULL; 846 rr->rr_status = puterrno(error); 847 848 return; 849 } 850 851 rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid); 852 853 if (mp) { 854 rr->rr_data = (char *)mp->b_datap->db_base; 855 } else { 856 if (ra->ra_wlist) { 857 rr->rr_data = (caddr_t)iov.iov_base; 858 if (!rdma_setup_read_data2(ra, rr)) { 859 rr->rr_data = NULL; 860 rr->rr_status = puterrno(NFSERR_INVAL); 861 } 862 } 863 } 864 done: 865 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 866 if (in_crit) 867 nbl_end_crit(vp); 868 869 acl_perm(vp, exi, &va, cr); 870 871 /* check for overflows */ 872 error = vattr_to_nattr(&va, &rr->rr_attr); 873 874 VN_RELE(vp); 875 876 rr->rr_status = puterrno(error); 877 } 878 879 /* 880 * Free data allocated by rfs_read 881 */ 882 void 883 rfs_rdfree(struct nfsrdresult *rr) 884 { 885 mblk_t *mp; 886 887 if (rr->rr_status == NFS_OK) { 888 mp = rr->rr_mp; 889 if (mp != NULL) 890 freeb(mp); 891 } 892 } 893 894 void * 895 rfs_read_getfh(struct nfsreadargs *ra) 896 { 897 return (&ra->ra_fhandle); 898 } 899 900 #define MAX_IOVECS 12 901 902 #ifdef DEBUG 903 static int rfs_write_sync_hits = 0; 904 static int rfs_write_sync_misses = 0; 905 #endif 906 907 /* 908 * Write data to file. 909 * Returns attributes of a file after writing some data to it. 910 * 911 * Any changes made here, especially in error handling might have 912 * to also be done in rfs_write (which clusters write requests). 
913 */ 914 void 915 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns, 916 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 917 { 918 int error; 919 vnode_t *vp; 920 rlim64_t rlimit; 921 struct vattr va; 922 struct uio uio; 923 struct iovec iov[MAX_IOVECS]; 924 mblk_t *m; 925 struct iovec *iovp; 926 int iovcnt; 927 cred_t *savecred; 928 int in_crit = 0; 929 caller_context_t ct; 930 931 vp = nfs_fhtovp(&wa->wa_fhandle, exi); 932 if (vp == NULL) { 933 ns->ns_status = NFSERR_STALE; 934 return; 935 } 936 937 if (rdonly(exi, req)) { 938 VN_RELE(vp); 939 ns->ns_status = NFSERR_ROFS; 940 return; 941 } 942 943 if (vp->v_type != VREG) { 944 VN_RELE(vp); 945 ns->ns_status = NFSERR_ISDIR; 946 return; 947 } 948 949 ct.cc_sysid = 0; 950 ct.cc_pid = 0; 951 ct.cc_caller_id = nfs2_srv_caller_id; 952 ct.cc_flags = CC_DONTBLOCK; 953 954 va.va_mask = AT_UID|AT_MODE; 955 956 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 957 958 if (error) { 959 VN_RELE(vp); 960 ns->ns_status = puterrno(error); 961 962 return; 963 } 964 965 if (crgetuid(cr) != va.va_uid) { 966 /* 967 * This is a kludge to allow writes of files created 968 * with read only permission. The owner of the file 969 * is always allowed to write it. 970 */ 971 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct); 972 973 if (error) { 974 VN_RELE(vp); 975 ns->ns_status = puterrno(error); 976 return; 977 } 978 } 979 980 /* 981 * Can't access a mandatory lock file. This might cause 982 * the NFS service thread to block forever waiting for a 983 * lock to be released that will never be released. 984 */ 985 if (MANDLOCK(vp, va.va_mode)) { 986 VN_RELE(vp); 987 ns->ns_status = NFSERR_ACCES; 988 return; 989 } 990 991 /* 992 * We have to enter the critical region before calling VOP_RWLOCK 993 * to avoid a deadlock with ufs. 
994 */ 995 if (nbl_need_check(vp)) { 996 nbl_start_crit(vp, RW_READER); 997 in_crit = 1; 998 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset, 999 wa->wa_count, 0, NULL)) { 1000 error = EACCES; 1001 goto out; 1002 } 1003 } 1004 1005 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct); 1006 1007 /* check if a monitor detected a delegation conflict */ 1008 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 1009 VN_RELE(vp); 1010 /* mark as wouldblock so response is dropped */ 1011 curthread->t_flag |= T_WOULDBLOCK; 1012 return; 1013 } 1014 1015 if (wa->wa_data || wa->wa_rlist) { 1016 /* Do the RDMA thing if necessary */ 1017 if (wa->wa_rlist) { 1018 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3); 1019 iov[0].iov_len = wa->wa_count; 1020 } else { 1021 iov[0].iov_base = wa->wa_data; 1022 iov[0].iov_len = wa->wa_count; 1023 } 1024 uio.uio_iov = iov; 1025 uio.uio_iovcnt = 1; 1026 uio.uio_segflg = UIO_SYSSPACE; 1027 uio.uio_extflg = UIO_COPY_DEFAULT; 1028 uio.uio_loffset = (offset_t)wa->wa_offset; 1029 uio.uio_resid = wa->wa_count; 1030 /* 1031 * The limit is checked on the client. We 1032 * should allow any size writes here. 1033 */ 1034 uio.uio_llimit = curproc->p_fsz_ctl; 1035 rlimit = uio.uio_llimit - wa->wa_offset; 1036 if (rlimit < (rlim64_t)uio.uio_resid) 1037 uio.uio_resid = (uint_t)rlimit; 1038 1039 /* 1040 * for now we assume no append mode 1041 */ 1042 /* 1043 * We're changing creds because VM may fault and we need 1044 * the cred of the current thread to be used if quota 1045 * checking is enabled. 
1046 */ 1047 savecred = curthread->t_cred; 1048 curthread->t_cred = cr; 1049 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct); 1050 curthread->t_cred = savecred; 1051 } else { 1052 iovcnt = 0; 1053 for (m = wa->wa_mblk; m != NULL; m = m->b_cont) 1054 iovcnt++; 1055 if (iovcnt <= MAX_IOVECS) { 1056 #ifdef DEBUG 1057 rfs_write_sync_hits++; 1058 #endif 1059 iovp = iov; 1060 } else { 1061 #ifdef DEBUG 1062 rfs_write_sync_misses++; 1063 #endif 1064 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP); 1065 } 1066 mblk_to_iov(wa->wa_mblk, iovcnt, iovp); 1067 uio.uio_iov = iovp; 1068 uio.uio_iovcnt = iovcnt; 1069 uio.uio_segflg = UIO_SYSSPACE; 1070 uio.uio_extflg = UIO_COPY_DEFAULT; 1071 uio.uio_loffset = (offset_t)wa->wa_offset; 1072 uio.uio_resid = wa->wa_count; 1073 /* 1074 * The limit is checked on the client. We 1075 * should allow any size writes here. 1076 */ 1077 uio.uio_llimit = curproc->p_fsz_ctl; 1078 rlimit = uio.uio_llimit - wa->wa_offset; 1079 if (rlimit < (rlim64_t)uio.uio_resid) 1080 uio.uio_resid = (uint_t)rlimit; 1081 1082 /* 1083 * For now we assume no append mode. 1084 */ 1085 /* 1086 * We're changing creds because VM may fault and we need 1087 * the cred of the current thread to be used if quota 1088 * checking is enabled. 1089 */ 1090 savecred = curthread->t_cred; 1091 curthread->t_cred = cr; 1092 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct); 1093 curthread->t_cred = savecred; 1094 1095 if (iovp != iov) 1096 kmem_free(iovp, sizeof (*iovp) * iovcnt); 1097 } 1098 1099 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); 1100 1101 if (!error) { 1102 /* 1103 * Get attributes again so we send the latest mod 1104 * time to the client side for his cache. 
1105 */ 1106 va.va_mask = AT_ALL; /* now we want everything */ 1107 1108 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 1109 1110 /* check for overflows */ 1111 if (!error) { 1112 acl_perm(vp, exi, &va, cr); 1113 error = vattr_to_nattr(&va, &ns->ns_attr); 1114 } 1115 } 1116 1117 out: 1118 if (in_crit) 1119 nbl_end_crit(vp); 1120 VN_RELE(vp); 1121 1122 /* check if a monitor detected a delegation conflict */ 1123 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 1124 /* mark as wouldblock so response is dropped */ 1125 curthread->t_flag |= T_WOULDBLOCK; 1126 else 1127 ns->ns_status = puterrno(error); 1128 1129 } 1130 1131 struct rfs_async_write { 1132 struct nfswriteargs *wa; 1133 struct nfsattrstat *ns; 1134 struct svc_req *req; 1135 cred_t *cr; 1136 kthread_t *thread; 1137 struct rfs_async_write *list; 1138 }; 1139 1140 struct rfs_async_write_list { 1141 fhandle_t *fhp; 1142 kcondvar_t cv; 1143 struct rfs_async_write *list; 1144 struct rfs_async_write_list *next; 1145 }; 1146 1147 static struct rfs_async_write_list *rfs_async_write_head = NULL; 1148 static kmutex_t rfs_async_write_lock; 1149 static int rfs_write_async = 1; /* enables write clustering if == 1 */ 1150 1151 #define MAXCLIOVECS 42 1152 #define RFSWRITE_INITVAL (enum nfsstat) -1 1153 1154 #ifdef DEBUG 1155 static int rfs_write_hits = 0; 1156 static int rfs_write_misses = 0; 1157 #endif 1158 1159 /* 1160 * Write data to file. 1161 * Returns attributes of a file after writing some data to it. 
*/
void
rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct rfs_async_write_list *lp;
	struct rfs_async_write_list *nlp;
	struct rfs_async_write *rp;
	struct rfs_async_write *nrp;
	struct rfs_async_write *trp;
	struct rfs_async_write *lrp;
	int data_written;
	int iovcnt;
	mblk_t *m;
	struct iovec *iovp;
	struct iovec *niovp;
	struct iovec iov[MAXCLIOVECS];
	int count;
	int rcount;
	uint_t off;
	uint_t len;
	struct rfs_async_write nrpsp;
	struct rfs_async_write_list nlpsp;
	ushort_t t_flag;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;

	/* Clustering is switched off: hand the request to rfs_write_sync. */
	if (!rfs_write_async) {
		rfs_write_sync(wa, ns, exi, req, cr);
		return;
	}

	/*
	 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
	 * is considered an OK.
	 */
	ns->ns_status = RFSWRITE_INITVAL;

	/*
	 * Describe this request in a stack-resident record so that it
	 * can be linked into a cluster.
	 */
	nrp = &nrpsp;
	nrp->wa = wa;
	nrp->ns = ns;
	nrp->req = req;
	nrp->cr = cr;
	nrp->thread = curthread;

	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);

	/*
	 * Look to see if there is already a cluster started
	 * for this file.
	 */
	mutex_enter(&rfs_async_write_lock);
	for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
		if (bcmp(&wa->wa_fhandle, lp->fhp,
		    sizeof (fhandle_t)) == 0)
			break;
	}

	/*
	 * If lp is non-NULL, then there is already a cluster
	 * started.  We need to place ourselves in the cluster
	 * list in the right place as determined by starting
	 * offset.  Conflicts with non-blocking mandatory locked
	 * regions will be checked when the cluster is processed.
	 */
	if (lp != NULL) {
		rp = lp->list;
		trp = NULL;
		while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
			trp = rp;
			rp = rp->list;
		}
		nrp->list = rp;
		if (trp == NULL)
			lp->list = nrp;
		else
			trp->list = nrp;
		/*
		 * Sleep until our status has been changed from
		 * RFSWRITE_INITVAL; the thread processing the cluster
		 * broadcasts on the cluster's cv after filling it in.
		 */
		while (nrp->ns->ns_status == RFSWRITE_INITVAL)
			cv_wait(&lp->cv, &rfs_async_write_lock);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * No cluster started yet, start one and add ourselves
	 * to the list of clusters.
	 */
	nrp->list = NULL;

	nlp = &nlpsp;
	nlp->fhp = &wa->wa_fhandle;
	cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
	nlp->list = nrp;
	nlp->next = NULL;

	/* Append the new cluster at the tail of the global list. */
	if (rfs_async_write_head == NULL) {
		rfs_async_write_head = nlp;
	} else {
		lp = rfs_async_write_head;
		while (lp->next != NULL)
			lp = lp->next;
		lp->next = nlp;
	}
	mutex_exit(&rfs_async_write_lock);

	/*
	 * Convert the file handle common to all of the requests
	 * in this cluster to a vnode.
	 */
	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		/*
		 * Unlink the cluster and fail every request that has
		 * joined it so far with NFSERR_STALE.
		 */
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_STALE;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Can only write regular files.  Attempts to write any
	 * other file types fail with EISDIR.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_ISDIR;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK, to avoid a
	 * deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Lock the file for writing.  This operation provides
	 * the delay which allows clusters to grow.
	 */
	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		/* Only requests that have no status yet are touched. */
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			if (rp->ns->ns_status == RFSWRITE_INITVAL) {
				rp->ns->ns_status = puterrno(error);
				rp->thread->t_flag |= T_WOULDBLOCK;
			}
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Disconnect this cluster from the list of clusters.
	 * The cluster that is being dealt with must be fixed
	 * in size after this point, so there is no reason
	 * to leave it on the list so that new requests can
	 * find it.
	 *
	 * The algorithm is that the first write request will
	 * create a cluster, convert the file handle to a
	 * vnode pointer, and then lock the file for writing.
	 * This request is not likely to be clustered with
	 * any others.  However, the next request will create
	 * a new cluster and be blocked in VOP_RWLOCK while
	 * the first request is being processed.  This delay
	 * will allow more requests to be clustered in this
	 * second cluster.
	 */
	mutex_enter(&rfs_async_write_lock);
	if (rfs_async_write_head == nlp)
		rfs_async_write_head = nlp->next;
	else {
		lp = rfs_async_write_head;
		while (lp->next != nlp)
			lp = lp->next;
		lp->next = nlp->next;
	}
	mutex_exit(&rfs_async_write_lock);

	/*
	 * Step through the list of requests in this cluster.
	 * We need to check permissions to make sure that all
	 * of the requests have sufficient permission to write
	 * the file.  A cluster can be composed of requests
	 * from different clients and different users on each
	 * client.
	 *
	 * As a side effect, we also calculate the size of the
	 * byte range that this cluster encompasses.
	 */
	rp = nlp->list;
	off = rp->wa->wa_offset;
	len = (uint_t)0;
	do {
		/*
		 * Note: "continue" in this do/while jumps to the loop
		 * condition, which is what advances rp.
		 */
		if (rdonly(exi, rp->req)) {
			rp->ns->ns_status = NFSERR_ROFS;
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}

		va.va_mask = AT_UID|AT_MODE;

		error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

		if (!error) {
			if (crgetuid(rp->cr) != va.va_uid) {
				/*
				 * This is a kludge to allow writes of files
				 * created with read only permission.  The
				 * owner of the file is always allowed to
				 * write it.
				 */
				error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
			}
			if (!error && MANDLOCK(vp, va.va_mode))
				error = EACCES;
		}

		/*
		 * Check for a conflict with a nbmand-locked region.
		 */
		if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
		    rp->wa->wa_count, 0, NULL)) {
			error = EACCES;
		}

		if (error) {
			rp->ns->ns_status = puterrno(error);
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}
		/* Grow the cluster's byte range to cover this request. */
		if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
			len = rp->wa->wa_offset + rp->wa->wa_count - off;
	} while ((rp = rp->list) != NULL);

	/*
	 * Step through the cluster attempting to gather as many
	 * requests which are contiguous as possible.  These
	 * contiguous requests are handled via one call to VOP_WRITE
	 * instead of different calls to VOP_WRITE.  We also keep
	 * track of the fact that any data was written.
	 */
	rp = nlp->list;
	data_written = 0;
	do {
		/*
		 * Skip any requests which are already marked as having an
		 * error.
		 */
		if (rp->ns->ns_status != RFSWRITE_INITVAL) {
			rp = rp->list;
			continue;
		}

		/*
		 * Count the number of iovec's which are required
		 * to handle this set of requests.  One iovec is
		 * needed for each data buffer, whether addressed
		 * by wa_data or by the b_rptr pointers in the
		 * mblk chains.
		 *
		 * On exit from the loop, lrp points at the first
		 * request that is NOT part of this contiguous run
		 * (or is NULL at the end of the cluster).
		 */
		iovcnt = 0;
		lrp = rp;
		for (;;) {
			if (lrp->wa->wa_data || lrp->wa->wa_rlist)
				iovcnt++;
			else {
				m = lrp->wa->wa_mblk;
				while (m != NULL) {
					iovcnt++;
					m = m->b_cont;
				}
			}
			if (lrp->list == NULL ||
			    lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
			    lrp->wa->wa_offset + lrp->wa->wa_count !=
			    lrp->list->wa->wa_offset) {
				lrp = lrp->list;
				break;
			}
			lrp = lrp->list;
		}

		/*
		 * Use the on-stack iovec array when it is large enough,
		 * otherwise allocate one (freed after VOP_WRITE below).
		 */
		if (iovcnt <= MAXCLIOVECS) {
#ifdef DEBUG
			rfs_write_hits++;
#endif
			niovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_misses++;
#endif
			niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
		}
		/*
		 * Put together the scatter/gather iovecs.
		 */
		iovp = niovp;
		trp = rp;
		count = 0;
		do {
			if (trp->wa->wa_data || trp->wa->wa_rlist) {
				if (trp->wa->wa_rlist) {
					iovp->iov_base =
					    (char *)((trp->wa->wa_rlist)->
					    u.c_daddr3);
					iovp->iov_len = trp->wa->wa_count;
				} else  {
					iovp->iov_base = trp->wa->wa_data;
					iovp->iov_len = trp->wa->wa_count;
				}
				iovp++;
			} else {
				m = trp->wa->wa_mblk;
				rcount = trp->wa->wa_count;
				while (m != NULL) {
					iovp->iov_base = (caddr_t)m->b_rptr;
					iovp->iov_len = (m->b_wptr - m->b_rptr);
					rcount -= iovp->iov_len;
					/*
					 * Trim the last iovec so that no more
					 * than wa_count bytes are described.
					 */
					if (rcount < 0)
						iovp->iov_len += rcount;
					iovp++;
					if (rcount <= 0)
						break;
					m = m->b_cont;
				}
			}
			count += trp->wa->wa_count;
			trp = trp->list;
		} while (trp != lrp);

		uio.uio_iov = niovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)rp->wa->wa_offset;
		uio.uio_resid = count;
		/*
		 * The limit is checked on the client.  We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - rp->wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */

		/*
		 * We're changing creds because VM may fault
		 * and we need the cred of the current
		 * thread to be used if quota checking is
		 * enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
		curthread->t_cred = savecred;

		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			/* mark as wouldblock so response is dropped */
			curthread->t_flag |= T_WOULDBLOCK;

		if (niovp != iov)
			kmem_free(niovp, sizeof (*niovp) * iovcnt);

		if (!error) {
			data_written = 1;
			/*
			 * Get attributes again so we send the latest mod
			 * time to the client side for his cache.
			 */
			va.va_mask = AT_ALL;	/* now we want everything */

			error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

			if (!error)
				acl_perm(vp, exi, &va, rp->cr);
		}

		/*
		 * Fill in the status responses for each request
		 * which was just handled.  Also, copy the latest
		 * attributes in to the attribute responses if
		 * appropriate.
		 */
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		do {
			rp->thread->t_flag |= t_flag;
			/* check for overflows */
			if (!error) {
				error = vattr_to_nattr(&va, &rp->ns->ns_attr);
			}
			rp->ns->ns_status = puterrno(error);
			rp = rp->list;
		} while (rp != lrp);
	} while (rp != NULL);

	/*
	 * If any data was written at all, then we need to flush
	 * the data and metadata to stable storage.
1629 */ 1630 if (data_written) { 1631 error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct); 1632 1633 if (!error) { 1634 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct); 1635 } 1636 } 1637 1638 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); 1639 1640 if (in_crit) 1641 nbl_end_crit(vp); 1642 VN_RELE(vp); 1643 1644 t_flag = curthread->t_flag & T_WOULDBLOCK; 1645 mutex_enter(&rfs_async_write_lock); 1646 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1647 if (rp->ns->ns_status == RFSWRITE_INITVAL) { 1648 rp->ns->ns_status = puterrno(error); 1649 rp->thread->t_flag |= t_flag; 1650 } 1651 } 1652 cv_broadcast(&nlp->cv); 1653 mutex_exit(&rfs_async_write_lock); 1654 1655 } 1656 1657 void * 1658 rfs_write_getfh(struct nfswriteargs *wa) 1659 { 1660 return (&wa->wa_fhandle); 1661 } 1662 1663 /* 1664 * Create a file. 1665 * Creates a file with given attributes and returns those attributes 1666 * and an fhandle for the new file. 1667 */ 1668 void 1669 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr, 1670 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 1671 { 1672 int error; 1673 int lookuperr; 1674 int in_crit = 0; 1675 struct vattr va; 1676 vnode_t *vp; 1677 vnode_t *realvp; 1678 vnode_t *dvp; 1679 char *name = args->ca_da.da_name; 1680 vnode_t *tvp = NULL; 1681 int mode; 1682 int lookup_ok; 1683 bool_t trunc; 1684 struct sockaddr *ca; 1685 1686 /* 1687 * Disallow NULL paths 1688 */ 1689 if (name == NULL || *name == '\0') { 1690 dr->dr_status = NFSERR_ACCES; 1691 return; 1692 } 1693 1694 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 1695 if (dvp == NULL) { 1696 dr->dr_status = NFSERR_STALE; 1697 return; 1698 } 1699 1700 error = sattr_to_vattr(args->ca_sa, &va); 1701 if (error) { 1702 dr->dr_status = puterrno(error); 1703 return; 1704 } 1705 1706 /* 1707 * Must specify the mode. 
1708 */ 1709 if (!(va.va_mask & AT_MODE)) { 1710 VN_RELE(dvp); 1711 dr->dr_status = NFSERR_INVAL; 1712 return; 1713 } 1714 1715 /* 1716 * This is a completely gross hack to make mknod 1717 * work over the wire until we can wack the protocol 1718 */ 1719 if ((va.va_mode & IFMT) == IFCHR) { 1720 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV) 1721 va.va_type = VFIFO; /* xtra kludge for named pipe */ 1722 else { 1723 va.va_type = VCHR; 1724 /* 1725 * uncompress the received dev_t 1726 * if the top half is zero indicating a request 1727 * from an `older style' OS. 1728 */ 1729 if ((va.va_size & 0xffff0000) == 0) 1730 va.va_rdev = nfsv2_expdev(va.va_size); 1731 else 1732 va.va_rdev = (dev_t)va.va_size; 1733 } 1734 va.va_mask &= ~AT_SIZE; 1735 } else if ((va.va_mode & IFMT) == IFBLK) { 1736 va.va_type = VBLK; 1737 /* 1738 * uncompress the received dev_t 1739 * if the top half is zero indicating a request 1740 * from an `older style' OS. 1741 */ 1742 if ((va.va_size & 0xffff0000) == 0) 1743 va.va_rdev = nfsv2_expdev(va.va_size); 1744 else 1745 va.va_rdev = (dev_t)va.va_size; 1746 va.va_mask &= ~AT_SIZE; 1747 } else if ((va.va_mode & IFMT) == IFSOCK) { 1748 va.va_type = VSOCK; 1749 } else { 1750 va.va_type = VREG; 1751 } 1752 va.va_mode &= ~IFMT; 1753 va.va_mask |= AT_TYPE; 1754 1755 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 1756 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND, 1757 MAXPATHLEN); 1758 if (name == NULL) { 1759 dr->dr_status = puterrno(EINVAL); 1760 return; 1761 } 1762 1763 /* 1764 * Why was the choice made to use VWRITE as the mode to the 1765 * call to VOP_CREATE ? This results in a bug. When a client 1766 * opens a file that already exists and is RDONLY, the second 1767 * open fails with an EACESS because of the mode. 1768 * bug ID 1054648. 
1769 */ 1770 lookup_ok = 0; 1771 mode = VWRITE; 1772 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) { 1773 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1774 NULL, NULL, NULL); 1775 if (!error) { 1776 struct vattr at; 1777 1778 lookup_ok = 1; 1779 at.va_mask = AT_MODE; 1780 error = VOP_GETATTR(tvp, &at, 0, cr, NULL); 1781 if (!error) 1782 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD; 1783 VN_RELE(tvp); 1784 tvp = NULL; 1785 } 1786 } 1787 1788 if (!lookup_ok) { 1789 if (rdonly(exi, req)) { 1790 error = EROFS; 1791 } else if (va.va_type != VREG && va.va_type != VFIFO && 1792 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) { 1793 error = EPERM; 1794 } else { 1795 error = 0; 1796 } 1797 } 1798 1799 /* 1800 * If file size is being modified on an already existing file 1801 * make sure that there are no conflicting non-blocking mandatory 1802 * locks in the region being manipulated. Return EACCES if there 1803 * are conflicting locks. 1804 */ 1805 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) { 1806 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1807 NULL, NULL, NULL); 1808 1809 if (!lookuperr && 1810 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) { 1811 VN_RELE(tvp); 1812 curthread->t_flag |= T_WOULDBLOCK; 1813 goto out; 1814 } 1815 1816 if (!lookuperr && nbl_need_check(tvp)) { 1817 /* 1818 * The file exists. Now check if it has any 1819 * conflicting non-blocking mandatory locks 1820 * in the region being changed. 
1821 */ 1822 struct vattr bva; 1823 u_offset_t offset; 1824 ssize_t length; 1825 1826 nbl_start_crit(tvp, RW_READER); 1827 in_crit = 1; 1828 1829 bva.va_mask = AT_SIZE; 1830 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL); 1831 if (!error) { 1832 if (va.va_size < bva.va_size) { 1833 offset = va.va_size; 1834 length = bva.va_size - va.va_size; 1835 } else { 1836 offset = bva.va_size; 1837 length = va.va_size - bva.va_size; 1838 } 1839 if (length) { 1840 if (nbl_conflict(tvp, NBL_WRITE, 1841 offset, length, 0, NULL)) { 1842 error = EACCES; 1843 } 1844 } 1845 } 1846 if (error) { 1847 nbl_end_crit(tvp); 1848 VN_RELE(tvp); 1849 in_crit = 0; 1850 } 1851 } else if (tvp != NULL) { 1852 VN_RELE(tvp); 1853 } 1854 } 1855 1856 if (!error) { 1857 /* 1858 * If filesystem is shared with nosuid the remove any 1859 * setuid/setgid bits on create. 1860 */ 1861 if (va.va_type == VREG && 1862 exi->exi_export.ex_flags & EX_NOSUID) 1863 va.va_mode &= ~(VSUID | VSGID); 1864 1865 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0, 1866 NULL, NULL); 1867 1868 if (!error) { 1869 1870 if ((va.va_mask & AT_SIZE) && (va.va_size == 0)) 1871 trunc = TRUE; 1872 else 1873 trunc = FALSE; 1874 1875 if (rfs4_check_delegated(FWRITE, vp, trunc)) { 1876 VN_RELE(vp); 1877 curthread->t_flag |= T_WOULDBLOCK; 1878 goto out; 1879 } 1880 va.va_mask = AT_ALL; 1881 1882 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 1883 1884 /* check for overflows */ 1885 if (!error) { 1886 acl_perm(vp, exi, &va, cr); 1887 error = vattr_to_nattr(&va, &dr->dr_attr); 1888 if (!error) { 1889 error = makefh(&dr->dr_fhandle, vp, 1890 exi); 1891 } 1892 } 1893 /* 1894 * Force modified metadata out to stable storage. 
1895 * 1896 * if a underlying vp exists, pass it to VOP_FSYNC 1897 */ 1898 if (VOP_REALVP(vp, &realvp, NULL) == 0) 1899 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL); 1900 else 1901 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 1902 VN_RELE(vp); 1903 } 1904 1905 if (in_crit) { 1906 nbl_end_crit(tvp); 1907 VN_RELE(tvp); 1908 } 1909 } 1910 1911 /* 1912 * Force modified data and metadata out to stable storage. 1913 */ 1914 (void) VOP_FSYNC(dvp, 0, cr, NULL); 1915 1916 out: 1917 1918 VN_RELE(dvp); 1919 1920 dr->dr_status = puterrno(error); 1921 1922 if (name != args->ca_da.da_name) 1923 kmem_free(name, MAXPATHLEN); 1924 } 1925 void * 1926 rfs_create_getfh(struct nfscreatargs *args) 1927 { 1928 return (args->ca_da.da_fhandle); 1929 } 1930 1931 /* 1932 * Remove a file. 1933 * Remove named file from parent directory. 1934 */ 1935 void 1936 rfs_remove(struct nfsdiropargs *da, enum nfsstat *status, 1937 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 1938 { 1939 int error = 0; 1940 vnode_t *vp; 1941 vnode_t *targvp; 1942 int in_crit = 0; 1943 1944 /* 1945 * Disallow NULL paths 1946 */ 1947 if (da->da_name == NULL || *da->da_name == '\0') { 1948 *status = NFSERR_ACCES; 1949 return; 1950 } 1951 1952 vp = nfs_fhtovp(da->da_fhandle, exi); 1953 if (vp == NULL) { 1954 *status = NFSERR_STALE; 1955 return; 1956 } 1957 1958 if (rdonly(exi, req)) { 1959 VN_RELE(vp); 1960 *status = NFSERR_ROFS; 1961 return; 1962 } 1963 1964 /* 1965 * Check for a conflict with a non-blocking mandatory share reservation. 1966 */ 1967 error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0, 1968 NULL, cr, NULL, NULL, NULL); 1969 if (error != 0) { 1970 VN_RELE(vp); 1971 *status = puterrno(error); 1972 return; 1973 } 1974 1975 /* 1976 * If the file is delegated to an v4 client, then initiate 1977 * recall and drop this request (by setting T_WOULDBLOCK). 1978 * The client will eventually re-transmit the request and 1979 * (hopefully), by then, the v4 client will have returned 1980 * the delegation. 
*/

	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
		VN_RELE(vp);
		VN_RELE(targvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (nbl_need_check(targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

out:
	/* Common exit: leave the critical region and drop both holds. */
	if (in_crit)
		nbl_end_crit(targvp);
	VN_RELE(targvp);
	VN_RELE(vp);

	*status = puterrno(error);

}

/* Return the directory file handle carried in the remove arguments. */
void *
rfs_remove_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}

/*
 * rename a file
 * Give a file (from) a new name (to).
 *
 * The result is stored in *status.  On the two delegation-conflict
 * paths the thread is marked T_WOULDBLOCK and *status is left untouched.
 */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error = 0;
	vnode_t *fromvp;
	vnode_t *tovp;
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;
	vnode_t *targvp;
	int in_crit = 0;

	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	/*
	 * After the release above, to_exi is only compared by address;
	 * it is not dereferenced.
	 */
	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	/* Both file handles must name directories. */
	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(exi, req)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	/* Common exit: leave the critical region and drop all holds. */
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}

/* Return the "from" directory file handle of the rename arguments. */
void *
rfs_rename_getfh(struct nfsrnmargs *args)
{
	return (args->rna_from.da_fhandle);
}

/*
 * Link to a file.
 * Create a file (to) which is a hard link to the given file (from).
 */
void
rfs_link(struct nfslinkargs *args, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *fromvp;
	vnode_t *tovp;
	struct exportinfo *to_exi;
	fhandle_t *fh;

	fromvp = nfs_fhtovp(args->la_from, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	fh = args->la_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	/*
	 * After the release above, to_exi is only compared by address;
	 * it is not dereferenced.
	 */
	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}
	/*
	 * Disallow NULL paths
	 */
	if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(exi, req)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
2239 */ 2240 (void) VOP_FSYNC(tovp, 0, cr, NULL); 2241 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL); 2242 2243 VN_RELE(tovp); 2244 VN_RELE(fromvp); 2245 2246 *status = puterrno(error); 2247 2248 } 2249 void * 2250 rfs_link_getfh(struct nfslinkargs *args) 2251 { 2252 return (args->la_from); 2253 } 2254 2255 /* 2256 * Symbolicly link to a file. 2257 * Create a file (to) with the given attributes which is a symbolic link 2258 * to the given path name (to). 2259 */ 2260 void 2261 rfs_symlink(struct nfsslargs *args, enum nfsstat *status, 2262 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 2263 { 2264 int error; 2265 struct vattr va; 2266 vnode_t *vp; 2267 vnode_t *svp; 2268 int lerror; 2269 struct sockaddr *ca; 2270 char *name = NULL; 2271 2272 /* 2273 * Disallow NULL paths 2274 */ 2275 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') { 2276 *status = NFSERR_ACCES; 2277 return; 2278 } 2279 2280 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi); 2281 if (vp == NULL) { 2282 *status = NFSERR_STALE; 2283 return; 2284 } 2285 2286 if (rdonly(exi, req)) { 2287 VN_RELE(vp); 2288 *status = NFSERR_ROFS; 2289 return; 2290 } 2291 2292 error = sattr_to_vattr(args->sla_sa, &va); 2293 if (error) { 2294 VN_RELE(vp); 2295 *status = puterrno(error); 2296 return; 2297 } 2298 2299 if (!(va.va_mask & AT_MODE)) { 2300 VN_RELE(vp); 2301 *status = NFSERR_INVAL; 2302 return; 2303 } 2304 2305 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2306 name = nfscmd_convname(ca, exi, args->sla_tnm, 2307 NFSCMD_CONV_INBOUND, MAXPATHLEN); 2308 2309 if (name == NULL) { 2310 *status = NFSERR_ACCES; 2311 return; 2312 } 2313 2314 va.va_type = VLNK; 2315 va.va_mask |= AT_TYPE; 2316 2317 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0); 2318 2319 /* 2320 * Force new data and metadata out to stable storage. 
2321 */ 2322 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0, 2323 NULL, cr, NULL, NULL, NULL); 2324 2325 if (!lerror) { 2326 (void) VOP_FSYNC(svp, 0, cr, NULL); 2327 VN_RELE(svp); 2328 } 2329 2330 /* 2331 * Force modified data and metadata out to stable storage. 2332 */ 2333 (void) VOP_FSYNC(vp, 0, cr, NULL); 2334 2335 VN_RELE(vp); 2336 2337 *status = puterrno(error); 2338 if (name != args->sla_tnm) 2339 kmem_free(name, MAXPATHLEN); 2340 2341 } 2342 void * 2343 rfs_symlink_getfh(struct nfsslargs *args) 2344 { 2345 return (args->sla_from.da_fhandle); 2346 } 2347 2348 /* 2349 * Make a directory. 2350 * Create a directory with the given name, parent directory, and attributes. 2351 * Returns a file handle and attributes for the new directory. 2352 */ 2353 void 2354 rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr, 2355 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 2356 { 2357 int error; 2358 struct vattr va; 2359 vnode_t *dvp = NULL; 2360 vnode_t *vp; 2361 char *name = args->ca_da.da_name; 2362 2363 /* 2364 * Disallow NULL paths 2365 */ 2366 if (name == NULL || *name == '\0') { 2367 dr->dr_status = NFSERR_ACCES; 2368 return; 2369 } 2370 2371 vp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 2372 if (vp == NULL) { 2373 dr->dr_status = NFSERR_STALE; 2374 return; 2375 } 2376 2377 if (rdonly(exi, req)) { 2378 VN_RELE(vp); 2379 dr->dr_status = NFSERR_ROFS; 2380 return; 2381 } 2382 2383 error = sattr_to_vattr(args->ca_sa, &va); 2384 if (error) { 2385 VN_RELE(vp); 2386 dr->dr_status = puterrno(error); 2387 return; 2388 } 2389 2390 if (!(va.va_mask & AT_MODE)) { 2391 VN_RELE(vp); 2392 dr->dr_status = NFSERR_INVAL; 2393 return; 2394 } 2395 2396 va.va_type = VDIR; 2397 va.va_mask |= AT_TYPE; 2398 2399 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL); 2400 2401 if (!error) { 2402 /* 2403 * Attribtutes of the newly created directory should 2404 * be returned to the client. 
2405 */ 2406 va.va_mask = AT_ALL; /* We want everything */ 2407 error = VOP_GETATTR(dvp, &va, 0, cr, NULL); 2408 2409 /* check for overflows */ 2410 if (!error) { 2411 acl_perm(vp, exi, &va, cr); 2412 error = vattr_to_nattr(&va, &dr->dr_attr); 2413 if (!error) { 2414 error = makefh(&dr->dr_fhandle, dvp, exi); 2415 } 2416 } 2417 /* 2418 * Force new data and metadata out to stable storage. 2419 */ 2420 (void) VOP_FSYNC(dvp, 0, cr, NULL); 2421 VN_RELE(dvp); 2422 } 2423 2424 /* 2425 * Force modified data and metadata out to stable storage. 2426 */ 2427 (void) VOP_FSYNC(vp, 0, cr, NULL); 2428 2429 VN_RELE(vp); 2430 2431 dr->dr_status = puterrno(error); 2432 2433 } 2434 void * 2435 rfs_mkdir_getfh(struct nfscreatargs *args) 2436 { 2437 return (args->ca_da.da_fhandle); 2438 } 2439 2440 /* 2441 * Remove a directory. 2442 * Remove the given directory name from the given parent directory. 2443 */ 2444 void 2445 rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status, 2446 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 2447 { 2448 int error; 2449 vnode_t *vp; 2450 2451 2452 /* 2453 * Disallow NULL paths 2454 */ 2455 if (da->da_name == NULL || *da->da_name == '\0') { 2456 *status = NFSERR_ACCES; 2457 return; 2458 } 2459 2460 vp = nfs_fhtovp(da->da_fhandle, exi); 2461 if (vp == NULL) { 2462 *status = NFSERR_STALE; 2463 return; 2464 } 2465 2466 if (rdonly(exi, req)) { 2467 VN_RELE(vp); 2468 *status = NFSERR_ROFS; 2469 return; 2470 } 2471 2472 /* 2473 * VOP_RMDIR now takes a new third argument (the current 2474 * directory of the process). That's because someone 2475 * wants to return EINVAL if one tries to remove ".". 2476 * Of course, NFS servers have no idea what their 2477 * clients' current directories are. We fake it by 2478 * supplying a vnode known to exist and illegal to 2479 * remove. 2480 */ 2481 error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0); 2482 2483 /* 2484 * Force modified data and metadata out to stable storage. 
2485 */ 2486 (void) VOP_FSYNC(vp, 0, cr, NULL); 2487 2488 VN_RELE(vp); 2489 2490 /* 2491 * System V defines rmdir to return EEXIST, not ENOTEMPTY, 2492 * if the directory is not empty. A System V NFS server 2493 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit 2494 * over the wire. 2495 */ 2496 if (error == EEXIST) 2497 *status = NFSERR_NOTEMPTY; 2498 else 2499 *status = puterrno(error); 2500 2501 } 2502 void * 2503 rfs_rmdir_getfh(struct nfsdiropargs *da) 2504 { 2505 return (da->da_fhandle); 2506 } 2507 2508 /* ARGSUSED */ 2509 void 2510 rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd, 2511 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 2512 { 2513 int error; 2514 int iseof; 2515 struct iovec iov; 2516 struct uio uio; 2517 vnode_t *vp; 2518 char *ndata = NULL; 2519 struct sockaddr *ca; 2520 size_t nents; 2521 int ret; 2522 2523 vp = nfs_fhtovp(&rda->rda_fh, exi); 2524 if (vp == NULL) { 2525 rd->rd_entries = NULL; 2526 rd->rd_status = NFSERR_STALE; 2527 return; 2528 } 2529 2530 if (vp->v_type != VDIR) { 2531 VN_RELE(vp); 2532 rd->rd_entries = NULL; 2533 rd->rd_status = NFSERR_NOTDIR; 2534 return; 2535 } 2536 2537 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); 2538 2539 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL); 2540 2541 if (error) { 2542 rd->rd_entries = NULL; 2543 goto bad; 2544 } 2545 2546 if (rda->rda_count == 0) { 2547 rd->rd_entries = NULL; 2548 rd->rd_size = 0; 2549 rd->rd_eof = FALSE; 2550 goto bad; 2551 } 2552 2553 rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA); 2554 2555 /* 2556 * Allocate data for entries. This will be freed by rfs_rddirfree. 
2557 */ 2558 rd->rd_bufsize = (uint_t)rda->rda_count; 2559 rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP); 2560 2561 /* 2562 * Set up io vector to read directory data 2563 */ 2564 iov.iov_base = (caddr_t)rd->rd_entries; 2565 iov.iov_len = rda->rda_count; 2566 uio.uio_iov = &iov; 2567 uio.uio_iovcnt = 1; 2568 uio.uio_segflg = UIO_SYSSPACE; 2569 uio.uio_extflg = UIO_COPY_CACHED; 2570 uio.uio_loffset = (offset_t)rda->rda_offset; 2571 uio.uio_resid = rda->rda_count; 2572 2573 /* 2574 * read directory 2575 */ 2576 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0); 2577 2578 /* 2579 * Clean up 2580 */ 2581 if (!error) { 2582 /* 2583 * set size and eof 2584 */ 2585 if (uio.uio_resid == rda->rda_count) { 2586 rd->rd_size = 0; 2587 rd->rd_eof = TRUE; 2588 } else { 2589 rd->rd_size = (uint32_t)(rda->rda_count - 2590 uio.uio_resid); 2591 rd->rd_eof = iseof ? TRUE : FALSE; 2592 } 2593 } 2594 2595 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2596 nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size); 2597 ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents, 2598 rda->rda_count, &ndata); 2599 2600 if (ret != 0) { 2601 size_t dropbytes; 2602 /* 2603 * We had to drop one or more entries in order to fit 2604 * during the character conversion. We need to patch 2605 * up the size and eof info. 2606 */ 2607 if (rd->rd_eof) 2608 rd->rd_eof = FALSE; 2609 dropbytes = nfscmd_dropped_entrysize( 2610 (struct dirent64 *)rd->rd_entries, nents, ret); 2611 rd->rd_size -= dropbytes; 2612 } 2613 if (ndata == NULL) { 2614 ndata = (char *)rd->rd_entries; 2615 } else if (ndata != (char *)rd->rd_entries) { 2616 kmem_free(rd->rd_entries, rd->rd_bufsize); 2617 rd->rd_entries = (void *)ndata; 2618 rd->rd_bufsize = rda->rda_count; 2619 } 2620 2621 bad: 2622 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 2623 2624 #if 0 /* notyet */ 2625 /* 2626 * Don't do this. 
It causes local disk writes when just 2627 * reading the file and the overhead is deemed larger 2628 * than the benefit. 2629 */ 2630 /* 2631 * Force modified metadata out to stable storage. 2632 */ 2633 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 2634 #endif 2635 2636 VN_RELE(vp); 2637 2638 rd->rd_status = puterrno(error); 2639 2640 } 2641 void * 2642 rfs_readdir_getfh(struct nfsrddirargs *rda) 2643 { 2644 return (&rda->rda_fh); 2645 } 2646 void 2647 rfs_rddirfree(struct nfsrddirres *rd) 2648 { 2649 if (rd->rd_entries != NULL) 2650 kmem_free(rd->rd_entries, rd->rd_bufsize); 2651 } 2652 2653 /* ARGSUSED */ 2654 void 2655 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi, 2656 struct svc_req *req, cred_t *cr) 2657 { 2658 int error; 2659 struct statvfs64 sb; 2660 vnode_t *vp; 2661 2662 vp = nfs_fhtovp(fh, exi); 2663 if (vp == NULL) { 2664 fs->fs_status = NFSERR_STALE; 2665 return; 2666 } 2667 2668 error = VFS_STATVFS(vp->v_vfsp, &sb); 2669 2670 if (!error) { 2671 fs->fs_tsize = nfstsize(); 2672 fs->fs_bsize = sb.f_frsize; 2673 fs->fs_blocks = sb.f_blocks; 2674 fs->fs_bfree = sb.f_bfree; 2675 fs->fs_bavail = sb.f_bavail; 2676 } 2677 2678 VN_RELE(vp); 2679 2680 fs->fs_status = puterrno(error); 2681 2682 } 2683 void * 2684 rfs_statfs_getfh(fhandle_t *fh) 2685 { 2686 return (fh); 2687 } 2688 2689 static int 2690 sattr_to_vattr(struct nfssattr *sa, struct vattr *vap) 2691 { 2692 vap->va_mask = 0; 2693 2694 /* 2695 * There was a sign extension bug in some VFS based systems 2696 * which stored the mode as a short. When it would get 2697 * assigned to a u_long, no sign extension would occur. 2698 * It needed to, but this wasn't noticed because sa_mode 2699 * would then get assigned back to the short, thus ignoring 2700 * the upper 16 bits of sa_mode. 2701 * 2702 * To make this implementation work for both broken 2703 * clients and good clients, we check for both versions 2704 * of the mode. 
2705 */ 2706 if (sa->sa_mode != (uint32_t)((ushort_t)-1) && 2707 sa->sa_mode != (uint32_t)-1) { 2708 vap->va_mask |= AT_MODE; 2709 vap->va_mode = sa->sa_mode; 2710 } 2711 if (sa->sa_uid != (uint32_t)-1) { 2712 vap->va_mask |= AT_UID; 2713 vap->va_uid = sa->sa_uid; 2714 } 2715 if (sa->sa_gid != (uint32_t)-1) { 2716 vap->va_mask |= AT_GID; 2717 vap->va_gid = sa->sa_gid; 2718 } 2719 if (sa->sa_size != (uint32_t)-1) { 2720 vap->va_mask |= AT_SIZE; 2721 vap->va_size = sa->sa_size; 2722 } 2723 if (sa->sa_atime.tv_sec != (int32_t)-1 && 2724 sa->sa_atime.tv_usec != (int32_t)-1) { 2725 #ifndef _LP64 2726 /* return error if time overflow */ 2727 if (!NFS2_TIME_OK(sa->sa_atime.tv_sec)) 2728 return (EOVERFLOW); 2729 #endif 2730 vap->va_mask |= AT_ATIME; 2731 /* 2732 * nfs protocol defines times as unsigned so don't extend sign, 2733 * unless sysadmin set nfs_allow_preepoch_time. 2734 */ 2735 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec); 2736 vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000); 2737 } 2738 if (sa->sa_mtime.tv_sec != (int32_t)-1 && 2739 sa->sa_mtime.tv_usec != (int32_t)-1) { 2740 #ifndef _LP64 2741 /* return error if time overflow */ 2742 if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec)) 2743 return (EOVERFLOW); 2744 #endif 2745 vap->va_mask |= AT_MTIME; 2746 /* 2747 * nfs protocol defines times as unsigned so don't extend sign, 2748 * unless sysadmin set nfs_allow_preepoch_time. 2749 */ 2750 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec); 2751 vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000); 2752 } 2753 return (0); 2754 } 2755 2756 static enum nfsftype vt_to_nf[] = { 2757 0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0 2758 }; 2759 2760 /* 2761 * check the following fields for overflow: nodeid, size, and time. 2762 * There could be a problem when converting 64-bit LP64 fields 2763 * into 32-bit ones. Return an error if there is an overflow. 
2764 */ 2765 int 2766 vattr_to_nattr(struct vattr *vap, struct nfsfattr *na) 2767 { 2768 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD); 2769 na->na_type = vt_to_nf[vap->va_type]; 2770 2771 if (vap->va_mode == (unsigned short) -1) 2772 na->na_mode = (uint32_t)-1; 2773 else 2774 na->na_mode = VTTOIF(vap->va_type) | vap->va_mode; 2775 2776 if (vap->va_uid == (unsigned short)(-1)) 2777 na->na_uid = (uint32_t)(-1); 2778 else if (vap->va_uid == UID_NOBODY) 2779 na->na_uid = (uint32_t)NFS_UID_NOBODY; 2780 else 2781 na->na_uid = vap->va_uid; 2782 2783 if (vap->va_gid == (unsigned short)(-1)) 2784 na->na_gid = (uint32_t)-1; 2785 else if (vap->va_gid == GID_NOBODY) 2786 na->na_gid = (uint32_t)NFS_GID_NOBODY; 2787 else 2788 na->na_gid = vap->va_gid; 2789 2790 /* 2791 * Do we need to check fsid for overflow? It is 64-bit in the 2792 * vattr, but are bigger than 32 bit values supported? 2793 */ 2794 na->na_fsid = vap->va_fsid; 2795 2796 na->na_nodeid = vap->va_nodeid; 2797 2798 /* 2799 * Check to make sure that the nodeid is representable over the 2800 * wire without losing bits. 2801 */ 2802 if (vap->va_nodeid != (u_longlong_t)na->na_nodeid) 2803 return (EFBIG); 2804 na->na_nlink = vap->va_nlink; 2805 2806 /* 2807 * Check for big files here, instead of at the caller. See 2808 * comments in cstat for large special file explanation. 2809 */ 2810 if (vap->va_size > (u_longlong_t)MAXOFF32_T) { 2811 if ((vap->va_type == VREG) || (vap->va_type == VDIR)) 2812 return (EFBIG); 2813 if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) { 2814 /* UNKNOWN_SIZE | OVERFLOW */ 2815 na->na_size = MAXOFF32_T; 2816 } else 2817 na->na_size = vap->va_size; 2818 } else 2819 na->na_size = vap->va_size; 2820 2821 /* 2822 * If the vnode times overflow the 32-bit times that NFS2 2823 * uses on the wire then return an error. 
2824 */ 2825 if (!NFS_VAP_TIME_OK(vap)) { 2826 return (EOVERFLOW); 2827 } 2828 na->na_atime.tv_sec = vap->va_atime.tv_sec; 2829 na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000; 2830 2831 na->na_mtime.tv_sec = vap->va_mtime.tv_sec; 2832 na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000; 2833 2834 na->na_ctime.tv_sec = vap->va_ctime.tv_sec; 2835 na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000; 2836 2837 /* 2838 * If the dev_t will fit into 16 bits then compress 2839 * it, otherwise leave it alone. See comments in 2840 * nfs_client.c. 2841 */ 2842 if (getminor(vap->va_rdev) <= SO4_MAXMIN && 2843 getmajor(vap->va_rdev) <= SO4_MAXMAJ) 2844 na->na_rdev = nfsv2_cmpdev(vap->va_rdev); 2845 else 2846 (void) cmpldev(&na->na_rdev, vap->va_rdev); 2847 2848 na->na_blocks = vap->va_nblocks; 2849 na->na_blocksize = vap->va_blksize; 2850 2851 /* 2852 * This bit of ugliness is a *TEMPORARY* hack to preserve the 2853 * over-the-wire protocols for named-pipe vnodes. It remaps the 2854 * VFIFO type to the special over-the-wire type. (see note in nfs.h) 2855 * 2856 * BUYER BEWARE: 2857 * If you are porting the NFS to a non-Sun server, you probably 2858 * don't want to include the following block of code. The 2859 * over-the-wire special file types will be changing with the 2860 * NFS Protocol Revision. 2861 */ 2862 if (vap->va_type == VFIFO) 2863 NA_SETFIFO(na); 2864 return (0); 2865 } 2866 2867 /* 2868 * acl v2 support: returns approximate permission. 2869 * default: returns minimal permission (more restrictive) 2870 * aclok: returns maximal permission (less restrictive) 2871 * This routine changes the permissions that are alaredy in *va. 2872 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES, 2873 * CLASS_OBJ is always the same as GROUP_OBJ entry. 
2874 */ 2875 static void 2876 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr) 2877 { 2878 vsecattr_t vsa; 2879 int aclcnt; 2880 aclent_t *aclentp; 2881 mode_t mask_perm; 2882 mode_t grp_perm; 2883 mode_t other_perm; 2884 mode_t other_orig; 2885 int error; 2886 2887 /* dont care default acl */ 2888 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT); 2889 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL); 2890 2891 if (!error) { 2892 aclcnt = vsa.vsa_aclcnt; 2893 if (aclcnt > MIN_ACL_ENTRIES) { 2894 /* non-trivial ACL */ 2895 aclentp = vsa.vsa_aclentp; 2896 if (exi->exi_export.ex_flags & EX_ACLOK) { 2897 /* maximal permissions */ 2898 grp_perm = 0; 2899 other_perm = 0; 2900 for (; aclcnt > 0; aclcnt--, aclentp++) { 2901 switch (aclentp->a_type) { 2902 case USER_OBJ: 2903 break; 2904 case USER: 2905 grp_perm |= 2906 aclentp->a_perm << 3; 2907 other_perm |= aclentp->a_perm; 2908 break; 2909 case GROUP_OBJ: 2910 grp_perm |= 2911 aclentp->a_perm << 3; 2912 break; 2913 case GROUP: 2914 other_perm |= aclentp->a_perm; 2915 break; 2916 case OTHER_OBJ: 2917 other_orig = aclentp->a_perm; 2918 break; 2919 case CLASS_OBJ: 2920 mask_perm = aclentp->a_perm; 2921 break; 2922 default: 2923 break; 2924 } 2925 } 2926 grp_perm &= mask_perm << 3; 2927 other_perm &= mask_perm; 2928 other_perm |= other_orig; 2929 2930 } else { 2931 /* minimal permissions */ 2932 grp_perm = 070; 2933 other_perm = 07; 2934 for (; aclcnt > 0; aclcnt--, aclentp++) { 2935 switch (aclentp->a_type) { 2936 case USER_OBJ: 2937 break; 2938 case USER: 2939 case CLASS_OBJ: 2940 grp_perm &= 2941 aclentp->a_perm << 3; 2942 other_perm &= 2943 aclentp->a_perm; 2944 break; 2945 case GROUP_OBJ: 2946 grp_perm &= 2947 aclentp->a_perm << 3; 2948 break; 2949 case GROUP: 2950 other_perm &= 2951 aclentp->a_perm; 2952 break; 2953 case OTHER_OBJ: 2954 other_perm &= 2955 aclentp->a_perm; 2956 break; 2957 default: 2958 break; 2959 } 2960 } 2961 } 2962 /* copy to va */ 2963 va->va_mode &= ~077; 2964 va->va_mode |= 
grp_perm | other_perm; 2965 } 2966 if (vsa.vsa_aclcnt) 2967 kmem_free(vsa.vsa_aclentp, 2968 vsa.vsa_aclcnt * sizeof (aclent_t)); 2969 } 2970 } 2971 2972 void 2973 rfs_srvrinit(void) 2974 { 2975 mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL); 2976 nfs2_srv_caller_id = fs_new_caller_id(); 2977 } 2978 2979 void 2980 rfs_srvrfini(void) 2981 { 2982 mutex_destroy(&rfs_async_write_lock); 2983 } 2984 2985 static int 2986 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr) 2987 { 2988 struct clist *wcl; 2989 int wlist_len; 2990 uint32_t count = rr->rr_count; 2991 2992 wcl = ra->ra_wlist; 2993 2994 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) { 2995 return (FALSE); 2996 } 2997 2998 wcl = ra->ra_wlist; 2999 rr->rr_ok.rrok_wlist_len = wlist_len; 3000 rr->rr_ok.rrok_wlist = wcl; 3001 3002 return (TRUE); 3003 }