1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /*
26 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
27 * All rights reserved.
28 */
29
30 #include <sys/param.h>
31 #include <sys/types.h>
32 #include <sys/systm.h>
33 #include <sys/cred.h>
34 #include <sys/buf.h>
35 #include <sys/vfs.h>
36 #include <sys/vnode.h>
37 #include <sys/uio.h>
38 #include <sys/stat.h>
39 #include <sys/errno.h>
40 #include <sys/sysmacros.h>
41 #include <sys/statvfs.h>
42 #include <sys/kmem.h>
43 #include <sys/kstat.h>
44 #include <sys/dirent.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/vtrace.h>
48 #include <sys/mode.h>
49 #include <sys/acl.h>
50 #include <sys/nbmlock.h>
51 #include <sys/policy.h>
52 #include <sys/sdt.h>
53
54 #include <rpc/types.h>
55 #include <rpc/auth.h>
56 #include <rpc/svc.h>
57
58 #include <nfs/nfs.h>
59 #include <nfs/export.h>
60 #include <nfs/nfs_cmd.h>
61
62 #include <vm/hat.h>
63 #include <vm/as.h>
64 #include <vm/seg.h>
65 #include <vm/seg_map.h>
66 #include <vm/seg_kmem.h>
67
68 #include <sys/strsubr.h>
69
70 /*
71 * These are the interface routines for the server side of the
72 * Network File System. See the NFS version 2 protocol specification
73 * for a description of this interface.
74 */
75
76 static int sattr_to_vattr(struct nfssattr *, struct vattr *);
77 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
78 cred_t *);
79
/*
 * Some "over the wire" UNIX file types. These are encoded
 * into the mode. This needs to be fixed in the next rev.
 */
#define	IFMT	0170000		/* type of file */
#define	IFCHR	0020000		/* character special */
#define	IFBLK	0060000		/* block special */
#define	IFSOCK	0140000		/* socket */

/* Caller id stamped into caller_context_t (cc_caller_id) by the v2 server ops. */
u_longlong_t nfs2_srv_caller_id;
90
91 /*
92 * Get file attributes.
93 * Returns the current attributes of the file with the given fhandle.
94 */
95 /* ARGSUSED */
96 void
97 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
98 struct svc_req *req, cred_t *cr)
99 {
100 int error;
101 vnode_t *vp;
102 struct vattr va;
103
104 vp = nfs_fhtovp(fhp, exi);
105 if (vp == NULL) {
106 ns->ns_status = NFSERR_STALE;
107 return;
108 }
109
110 /*
111 * Do the getattr.
112 */
113 va.va_mask = AT_ALL; /* we want all the attributes */
114
115 error = rfs4_delegated_getattr(vp, &va, 0, cr);
116
117 /* check for overflows */
118 if (!error) {
119 /* Lie about the object type for a referral */
120 if (vn_is_nfs_reparse(vp, cr))
121 va.va_type = VLNK;
122
123 acl_perm(vp, exi, &va, cr);
124 error = vattr_to_nattr(&va, &ns->ns_attr);
125 }
126
127 VN_RELE(vp);
128
129 ns->ns_status = puterrno(error);
130 }
131 void *
132 rfs_getattr_getfh(fhandle_t *fhp)
133 {
134 return (fhp);
135 }
136
/*
 * Set file attributes.
 * Sets the attributes of the file with the given fhandle.  Returns
 * the new attributes.
 */
void
rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	int flag;		/* flags for VOP_SETATTR (0 or ATTR_UTIME) */
	int in_crit = 0;	/* 1 while inside the nbl critical region */
	vnode_t *vp;
	struct vattr va;	/* attributes requested by the client */
	struct vattr bva;	/* current ("before") attributes of the file */
	struct flock64 bf;
	caller_context_t ct;


	vp = nfs_fhtovp(&args->saa_fh, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	/* No attribute changes on read-only exports or filesystems. */
	if (rdonly(exi, req) || vn_is_readonly(vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	/* Decode the over-the-wire sattr into a vattr. */
	error = sattr_to_vattr(&args->saa_sa, &va);
	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);
		return;
	}

	/*
	 * If the client is requesting a change to the mtime,
	 * but the nanosecond field is set to 1 billion, then
	 * this is a flag to the server that it should set the
	 * atime and mtime fields to the server's current time.
	 * The 1 billion number actually came from the client
	 * as 1 million, but the units in the over the wire
	 * request are microseconds instead of nanoseconds.
	 *
	 * This is an overload of the protocol and should be
	 * documented in the NFS Version 2 protocol specification.
	 */
	if (va.va_mask & AT_MTIME) {
		if (va.va_mtime.tv_nsec == 1000000000) {
			gethrestime(&va.va_mtime);
			va.va_atime = va.va_mtime;
			va.va_mask |= AT_ATIME;
			flag = 0;
		} else
			flag = ATTR_UTIME;
	} else
		flag = 0;

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
	    (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with modes
	 * which indicate read-only, but with the file opened for
	 * writing. If the client then tries to set the size of
	 * the file, then the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing so,
	 * although it should be legal for it to do so. To get
	 * around this, we do the access checking for ourselves
	 * and then use VOP_SPACE which doesn't do the access
	 * checking which VOP_SETATTR does. VOP_SPACE can only
	 * operate on VREG files, let VOP_SETATTR handle the other
	 * extremely rare cases.
	 * Also the client should not be allowed to change the
	 * size of the file if there is a conflicting non-blocking
	 * mandatory lock in the region of change.
	 */
	if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID | AT_SIZE;

		error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

		if (error) {
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}

		if (in_crit) {
			u_offset_t offset;
			ssize_t length;

			/*
			 * The affected region is the span between the old
			 * and new sizes, whichever direction it grows.
			 */
			if (va.va_size < bva.va_size) {
				offset = va.va_size;
				length = bva.va_size - va.va_size;
			} else {
				offset = bva.va_size;
				length = va.va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    NULL)) {
				error = EACCES;
			}
		}

		/*
		 * Only the owner takes the VOP_SPACE path (the kludge
		 * described above); everyone else falls through to the
		 * normal VOP_SETATTR with AT_SIZE still set.
		 */
		if (crgetuid(cr) == bva.va_uid && !error &&
		    va.va_size != bva.va_size) {
			va.va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)va.va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;

			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)va.va_size, cr, &ct);
		}
		if (in_crit)
			nbl_end_crit(vp);
	} else
		error = 0;

	/*
	 * Do the setattr.
	 */
	if (!error && va.va_mask) {
		error = VOP_SETATTR(vp, &va, flag, cr, &ct);
	}

	/*
	 * check if the monitor on either vop_space or vop_setattr detected
	 * a delegation conflict and if so, mark the thread flag as
	 * wouldblock so that the response is dropped and the client will
	 * try again.
	 */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* get everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

	ct.cc_flags = 0;

	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
323 void *
324 rfs_setattr_getfh(struct nfssaargs *args)
325 {
326 return (&args->saa_fh);
327 }
328
/* Change and release @exip and @vpp only in success */
/*
 * Cross a mount point: if *vpp is mounted over, walk to the root of the
 * mounted filesystem and, when that filesystem is exported with EX_NOHIDE,
 * replace the caller's *vpp/*exip (releasing the old ones) with the
 * submount's vnode and exportinfo.  Not crossing (subdir not exported, or
 * "nohide" absent) is not an error: the caller's pointers are untouched
 * and 0 is returned.
 */
int
rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
{
	struct exportinfo *exi;
	vnode_t *vp;
	fid_t fid;
	int error;

	vp = *vpp;

	/* traverse() releases argument in success */
	VN_HOLD(*vpp);

	if ((error = traverse(&vp)) != 0) {
		/*
		 * Drop the extra hold taken above; the caller's original
		 * reference on *vpp remains intact.
		 * NOTE(review): assumes traverse() does not consume its
		 * argument's hold on failure — confirm against traverse().
		 */
		VN_RELE(*vpp);
		return (error);
	}

	bzero(&fid, sizeof (fid));
	fid.fid_len = MAXFIDSZ;
	error = VOP_FID(vp, &fid, NULL);
	if (error) {
		VN_RELE(vp);
		return (error);
	}

	exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
	if (exi == NULL ||
	    (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
		/* It is not error, just subdir is not exported
		 * or "nohide" is not set
		 */
		VN_RELE(vp);
	} else {
		/* go to submount */
		exi_rele(*exip);
		*exip = exi;

		VN_RELE(*vpp);
		*vpp = vp;
	}
	return (0);
}
373
374 /*
375 * Directory lookup.
376 * Returns an fhandle and file attributes for file name in a directory.
377 */
378 /* ARGSUSED */
379 void
380 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
381 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
382 {
383 int error;
384 vnode_t *dvp;
385 vnode_t *vp;
386 struct vattr va;
387 fhandle_t *fhp = da->da_fhandle;
388 struct sec_ol sec = {0, 0};
389 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
390 char *name;
391 struct sockaddr *ca;
392
393 /*
394 * Trusted Extension doesn't support NFSv2. MOUNT
395 * will reject v2 clients. Need to prevent v2 client
396 * access via WebNFS here.
397 */
398 if (is_system_labeled() && req->rq_vers == 2) {
399 dr->dr_status = NFSERR_ACCES;
400 return;
401 }
402
403 /*
404 * Disallow NULL paths
405 */
406 if (da->da_name == NULL || *da->da_name == '\0') {
407 dr->dr_status = NFSERR_ACCES;
408 return;
409 }
410
411 /*
412 * Allow lookups from the root - the default
413 * location of the public filehandle.
414 */
415 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
416 dvp = rootdir;
417 VN_HOLD(dvp);
418 } else {
419 dvp = nfs_fhtovp(fhp, exi);
420 if (dvp == NULL) {
421 dr->dr_status = NFSERR_STALE;
422 return;
423 }
424 }
425
426 /*
427 * Not allow lookup beyond root.
428 * If the filehandle matches a filehandle of the exi,
429 * then the ".." refers beyond the root of an exported filesystem.
430 */
431 if (strcmp(da->da_name, "..") == 0 &&
432 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
433 VN_RELE(dvp);
434 dr->dr_status = NFSERR_NOENT;
435 return;
436 }
437
438 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
439 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
440 MAXPATHLEN);
441
442 if (name == NULL) {
443 dr->dr_status = NFSERR_ACCES;
444 return;
445 }
446
447 exi_hold(exi);
448
449 /*
450 * If the public filehandle is used then allow
451 * a multi-component lookup, i.e. evaluate
452 * a pathname and follow symbolic links if
453 * necessary.
454 *
455 * This may result in a vnode in another filesystem
456 * which is OK as long as the filesystem is exported.
457 */
458 if (PUBLIC_FH2(fhp)) {
459 struct exportinfo *new;
460
461 publicfh_flag = TRUE;
462 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &new,
463 &sec);
464
465 if (error == 0) {
466 exi_rele(exi);
467 exi = new;
468 }
469 } else {
470 /*
471 * Do a normal single component lookup.
472 */
473 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
474 NULL, NULL, NULL);
475 }
476
477 if (name != da->da_name)
478 kmem_free(name, MAXPATHLEN);
479
480 if (error == 0 && vn_ismntpt(vp)) {
481 error = rfs_cross_mnt(&vp, &exi);
482 if (error)
483 VN_RELE(vp);
484 }
485
486 if (!error) {
487 va.va_mask = AT_ALL; /* we want everything */
488
489 error = rfs4_delegated_getattr(vp, &va, 0, cr);
490
491 /* check for overflows */
492 if (!error) {
493 acl_perm(vp, exi, &va, cr);
494 error = vattr_to_nattr(&va, &dr->dr_attr);
495 if (!error) {
496 if (sec.sec_flags & SEC_QUERY)
497 error = makefh_ol(&dr->dr_fhandle, exi,
498 sec.sec_index);
499 else {
500 error = makefh(&dr->dr_fhandle, vp,
501 exi);
502 if (!error && publicfh_flag &&
503 !chk_clnt_sec(exi, req))
504 auth_weak = TRUE;
505 }
506 }
507 }
508 VN_RELE(vp);
509 }
510
511 VN_RELE(dvp);
512
513 /* The passed argument exportinfo is released by the
514 * caller, comon_dispatch
515 */
516 exi_rele(exi);
517
518 /*
519 * If it's public fh, no 0x81, and client's flavor is
520 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
521 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
522 */
523 if (auth_weak)
524 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
525 else
526 dr->dr_status = puterrno(error);
527 }
528 void *
529 rfs_lookup_getfh(struct nfsdiropargs *da)
530 {
531 return (da->da_fhandle);
532 }
533
/*
 * Read symbolic link.
 * Returns the string in the symbolic link at the given fhandle.
 */
/* ARGSUSED */
void
rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
	struct svc_req *req, cred_t *cr)
{
	int error;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	struct vattr va;
	struct sockaddr *ca;
	char *name = NULL;
	int is_referral = 0;	/* 1 if vp is a reparse point (referral) */

	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_STALE;
		return;
	}

	/* Only the mode is needed, for the MANDLOCK check below. */
	va.va_mask = AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, NULL);

	if (error) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = puterrno(error);
		return;
	}

	/* Refuse objects subject to mandatory locking. */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_ACCES;
		return;
	}

	/* We lied about the object type for a referral */
	if (vn_is_nfs_reparse(vp, cr))
		is_referral = 1;

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. BUGID 1138002.
	 */
	if (vp->v_type != VLNK && !is_referral) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_NXIO;
		return;
	}

	/*
	 * Allocate data for pathname. This will be freed by rfs_rlfree.
	 */
	rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);

	if (is_referral) {
		char *s;
		size_t strsz;

		/* Get an artificial symlink based on a referral */
		s = build_symlink(vp, cr, &strsz);
		global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
		DTRACE_PROBE2(nfs2serv__func__referral__reflink,
		    vnode_t *, vp, char *, s);
		if (s == NULL)
			error = EINVAL;
		else {
			error = 0;
			(void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
			rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
			kmem_free(s, strsz);
		}

	} else {

		/*
		 * Set up io vector to read sym link data
		 */
		iov.iov_base = rl->rl_data;
		iov.iov_len = NFS_MAXPATHLEN;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_CACHED;
		uio.uio_loffset = (offset_t)0;
		uio.uio_resid = NFS_MAXPATHLEN;

		/*
		 * Do the readlink.
		 */
		error = VOP_READLINK(vp, &uio, cr, NULL);

		rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);

		if (!error)
			rl->rl_data[rl->rl_count] = '\0';

	}


	VN_RELE(vp);

	/*
	 * Convert the link text for the client's character set; if the
	 * conversion allocated a new buffer, swap it in for rl_data.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, rl->rl_data,
	    NFSCMD_CONV_OUTBOUND, MAXPATHLEN);

	if (name != NULL && name != rl->rl_data) {
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
		rl->rl_data = name;
	}

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. UFS returns EINVAL if this is the case,
	 * so we do the mapping here. BUGID 1138002.
	 */
	if (error == EINVAL)
		rl->rl_status = NFSERR_NXIO;
	else
		rl->rl_status = puterrno(error);

}
664 void *
665 rfs_readlink_getfh(fhandle_t *fhp)
666 {
667 return (fhp);
668 }
669 /*
670 * Free data allocated by rfs_readlink
671 */
672 void
673 rfs_rlfree(struct nfsrdlnres *rl)
674 {
675 if (rl->rl_data != NULL)
676 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
677 }
678
679 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
680
681 /*
682 * Read data.
683 * Returns some data read from the file at the given fhandle.
684 */
685 /* ARGSUSED */
686 void
687 rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
688 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
689 {
690 vnode_t *vp;
691 int error;
692 struct vattr va;
693 struct iovec iov;
694 struct uio uio;
695 mblk_t *mp;
696 int alloc_err = 0;
697 int in_crit = 0;
698 caller_context_t ct;
699
700 vp = nfs_fhtovp(&ra->ra_fhandle, exi);
701 if (vp == NULL) {
702 rr->rr_data = NULL;
703 rr->rr_status = NFSERR_STALE;
704 return;
705 }
706
707 if (vp->v_type != VREG) {
708 VN_RELE(vp);
709 rr->rr_data = NULL;
710 rr->rr_status = NFSERR_ISDIR;
711 return;
712 }
713
714 ct.cc_sysid = 0;
715 ct.cc_pid = 0;
716 ct.cc_caller_id = nfs2_srv_caller_id;
717 ct.cc_flags = CC_DONTBLOCK;
718
719 /*
720 * Enter the critical region before calling VOP_RWLOCK
721 * to avoid a deadlock with write requests.
722 */
723 if (nbl_need_check(vp)) {
724 nbl_start_crit(vp, RW_READER);
725 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
726 0, NULL)) {
727 nbl_end_crit(vp);
728 VN_RELE(vp);
729 rr->rr_data = NULL;
730 rr->rr_status = NFSERR_ACCES;
731 return;
732 }
733 in_crit = 1;
734 }
735
736 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
737
738 /* check if a monitor detected a delegation conflict */
739 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
740 VN_RELE(vp);
741 /* mark as wouldblock so response is dropped */
742 curthread->t_flag |= T_WOULDBLOCK;
743
744 rr->rr_data = NULL;
745 return;
746 }
747
748 va.va_mask = AT_ALL;
749
750 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
751
752 if (error) {
753 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
754 if (in_crit)
755 nbl_end_crit(vp);
756
757 VN_RELE(vp);
758 rr->rr_data = NULL;
759 rr->rr_status = puterrno(error);
760
761 return;
762 }
763
764 /*
765 * This is a kludge to allow reading of files created
766 * with no read permission. The owner of the file
767 * is always allowed to read it.
768 */
769 if (crgetuid(cr) != va.va_uid) {
770 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
771
772 if (error) {
773 /*
774 * Exec is the same as read over the net because
775 * of demand loading.
776 */
777 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
778 }
779 if (error) {
780 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
781 if (in_crit)
782 nbl_end_crit(vp);
783 VN_RELE(vp);
784 rr->rr_data = NULL;
785 rr->rr_status = puterrno(error);
786
787 return;
788 }
789 }
790
791 if (MANDLOCK(vp, va.va_mode)) {
792 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
793 if (in_crit)
794 nbl_end_crit(vp);
795
796 VN_RELE(vp);
797 rr->rr_data = NULL;
798 rr->rr_status = NFSERR_ACCES;
799
800 return;
801 }
802
803 rr->rr_ok.rrok_wlist_len = 0;
804 rr->rr_ok.rrok_wlist = NULL;
805
806 if ((u_offset_t)ra->ra_offset >= va.va_size) {
807 rr->rr_count = 0;
808 rr->rr_data = NULL;
809 /*
810 * In this case, status is NFS_OK, but there is no data
811 * to encode. So set rr_mp to NULL.
812 */
813 rr->rr_mp = NULL;
814 rr->rr_ok.rrok_wlist = ra->ra_wlist;
815 if (rr->rr_ok.rrok_wlist)
816 clist_zero_len(rr->rr_ok.rrok_wlist);
817 goto done;
818 }
819
820 if (ra->ra_wlist) {
821 mp = NULL;
822 rr->rr_mp = NULL;
823 (void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
824 if (ra->ra_count > iov.iov_len) {
825 rr->rr_data = NULL;
826 rr->rr_status = NFSERR_INVAL;
827 goto done;
828 }
829 } else {
830 /*
831 * mp will contain the data to be sent out in the read reply.
832 * This will be freed after the reply has been sent out (by the
833 * driver).
834 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
835 * that the call to xdrmblk_putmblk() never fails.
836 */
837 mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
838 &alloc_err);
839 ASSERT(mp != NULL);
840 ASSERT(alloc_err == 0);
841
842 rr->rr_mp = mp;
843
844 /*
845 * Set up io vector
846 */
847 iov.iov_base = (caddr_t)mp->b_datap->db_base;
848 iov.iov_len = ra->ra_count;
849 }
850
851 uio.uio_iov = &iov;
852 uio.uio_iovcnt = 1;
853 uio.uio_segflg = UIO_SYSSPACE;
854 uio.uio_extflg = UIO_COPY_CACHED;
855 uio.uio_loffset = (offset_t)ra->ra_offset;
856 uio.uio_resid = ra->ra_count;
857
858 error = VOP_READ(vp, &uio, 0, cr, &ct);
859
860 if (error) {
861 if (mp)
862 freeb(mp);
863
864 /*
865 * check if a monitor detected a delegation conflict and
866 * mark as wouldblock so response is dropped
867 */
868 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
869 curthread->t_flag |= T_WOULDBLOCK;
870 else
871 rr->rr_status = puterrno(error);
872
873 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
874 if (in_crit)
875 nbl_end_crit(vp);
876
877 VN_RELE(vp);
878 rr->rr_data = NULL;
879
880 return;
881 }
882
883 /*
884 * Get attributes again so we can send the latest access
885 * time to the client side for his cache.
886 */
887 va.va_mask = AT_ALL;
888
889 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
890
891 if (error) {
892 if (mp)
893 freeb(mp);
894
895 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
896 if (in_crit)
897 nbl_end_crit(vp);
898
899 VN_RELE(vp);
900 rr->rr_data = NULL;
901 rr->rr_status = puterrno(error);
902
903 return;
904 }
905
906 rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);
907
908 if (mp) {
909 rr->rr_data = (char *)mp->b_datap->db_base;
910 } else {
911 if (ra->ra_wlist) {
912 rr->rr_data = (caddr_t)iov.iov_base;
913 if (!rdma_setup_read_data2(ra, rr)) {
914 rr->rr_data = NULL;
915 rr->rr_status = puterrno(NFSERR_INVAL);
916 }
917 }
918 }
919 done:
920 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
921 if (in_crit)
922 nbl_end_crit(vp);
923
924 acl_perm(vp, exi, &va, cr);
925
926 /* check for overflows */
927 error = vattr_to_nattr(&va, &rr->rr_attr);
928
929 VN_RELE(vp);
930
931 rr->rr_status = puterrno(error);
932 }
933
934 /*
935 * Free data allocated by rfs_read
936 */
937 void
938 rfs_rdfree(struct nfsrdresult *rr)
939 {
940 mblk_t *mp;
941
942 if (rr->rr_status == NFS_OK) {
943 mp = rr->rr_mp;
944 if (mp != NULL)
945 freeb(mp);
946 }
947 }
948
949 void *
950 rfs_read_getfh(struct nfsreadargs *ra)
951 {
952 return (&ra->ra_fhandle);
953 }
954
955 #define MAX_IOVECS 12
956
957 #ifdef DEBUG
958 static int rfs_write_sync_hits = 0;
959 static int rfs_write_sync_misses = 0;
960 #endif
961
962 /*
963 * Write data to file.
964 * Returns attributes of a file after writing some data to it.
965 *
966 * Any changes made here, especially in error handling might have
967 * to also be done in rfs_write (which clusters write requests).
968 */
969 void
970 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
971 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
972 {
973 int error;
974 vnode_t *vp;
975 rlim64_t rlimit;
976 struct vattr va;
977 struct uio uio;
978 struct iovec iov[MAX_IOVECS];
979 mblk_t *m;
980 struct iovec *iovp;
981 int iovcnt;
982 cred_t *savecred;
983 int in_crit = 0;
984 caller_context_t ct;
985
986 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
987 if (vp == NULL) {
988 ns->ns_status = NFSERR_STALE;
989 return;
990 }
991
992 if (rdonly(exi, req)) {
993 VN_RELE(vp);
994 ns->ns_status = NFSERR_ROFS;
995 return;
996 }
997
998 if (vp->v_type != VREG) {
999 VN_RELE(vp);
1000 ns->ns_status = NFSERR_ISDIR;
1001 return;
1002 }
1003
1004 ct.cc_sysid = 0;
1005 ct.cc_pid = 0;
1006 ct.cc_caller_id = nfs2_srv_caller_id;
1007 ct.cc_flags = CC_DONTBLOCK;
1008
1009 va.va_mask = AT_UID|AT_MODE;
1010
1011 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1012
1013 if (error) {
1014 VN_RELE(vp);
1015 ns->ns_status = puterrno(error);
1016
1017 return;
1018 }
1019
1020 if (crgetuid(cr) != va.va_uid) {
1021 /*
1022 * This is a kludge to allow writes of files created
1023 * with read only permission. The owner of the file
1024 * is always allowed to write it.
1025 */
1026 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);
1027
1028 if (error) {
1029 VN_RELE(vp);
1030 ns->ns_status = puterrno(error);
1031 return;
1032 }
1033 }
1034
1035 /*
1036 * Can't access a mandatory lock file. This might cause
1037 * the NFS service thread to block forever waiting for a
1038 * lock to be released that will never be released.
1039 */
1040 if (MANDLOCK(vp, va.va_mode)) {
1041 VN_RELE(vp);
1042 ns->ns_status = NFSERR_ACCES;
1043 return;
1044 }
1045
1046 /*
1047 * We have to enter the critical region before calling VOP_RWLOCK
1048 * to avoid a deadlock with ufs.
1049 */
1050 if (nbl_need_check(vp)) {
1051 nbl_start_crit(vp, RW_READER);
1052 in_crit = 1;
1053 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
1054 wa->wa_count, 0, NULL)) {
1055 error = EACCES;
1056 goto out;
1057 }
1058 }
1059
1060 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1061
1062 /* check if a monitor detected a delegation conflict */
1063 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1064 VN_RELE(vp);
1065 /* mark as wouldblock so response is dropped */
1066 curthread->t_flag |= T_WOULDBLOCK;
1067 return;
1068 }
1069
1070 if (wa->wa_data || wa->wa_rlist) {
1071 /* Do the RDMA thing if necessary */
1072 if (wa->wa_rlist) {
1073 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1074 iov[0].iov_len = wa->wa_count;
1075 } else {
1076 iov[0].iov_base = wa->wa_data;
1077 iov[0].iov_len = wa->wa_count;
1078 }
1079 uio.uio_iov = iov;
1080 uio.uio_iovcnt = 1;
1081 uio.uio_segflg = UIO_SYSSPACE;
1082 uio.uio_extflg = UIO_COPY_DEFAULT;
1083 uio.uio_loffset = (offset_t)wa->wa_offset;
1084 uio.uio_resid = wa->wa_count;
1085 /*
1086 * The limit is checked on the client. We
1087 * should allow any size writes here.
1088 */
1089 uio.uio_llimit = curproc->p_fsz_ctl;
1090 rlimit = uio.uio_llimit - wa->wa_offset;
1091 if (rlimit < (rlim64_t)uio.uio_resid)
1092 uio.uio_resid = (uint_t)rlimit;
1093
1094 /*
1095 * for now we assume no append mode
1096 */
1097 /*
1098 * We're changing creds because VM may fault and we need
1099 * the cred of the current thread to be used if quota
1100 * checking is enabled.
1101 */
1102 savecred = curthread->t_cred;
1103 curthread->t_cred = cr;
1104 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1105 curthread->t_cred = savecred;
1106 } else {
1107 iovcnt = 0;
1108 for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1109 iovcnt++;
1110 if (iovcnt <= MAX_IOVECS) {
1111 #ifdef DEBUG
1112 rfs_write_sync_hits++;
1113 #endif
1114 iovp = iov;
1115 } else {
1116 #ifdef DEBUG
1117 rfs_write_sync_misses++;
1118 #endif
1119 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1120 }
1121 mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1122 uio.uio_iov = iovp;
1123 uio.uio_iovcnt = iovcnt;
1124 uio.uio_segflg = UIO_SYSSPACE;
1125 uio.uio_extflg = UIO_COPY_DEFAULT;
1126 uio.uio_loffset = (offset_t)wa->wa_offset;
1127 uio.uio_resid = wa->wa_count;
1128 /*
1129 * The limit is checked on the client. We
1130 * should allow any size writes here.
1131 */
1132 uio.uio_llimit = curproc->p_fsz_ctl;
1133 rlimit = uio.uio_llimit - wa->wa_offset;
1134 if (rlimit < (rlim64_t)uio.uio_resid)
1135 uio.uio_resid = (uint_t)rlimit;
1136
1137 /*
1138 * For now we assume no append mode.
1139 */
1140 /*
1141 * We're changing creds because VM may fault and we need
1142 * the cred of the current thread to be used if quota
1143 * checking is enabled.
1144 */
1145 savecred = curthread->t_cred;
1146 curthread->t_cred = cr;
1147 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1148 curthread->t_cred = savecred;
1149
1150 if (iovp != iov)
1151 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1152 }
1153
1154 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1155
1156 if (!error) {
1157 /*
1158 * Get attributes again so we send the latest mod
1159 * time to the client side for his cache.
1160 */
1161 va.va_mask = AT_ALL; /* now we want everything */
1162
1163 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1164
1165 /* check for overflows */
1166 if (!error) {
1167 acl_perm(vp, exi, &va, cr);
1168 error = vattr_to_nattr(&va, &ns->ns_attr);
1169 }
1170 }
1171
1172 out:
1173 if (in_crit)
1174 nbl_end_crit(vp);
1175 VN_RELE(vp);
1176
1177 /* check if a monitor detected a delegation conflict */
1178 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1179 /* mark as wouldblock so response is dropped */
1180 curthread->t_flag |= T_WOULDBLOCK;
1181 else
1182 ns->ns_status = puterrno(error);
1183
1184 }
1185
/*
 * One pending v2 WRITE request waiting to be clustered with other
 * writes to the same file.  The issuing service thread sleeps until
 * ns->ns_status is changed from RFSWRITE_INITVAL by the thread that
 * processes the cluster (see rfs_write).
 */
struct rfs_async_write {
	struct nfswriteargs *wa;	/* request arguments */
	struct nfsattrstat *ns;		/* reply; ns_status signals completion */
	struct svc_req *req;
	cred_t *cr;
	kthread_t *thread;		/* service thread waiting on this entry */
	struct rfs_async_write *list;	/* next request, kept sorted by offset */
};

/*
 * A cluster of write requests to a single file, identified by fhp.
 */
struct rfs_async_write_list {
	fhandle_t *fhp;			/* file handle shared by the cluster */
	kcondvar_t cv;			/* broadcast when the cluster completes */
	struct rfs_async_write *list;	/* requests in this cluster */
	struct rfs_async_write_list *next; /* cluster for a different file */
};

/* List of active write clusters; protected by rfs_async_write_lock. */
static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
static int rfs_write_async = 1;	/* enables write clustering if == 1 */

#define	MAXCLIOVECS	42
/*
 * Sentinel "not yet processed" status; 0 cannot be used because it
 * would read as NFS_OK.
 */
#define	RFSWRITE_INITVAL	(enum nfsstat) -1

#ifdef DEBUG
/* Debug counters for the write clustering path. */
static int rfs_write_hits = 0;
static int rfs_write_misses = 0;
#endif
1213
1214 /*
1215 * Write data to file.
1216 * Returns attributes of a file after writing some data to it.
1217 */
1218 void
1219 rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
1220 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
1221 {
1222 int error;
1223 vnode_t *vp;
1224 rlim64_t rlimit;
1225 struct vattr va;
1226 struct uio uio;
1227 struct rfs_async_write_list *lp;
1228 struct rfs_async_write_list *nlp;
1229 struct rfs_async_write *rp;
1230 struct rfs_async_write *nrp;
1231 struct rfs_async_write *trp;
1232 struct rfs_async_write *lrp;
1233 int data_written;
1234 int iovcnt;
1235 mblk_t *m;
1236 struct iovec *iovp;
1237 struct iovec *niovp;
1238 struct iovec iov[MAXCLIOVECS];
1239 int count;
1240 int rcount;
1241 uint_t off;
1242 uint_t len;
1243 struct rfs_async_write nrpsp;
1244 struct rfs_async_write_list nlpsp;
1245 ushort_t t_flag;
1246 cred_t *savecred;
1247 int in_crit = 0;
1248 caller_context_t ct;
1249
1250 if (!rfs_write_async) {
1251 rfs_write_sync(wa, ns, exi, req, cr);
1252 return;
1253 }
1254
1255 /*
1256 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
1257 * is considered an OK.
1258 */
1259 ns->ns_status = RFSWRITE_INITVAL;
1260
1261 nrp = &nrpsp;
1262 nrp->wa = wa;
1263 nrp->ns = ns;
1264 nrp->req = req;
1265 nrp->cr = cr;
1266 nrp->thread = curthread;
1267
1268 ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
1269
1270 /*
1271 * Look to see if there is already a cluster started
1272 * for this file.
1273 */
1274 mutex_enter(&rfs_async_write_lock);
1275 for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
1276 if (bcmp(&wa->wa_fhandle, lp->fhp,
1277 sizeof (fhandle_t)) == 0)
1278 break;
1279 }
1280
1281 /*
1282 * If lp is non-NULL, then there is already a cluster
1283 * started. We need to place ourselves in the cluster
1284 * list in the right place as determined by starting
1285 * offset. Conflicts with non-blocking mandatory locked
1286 * regions will be checked when the cluster is processed.
1287 */
1288 if (lp != NULL) {
1289 rp = lp->list;
1290 trp = NULL;
1291 while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
1292 trp = rp;
1293 rp = rp->list;
1294 }
1295 nrp->list = rp;
1296 if (trp == NULL)
1297 lp->list = nrp;
1298 else
1299 trp->list = nrp;
1300 while (nrp->ns->ns_status == RFSWRITE_INITVAL)
1301 cv_wait(&lp->cv, &rfs_async_write_lock);
1302 mutex_exit(&rfs_async_write_lock);
1303
1304 return;
1305 }
1306
1307 /*
1308 * No cluster started yet, start one and add ourselves
1309 * to the list of clusters.
1310 */
1311 nrp->list = NULL;
1312
1313 nlp = &nlpsp;
1314 nlp->fhp = &wa->wa_fhandle;
1315 cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
1316 nlp->list = nrp;
1317 nlp->next = NULL;
1318
1319 if (rfs_async_write_head == NULL) {
1320 rfs_async_write_head = nlp;
1321 } else {
1322 lp = rfs_async_write_head;
1323 while (lp->next != NULL)
1324 lp = lp->next;
1325 lp->next = nlp;
1326 }
1327 mutex_exit(&rfs_async_write_lock);
1328
1329 /*
1330 * Convert the file handle common to all of the requests
1331 * in this cluster to a vnode.
1332 */
1333 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1334 if (vp == NULL) {
1335 mutex_enter(&rfs_async_write_lock);
1336 if (rfs_async_write_head == nlp)
1337 rfs_async_write_head = nlp->next;
1338 else {
1339 lp = rfs_async_write_head;
1340 while (lp->next != nlp)
1341 lp = lp->next;
1342 lp->next = nlp->next;
1343 }
1344 t_flag = curthread->t_flag & T_WOULDBLOCK;
1345 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1346 rp->ns->ns_status = NFSERR_STALE;
1347 rp->thread->t_flag |= t_flag;
1348 }
1349 cv_broadcast(&nlp->cv);
1350 mutex_exit(&rfs_async_write_lock);
1351
1352 return;
1353 }
1354
1355 /*
1356 * Can only write regular files. Attempts to write any
1357 * other file types fail with EISDIR.
1358 */
1359 if (vp->v_type != VREG) {
1360 VN_RELE(vp);
1361 mutex_enter(&rfs_async_write_lock);
1362 if (rfs_async_write_head == nlp)
1363 rfs_async_write_head = nlp->next;
1364 else {
1365 lp = rfs_async_write_head;
1366 while (lp->next != nlp)
1367 lp = lp->next;
1368 lp->next = nlp->next;
1369 }
1370 t_flag = curthread->t_flag & T_WOULDBLOCK;
1371 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1372 rp->ns->ns_status = NFSERR_ISDIR;
1373 rp->thread->t_flag |= t_flag;
1374 }
1375 cv_broadcast(&nlp->cv);
1376 mutex_exit(&rfs_async_write_lock);
1377
1378 return;
1379 }
1380
1381 /*
1382 * Enter the critical region before calling VOP_RWLOCK, to avoid a
1383 * deadlock with ufs.
1384 */
1385 if (nbl_need_check(vp)) {
1386 nbl_start_crit(vp, RW_READER);
1387 in_crit = 1;
1388 }
1389
1390 ct.cc_sysid = 0;
1391 ct.cc_pid = 0;
1392 ct.cc_caller_id = nfs2_srv_caller_id;
1393 ct.cc_flags = CC_DONTBLOCK;
1394
1395 /*
1396 * Lock the file for writing. This operation provides
1397 * the delay which allows clusters to grow.
1398 */
1399 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1400
1401 /* check if a monitor detected a delegation conflict */
1402 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1403 if (in_crit)
1404 nbl_end_crit(vp);
1405 VN_RELE(vp);
1406 /* mark as wouldblock so response is dropped */
1407 curthread->t_flag |= T_WOULDBLOCK;
1408 mutex_enter(&rfs_async_write_lock);
1409 if (rfs_async_write_head == nlp)
1410 rfs_async_write_head = nlp->next;
1411 else {
1412 lp = rfs_async_write_head;
1413 while (lp->next != nlp)
1414 lp = lp->next;
1415 lp->next = nlp->next;
1416 }
1417 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1418 if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1419 rp->ns->ns_status = puterrno(error);
1420 rp->thread->t_flag |= T_WOULDBLOCK;
1421 }
1422 }
1423 cv_broadcast(&nlp->cv);
1424 mutex_exit(&rfs_async_write_lock);
1425
1426 return;
1427 }
1428
1429 /*
1430 * Disconnect this cluster from the list of clusters.
1431 * The cluster that is being dealt with must be fixed
1432 * in size after this point, so there is no reason
1433 * to leave it on the list so that new requests can
1434 * find it.
1435 *
1436 * The algorithm is that the first write request will
1437 * create a cluster, convert the file handle to a
1438 * vnode pointer, and then lock the file for writing.
1439 * This request is not likely to be clustered with
1440 * any others. However, the next request will create
1441 * a new cluster and be blocked in VOP_RWLOCK while
1442 * the first request is being processed. This delay
1443 * will allow more requests to be clustered in this
1444 * second cluster.
1445 */
1446 mutex_enter(&rfs_async_write_lock);
1447 if (rfs_async_write_head == nlp)
1448 rfs_async_write_head = nlp->next;
1449 else {
1450 lp = rfs_async_write_head;
1451 while (lp->next != nlp)
1452 lp = lp->next;
1453 lp->next = nlp->next;
1454 }
1455 mutex_exit(&rfs_async_write_lock);
1456
1457 /*
1458 * Step through the list of requests in this cluster.
1459 * We need to check permissions to make sure that all
1460 * of the requests have sufficient permission to write
1461 * the file. A cluster can be composed of requests
1462 * from different clients and different users on each
1463 * client.
1464 *
1465 * As a side effect, we also calculate the size of the
1466 * byte range that this cluster encompasses.
1467 */
1468 rp = nlp->list;
1469 off = rp->wa->wa_offset;
1470 len = (uint_t)0;
1471 do {
1472 if (rdonly(exi, rp->req)) {
1473 rp->ns->ns_status = NFSERR_ROFS;
1474 t_flag = curthread->t_flag & T_WOULDBLOCK;
1475 rp->thread->t_flag |= t_flag;
1476 continue;
1477 }
1478
1479 va.va_mask = AT_UID|AT_MODE;
1480
1481 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);
1482
1483 if (!error) {
1484 if (crgetuid(rp->cr) != va.va_uid) {
1485 /*
1486 * This is a kludge to allow writes of files
1487 * created with read only permission. The
1488 * owner of the file is always allowed to
1489 * write it.
1490 */
1491 error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
1492 }
1493 if (!error && MANDLOCK(vp, va.va_mode))
1494 error = EACCES;
1495 }
1496
1497 /*
1498 * Check for a conflict with a nbmand-locked region.
1499 */
1500 if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
1501 rp->wa->wa_count, 0, NULL)) {
1502 error = EACCES;
1503 }
1504
1505 if (error) {
1506 rp->ns->ns_status = puterrno(error);
1507 t_flag = curthread->t_flag & T_WOULDBLOCK;
1508 rp->thread->t_flag |= t_flag;
1509 continue;
1510 }
1511 if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
1512 len = rp->wa->wa_offset + rp->wa->wa_count - off;
1513 } while ((rp = rp->list) != NULL);
1514
1515 /*
1516 * Step through the cluster attempting to gather as many
1517 * requests which are contiguous as possible. These
1518 * contiguous requests are handled via one call to VOP_WRITE
1519 * instead of different calls to VOP_WRITE. We also keep
1520 * track of the fact that any data was written.
1521 */
1522 rp = nlp->list;
1523 data_written = 0;
1524 do {
1525 /*
1526 * Skip any requests which are already marked as having an
1527 * error.
1528 */
1529 if (rp->ns->ns_status != RFSWRITE_INITVAL) {
1530 rp = rp->list;
1531 continue;
1532 }
1533
1534 /*
1535 * Count the number of iovec's which are required
1536 * to handle this set of requests. One iovec is
1537 * needed for each data buffer, whether addressed
1538 * by wa_data or by the b_rptr pointers in the
1539 * mblk chains.
1540 */
1541 iovcnt = 0;
1542 lrp = rp;
1543 for (;;) {
1544 if (lrp->wa->wa_data || lrp->wa->wa_rlist)
1545 iovcnt++;
1546 else {
1547 m = lrp->wa->wa_mblk;
1548 while (m != NULL) {
1549 iovcnt++;
1550 m = m->b_cont;
1551 }
1552 }
1553 if (lrp->list == NULL ||
1554 lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
1555 lrp->wa->wa_offset + lrp->wa->wa_count !=
1556 lrp->list->wa->wa_offset) {
1557 lrp = lrp->list;
1558 break;
1559 }
1560 lrp = lrp->list;
1561 }
1562
1563 if (iovcnt <= MAXCLIOVECS) {
1564 #ifdef DEBUG
1565 rfs_write_hits++;
1566 #endif
1567 niovp = iov;
1568 } else {
1569 #ifdef DEBUG
1570 rfs_write_misses++;
1571 #endif
1572 niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
1573 }
1574 /*
1575 * Put together the scatter/gather iovecs.
1576 */
1577 iovp = niovp;
1578 trp = rp;
1579 count = 0;
1580 do {
1581 if (trp->wa->wa_data || trp->wa->wa_rlist) {
1582 if (trp->wa->wa_rlist) {
1583 iovp->iov_base =
1584 (char *)((trp->wa->wa_rlist)->
1585 u.c_daddr3);
1586 iovp->iov_len = trp->wa->wa_count;
1587 } else {
1588 iovp->iov_base = trp->wa->wa_data;
1589 iovp->iov_len = trp->wa->wa_count;
1590 }
1591 iovp++;
1592 } else {
1593 m = trp->wa->wa_mblk;
1594 rcount = trp->wa->wa_count;
1595 while (m != NULL) {
1596 iovp->iov_base = (caddr_t)m->b_rptr;
1597 iovp->iov_len = (m->b_wptr - m->b_rptr);
1598 rcount -= iovp->iov_len;
1599 if (rcount < 0)
1600 iovp->iov_len += rcount;
1601 iovp++;
1602 if (rcount <= 0)
1603 break;
1604 m = m->b_cont;
1605 }
1606 }
1607 count += trp->wa->wa_count;
1608 trp = trp->list;
1609 } while (trp != lrp);
1610
1611 uio.uio_iov = niovp;
1612 uio.uio_iovcnt = iovcnt;
1613 uio.uio_segflg = UIO_SYSSPACE;
1614 uio.uio_extflg = UIO_COPY_DEFAULT;
1615 uio.uio_loffset = (offset_t)rp->wa->wa_offset;
1616 uio.uio_resid = count;
1617 /*
1618 * The limit is checked on the client. We
1619 * should allow any size writes here.
1620 */
1621 uio.uio_llimit = curproc->p_fsz_ctl;
1622 rlimit = uio.uio_llimit - rp->wa->wa_offset;
1623 if (rlimit < (rlim64_t)uio.uio_resid)
1624 uio.uio_resid = (uint_t)rlimit;
1625
1626 /*
1627 * For now we assume no append mode.
1628 */
1629
1630 /*
1631 * We're changing creds because VM may fault
1632 * and we need the cred of the current
1633 * thread to be used if quota * checking is
1634 * enabled.
1635 */
1636 savecred = curthread->t_cred;
1637 curthread->t_cred = cr;
1638 error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
1639 curthread->t_cred = savecred;
1640
1641 /* check if a monitor detected a delegation conflict */
1642 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1643 /* mark as wouldblock so response is dropped */
1644 curthread->t_flag |= T_WOULDBLOCK;
1645
1646 if (niovp != iov)
1647 kmem_free(niovp, sizeof (*niovp) * iovcnt);
1648
1649 if (!error) {
1650 data_written = 1;
1651 /*
1652 * Get attributes again so we send the latest mod
1653 * time to the client side for his cache.
1654 */
1655 va.va_mask = AT_ALL; /* now we want everything */
1656
1657 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);
1658
1659 if (!error)
1660 acl_perm(vp, exi, &va, rp->cr);
1661 }
1662
1663 /*
1664 * Fill in the status responses for each request
1665 * which was just handled. Also, copy the latest
1666 * attributes in to the attribute responses if
1667 * appropriate.
1668 */
1669 t_flag = curthread->t_flag & T_WOULDBLOCK;
1670 do {
1671 rp->thread->t_flag |= t_flag;
1672 /* check for overflows */
1673 if (!error) {
1674 error = vattr_to_nattr(&va, &rp->ns->ns_attr);
1675 }
1676 rp->ns->ns_status = puterrno(error);
1677 rp = rp->list;
1678 } while (rp != lrp);
1679 } while (rp != NULL);
1680
1681 /*
1682 * If any data was written at all, then we need to flush
1683 * the data and metadata to stable storage.
1684 */
1685 if (data_written) {
1686 error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);
1687
1688 if (!error) {
1689 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
1690 }
1691 }
1692
1693 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1694
1695 if (in_crit)
1696 nbl_end_crit(vp);
1697 VN_RELE(vp);
1698
1699 t_flag = curthread->t_flag & T_WOULDBLOCK;
1700 mutex_enter(&rfs_async_write_lock);
1701 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1702 if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1703 rp->ns->ns_status = puterrno(error);
1704 rp->thread->t_flag |= t_flag;
1705 }
1706 }
1707 cv_broadcast(&nlp->cv);
1708 mutex_exit(&rfs_async_write_lock);
1709
1710 }
1711
1712 void *
1713 rfs_write_getfh(struct nfswriteargs *wa)
1714 {
1715 return (&wa->wa_fhandle);
1716 }
1717
1718 /*
1719 * Create a file.
1720 * Creates a file with given attributes and returns those attributes
1721 * and an fhandle for the new file.
1722 */
1723 void
1724 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1725 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
1726 {
1727 int error;
1728 int lookuperr;
1729 int in_crit = 0;
1730 struct vattr va;
1731 vnode_t *vp;
1732 vnode_t *realvp;
1733 vnode_t *dvp;
1734 char *name = args->ca_da.da_name;
1735 vnode_t *tvp = NULL;
1736 int mode;
1737 int lookup_ok;
1738 bool_t trunc;
1739 struct sockaddr *ca;
1740
1741 /*
1742 * Disallow NULL paths
1743 */
1744 if (name == NULL || *name == '\0') {
1745 dr->dr_status = NFSERR_ACCES;
1746 return;
1747 }
1748
1749 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1750 if (dvp == NULL) {
1751 dr->dr_status = NFSERR_STALE;
1752 return;
1753 }
1754
1755 error = sattr_to_vattr(args->ca_sa, &va);
1756 if (error) {
1757 dr->dr_status = puterrno(error);
1758 return;
1759 }
1760
1761 /*
1762 * Must specify the mode.
1763 */
1764 if (!(va.va_mask & AT_MODE)) {
1765 VN_RELE(dvp);
1766 dr->dr_status = NFSERR_INVAL;
1767 return;
1768 }
1769
1770 /*
1771 * This is a completely gross hack to make mknod
1772 * work over the wire until we can wack the protocol
1773 */
1774 if ((va.va_mode & IFMT) == IFCHR) {
1775 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1776 va.va_type = VFIFO; /* xtra kludge for named pipe */
1777 else {
1778 va.va_type = VCHR;
1779 /*
1780 * uncompress the received dev_t
1781 * if the top half is zero indicating a request
1782 * from an `older style' OS.
1783 */
1784 if ((va.va_size & 0xffff0000) == 0)
1785 va.va_rdev = nfsv2_expdev(va.va_size);
1786 else
1787 va.va_rdev = (dev_t)va.va_size;
1788 }
1789 va.va_mask &= ~AT_SIZE;
1790 } else if ((va.va_mode & IFMT) == IFBLK) {
1791 va.va_type = VBLK;
1792 /*
1793 * uncompress the received dev_t
1794 * if the top half is zero indicating a request
1795 * from an `older style' OS.
1796 */
1797 if ((va.va_size & 0xffff0000) == 0)
1798 va.va_rdev = nfsv2_expdev(va.va_size);
1799 else
1800 va.va_rdev = (dev_t)va.va_size;
1801 va.va_mask &= ~AT_SIZE;
1802 } else if ((va.va_mode & IFMT) == IFSOCK) {
1803 va.va_type = VSOCK;
1804 } else {
1805 va.va_type = VREG;
1806 }
1807 va.va_mode &= ~IFMT;
1808 va.va_mask |= AT_TYPE;
1809
1810 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1811 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1812 MAXPATHLEN);
1813 if (name == NULL) {
1814 dr->dr_status = puterrno(EINVAL);
1815 return;
1816 }
1817
1818 /*
1819 * Why was the choice made to use VWRITE as the mode to the
1820 * call to VOP_CREATE ? This results in a bug. When a client
1821 * opens a file that already exists and is RDONLY, the second
1822 * open fails with an EACESS because of the mode.
1823 * bug ID 1054648.
1824 */
1825 lookup_ok = 0;
1826 mode = VWRITE;
1827 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1828 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1829 NULL, NULL, NULL);
1830 if (!error) {
1831 struct vattr at;
1832
1833 lookup_ok = 1;
1834 at.va_mask = AT_MODE;
1835 error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1836 if (!error)
1837 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1838 VN_RELE(tvp);
1839 tvp = NULL;
1840 }
1841 }
1842
1843 if (!lookup_ok) {
1844 if (rdonly(exi, req)) {
1845 error = EROFS;
1846 } else if (va.va_type != VREG && va.va_type != VFIFO &&
1847 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1848 error = EPERM;
1849 } else {
1850 error = 0;
1851 }
1852 }
1853
1854 /*
1855 * If file size is being modified on an already existing file
1856 * make sure that there are no conflicting non-blocking mandatory
1857 * locks in the region being manipulated. Return EACCES if there
1858 * are conflicting locks.
1859 */
1860 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1861 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1862 NULL, NULL, NULL);
1863
1864 if (!lookuperr &&
1865 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1866 VN_RELE(tvp);
1867 curthread->t_flag |= T_WOULDBLOCK;
1868 goto out;
1869 }
1870
1871 if (!lookuperr && nbl_need_check(tvp)) {
1872 /*
1873 * The file exists. Now check if it has any
1874 * conflicting non-blocking mandatory locks
1875 * in the region being changed.
1876 */
1877 struct vattr bva;
1878 u_offset_t offset;
1879 ssize_t length;
1880
1881 nbl_start_crit(tvp, RW_READER);
1882 in_crit = 1;
1883
1884 bva.va_mask = AT_SIZE;
1885 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1886 if (!error) {
1887 if (va.va_size < bva.va_size) {
1888 offset = va.va_size;
1889 length = bva.va_size - va.va_size;
1890 } else {
1891 offset = bva.va_size;
1892 length = va.va_size - bva.va_size;
1893 }
1894 if (length) {
1895 if (nbl_conflict(tvp, NBL_WRITE,
1896 offset, length, 0, NULL)) {
1897 error = EACCES;
1898 }
1899 }
1900 }
1901 if (error) {
1902 nbl_end_crit(tvp);
1903 VN_RELE(tvp);
1904 in_crit = 0;
1905 }
1906 } else if (tvp != NULL) {
1907 VN_RELE(tvp);
1908 }
1909 }
1910
1911 if (!error) {
1912 /*
1913 * If filesystem is shared with nosuid the remove any
1914 * setuid/setgid bits on create.
1915 */
1916 if (va.va_type == VREG &&
1917 exi->exi_export.ex_flags & EX_NOSUID)
1918 va.va_mode &= ~(VSUID | VSGID);
1919
1920 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1921 NULL, NULL);
1922
1923 if (!error) {
1924
1925 if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
1926 trunc = TRUE;
1927 else
1928 trunc = FALSE;
1929
1930 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1931 VN_RELE(vp);
1932 curthread->t_flag |= T_WOULDBLOCK;
1933 goto out;
1934 }
1935 va.va_mask = AT_ALL;
1936
1937 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1938
1939 /* check for overflows */
1940 if (!error) {
1941 acl_perm(vp, exi, &va, cr);
1942 error = vattr_to_nattr(&va, &dr->dr_attr);
1943 if (!error) {
1944 error = makefh(&dr->dr_fhandle, vp,
1945 exi);
1946 }
1947 }
1948 /*
1949 * Force modified metadata out to stable storage.
1950 *
1951 * if a underlying vp exists, pass it to VOP_FSYNC
1952 */
1953 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1954 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
1955 else
1956 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1957 VN_RELE(vp);
1958 }
1959
1960 if (in_crit) {
1961 nbl_end_crit(tvp);
1962 VN_RELE(tvp);
1963 }
1964 }
1965
1966 /*
1967 * Force modified data and metadata out to stable storage.
1968 */
1969 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1970
1971 out:
1972
1973 VN_RELE(dvp);
1974
1975 dr->dr_status = puterrno(error);
1976
1977 if (name != args->ca_da.da_name)
1978 kmem_free(name, MAXPATHLEN);
1979 }
1980 void *
1981 rfs_create_getfh(struct nfscreatargs *args)
1982 {
1983 return (args->ca_da.da_fhandle);
1984 }
1985
1986 /*
1987 * Remove a file.
1988 * Remove named file from parent directory.
1989 */
1990 void
1991 rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
1992 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
1993 {
1994 int error = 0;
1995 vnode_t *vp;
1996 vnode_t *targvp;
1997 int in_crit = 0;
1998
1999 /*
2000 * Disallow NULL paths
2001 */
2002 if (da->da_name == NULL || *da->da_name == '\0') {
2003 *status = NFSERR_ACCES;
2004 return;
2005 }
2006
2007 vp = nfs_fhtovp(da->da_fhandle, exi);
2008 if (vp == NULL) {
2009 *status = NFSERR_STALE;
2010 return;
2011 }
2012
2013 if (rdonly(exi, req)) {
2014 VN_RELE(vp);
2015 *status = NFSERR_ROFS;
2016 return;
2017 }
2018
2019 /*
2020 * Check for a conflict with a non-blocking mandatory share reservation.
2021 */
2022 error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
2023 NULL, cr, NULL, NULL, NULL);
2024 if (error != 0) {
2025 VN_RELE(vp);
2026 *status = puterrno(error);
2027 return;
2028 }
2029
2030 /*
2031 * If the file is delegated to an v4 client, then initiate
2032 * recall and drop this request (by setting T_WOULDBLOCK).
2033 * The client will eventually re-transmit the request and
2034 * (hopefully), by then, the v4 client will have returned
2035 * the delegation.
2036 */
2037
2038 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2039 VN_RELE(vp);
2040 VN_RELE(targvp);
2041 curthread->t_flag |= T_WOULDBLOCK;
2042 return;
2043 }
2044
2045 if (nbl_need_check(targvp)) {
2046 nbl_start_crit(targvp, RW_READER);
2047 in_crit = 1;
2048 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2049 error = EACCES;
2050 goto out;
2051 }
2052 }
2053
2054 error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);
2055
2056 /*
2057 * Force modified data and metadata out to stable storage.
2058 */
2059 (void) VOP_FSYNC(vp, 0, cr, NULL);
2060
2061 out:
2062 if (in_crit)
2063 nbl_end_crit(targvp);
2064 VN_RELE(targvp);
2065 VN_RELE(vp);
2066
2067 *status = puterrno(error);
2068
2069 }
2070
2071 void *
2072 rfs_remove_getfh(struct nfsdiropargs *da)
2073 {
2074 return (da->da_fhandle);
2075 }
2076
2077 /*
2078 * rename a file
2079 * Give a file (from) a new name (to).
2080 */
2081 void
2082 rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
2083 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
2084 {
2085 int error = 0;
2086 vnode_t *fromvp;
2087 vnode_t *tovp;
2088 struct exportinfo *to_exi;
2089 fhandle_t *fh;
2090 vnode_t *srcvp;
2091 vnode_t *targvp;
2092 int in_crit = 0;
2093
2094 fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
2095 if (fromvp == NULL) {
2096 *status = NFSERR_STALE;
2097 return;
2098 }
2099
2100 fh = args->rna_to.da_fhandle;
2101 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2102 if (to_exi == NULL) {
2103 VN_RELE(fromvp);
2104 *status = NFSERR_ACCES;
2105 return;
2106 }
2107 exi_rele(to_exi);
2108
2109 if (to_exi != exi) {
2110 VN_RELE(fromvp);
2111 *status = NFSERR_XDEV;
2112 return;
2113 }
2114
2115 tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
2116 if (tovp == NULL) {
2117 VN_RELE(fromvp);
2118 *status = NFSERR_STALE;
2119 return;
2120 }
2121
2122 if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
2123 VN_RELE(tovp);
2124 VN_RELE(fromvp);
2125 *status = NFSERR_NOTDIR;
2126 return;
2127 }
2128
2129 /*
2130 * Disallow NULL paths
2131 */
2132 if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
2133 args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
2134 VN_RELE(tovp);
2135 VN_RELE(fromvp);
2136 *status = NFSERR_ACCES;
2137 return;
2138 }
2139
2140 if (rdonly(exi, req)) {
2141 VN_RELE(tovp);
2142 VN_RELE(fromvp);
2143 *status = NFSERR_ROFS;
2144 return;
2145 }
2146
2147 /*
2148 * Check for a conflict with a non-blocking mandatory share reservation.
2149 */
2150 error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
2151 NULL, cr, NULL, NULL, NULL);
2152 if (error != 0) {
2153 VN_RELE(tovp);
2154 VN_RELE(fromvp);
2155 *status = puterrno(error);
2156 return;
2157 }
2158
2159 /* Check for delegations on the source file */
2160
2161 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2162 VN_RELE(tovp);
2163 VN_RELE(fromvp);
2164 VN_RELE(srcvp);
2165 curthread->t_flag |= T_WOULDBLOCK;
2166 return;
2167 }
2168
2169 /* Check for delegation on the file being renamed over, if it exists */
2170
2171 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2172 VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
2173 NULL, NULL, NULL) == 0) {
2174
2175 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2176 VN_RELE(tovp);
2177 VN_RELE(fromvp);
2178 VN_RELE(srcvp);
2179 VN_RELE(targvp);
2180 curthread->t_flag |= T_WOULDBLOCK;
2181 return;
2182 }
2183 VN_RELE(targvp);
2184 }
2185
2186
2187 if (nbl_need_check(srcvp)) {
2188 nbl_start_crit(srcvp, RW_READER);
2189 in_crit = 1;
2190 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
2191 error = EACCES;
2192 goto out;
2193 }
2194 }
2195
2196 error = VOP_RENAME(fromvp, args->rna_from.da_name,
2197 tovp, args->rna_to.da_name, cr, NULL, 0);
2198
2199 if (error == 0)
2200 vn_renamepath(tovp, srcvp, args->rna_to.da_name,
2201 strlen(args->rna_to.da_name));
2202
2203 /*
2204 * Force modified data and metadata out to stable storage.
2205 */
2206 (void) VOP_FSYNC(tovp, 0, cr, NULL);
2207 (void) VOP_FSYNC(fromvp, 0, cr, NULL);
2208
2209 out:
2210 if (in_crit)
2211 nbl_end_crit(srcvp);
2212 VN_RELE(srcvp);
2213 VN_RELE(tovp);
2214 VN_RELE(fromvp);
2215
2216 *status = puterrno(error);
2217
2218 }
2219 void *
2220 rfs_rename_getfh(struct nfsrnmargs *args)
2221 {
2222 return (args->rna_from.da_fhandle);
2223 }
2224
2225 /*
2226 * Link to a file.
2227 * Create a file (to) which is a hard link to the given file (from).
2228 */
2229 void
2230 rfs_link(struct nfslinkargs *args, enum nfsstat *status,
2231 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
2232 {
2233 int error;
2234 vnode_t *fromvp;
2235 vnode_t *tovp;
2236 struct exportinfo *to_exi;
2237 fhandle_t *fh;
2238
2239 fromvp = nfs_fhtovp(args->la_from, exi);
2240 if (fromvp == NULL) {
2241 *status = NFSERR_STALE;
2242 return;
2243 }
2244
2245 fh = args->la_to.da_fhandle;
2246 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2247 if (to_exi == NULL) {
2248 VN_RELE(fromvp);
2249 *status = NFSERR_ACCES;
2250 return;
2251 }
2252 exi_rele(to_exi);
2253
2254 if (to_exi != exi) {
2255 VN_RELE(fromvp);
2256 *status = NFSERR_XDEV;
2257 return;
2258 }
2259
2260 tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
2261 if (tovp == NULL) {
2262 VN_RELE(fromvp);
2263 *status = NFSERR_STALE;
2264 return;
2265 }
2266
2267 if (tovp->v_type != VDIR) {
2268 VN_RELE(tovp);
2269 VN_RELE(fromvp);
2270 *status = NFSERR_NOTDIR;
2271 return;
2272 }
2273 /*
2274 * Disallow NULL paths
2275 */
2276 if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
2277 VN_RELE(tovp);
2278 VN_RELE(fromvp);
2279 *status = NFSERR_ACCES;
2280 return;
2281 }
2282
2283 if (rdonly(exi, req)) {
2284 VN_RELE(tovp);
2285 VN_RELE(fromvp);
2286 *status = NFSERR_ROFS;
2287 return;
2288 }
2289
2290 error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);
2291
2292 /*
2293 * Force modified data and metadata out to stable storage.
2294 */
2295 (void) VOP_FSYNC(tovp, 0, cr, NULL);
2296 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);
2297
2298 VN_RELE(tovp);
2299 VN_RELE(fromvp);
2300
2301 *status = puterrno(error);
2302
2303 }
2304 void *
2305 rfs_link_getfh(struct nfslinkargs *args)
2306 {
2307 return (args->la_from);
2308 }
2309
2310 /*
2311 * Symbolicly link to a file.
2312 * Create a file (to) with the given attributes which is a symbolic link
2313 * to the given path name (to).
2314 */
2315 void
2316 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2317 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
2318 {
2319 int error;
2320 struct vattr va;
2321 vnode_t *vp;
2322 vnode_t *svp;
2323 int lerror;
2324 struct sockaddr *ca;
2325 char *name = NULL;
2326
2327 /*
2328 * Disallow NULL paths
2329 */
2330 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2331 *status = NFSERR_ACCES;
2332 return;
2333 }
2334
2335 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2336 if (vp == NULL) {
2337 *status = NFSERR_STALE;
2338 return;
2339 }
2340
2341 if (rdonly(exi, req)) {
2342 VN_RELE(vp);
2343 *status = NFSERR_ROFS;
2344 return;
2345 }
2346
2347 error = sattr_to_vattr(args->sla_sa, &va);
2348 if (error) {
2349 VN_RELE(vp);
2350 *status = puterrno(error);
2351 return;
2352 }
2353
2354 if (!(va.va_mask & AT_MODE)) {
2355 VN_RELE(vp);
2356 *status = NFSERR_INVAL;
2357 return;
2358 }
2359
2360 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2361 name = nfscmd_convname(ca, exi, args->sla_tnm,
2362 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2363
2364 if (name == NULL) {
2365 *status = NFSERR_ACCES;
2366 return;
2367 }
2368
2369 va.va_type = VLNK;
2370 va.va_mask |= AT_TYPE;
2371
2372 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2373
2374 /*
2375 * Force new data and metadata out to stable storage.
2376 */
2377 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2378 NULL, cr, NULL, NULL, NULL);
2379
2380 if (!lerror) {
2381 (void) VOP_FSYNC(svp, 0, cr, NULL);
2382 VN_RELE(svp);
2383 }
2384
2385 /*
2386 * Force modified data and metadata out to stable storage.
2387 */
2388 (void) VOP_FSYNC(vp, 0, cr, NULL);
2389
2390 VN_RELE(vp);
2391
2392 *status = puterrno(error);
2393 if (name != args->sla_tnm)
2394 kmem_free(name, MAXPATHLEN);
2395
2396 }
2397 void *
2398 rfs_symlink_getfh(struct nfsslargs *args)
2399 {
2400 return (args->sla_from.da_fhandle);
2401 }
2402
2403 /*
2404 * Make a directory.
2405 * Create a directory with the given name, parent directory, and attributes.
2406 * Returns a file handle and attributes for the new directory.
2407 */
2408 void
2409 rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
2410 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
2411 {
2412 int error;
2413 struct vattr va;
2414 vnode_t *dvp = NULL;
2415 vnode_t *vp;
2416 char *name = args->ca_da.da_name;
2417
2418 /*
2419 * Disallow NULL paths
2420 */
2421 if (name == NULL || *name == '\0') {
2422 dr->dr_status = NFSERR_ACCES;
2423 return;
2424 }
2425
2426 vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
2427 if (vp == NULL) {
2428 dr->dr_status = NFSERR_STALE;
2429 return;
2430 }
2431
2432 if (rdonly(exi, req)) {
2433 VN_RELE(vp);
2434 dr->dr_status = NFSERR_ROFS;
2435 return;
2436 }
2437
2438 error = sattr_to_vattr(args->ca_sa, &va);
2439 if (error) {
2440 VN_RELE(vp);
2441 dr->dr_status = puterrno(error);
2442 return;
2443 }
2444
2445 if (!(va.va_mask & AT_MODE)) {
2446 VN_RELE(vp);
2447 dr->dr_status = NFSERR_INVAL;
2448 return;
2449 }
2450
2451 va.va_type = VDIR;
2452 va.va_mask |= AT_TYPE;
2453
2454 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);
2455
2456 if (!error) {
2457 /*
2458 * Attribtutes of the newly created directory should
2459 * be returned to the client.
2460 */
2461 va.va_mask = AT_ALL; /* We want everything */
2462 error = VOP_GETATTR(dvp, &va, 0, cr, NULL);
2463
2464 /* check for overflows */
2465 if (!error) {
2466 acl_perm(vp, exi, &va, cr);
2467 error = vattr_to_nattr(&va, &dr->dr_attr);
2468 if (!error) {
2469 error = makefh(&dr->dr_fhandle, dvp, exi);
2470 }
2471 }
2472 /*
2473 * Force new data and metadata out to stable storage.
2474 */
2475 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2476 VN_RELE(dvp);
2477 }
2478
2479 /*
2480 * Force modified data and metadata out to stable storage.
2481 */
2482 (void) VOP_FSYNC(vp, 0, cr, NULL);
2483
2484 VN_RELE(vp);
2485
2486 dr->dr_status = puterrno(error);
2487
2488 }
2489 void *
2490 rfs_mkdir_getfh(struct nfscreatargs *args)
2491 {
2492 return (args->ca_da.da_fhandle);
2493 }
2494
2495 /*
2496 * Remove a directory.
2497 * Remove the given directory name from the given parent directory.
2498 */
2499 void
2500 rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
2501 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
2502 {
2503 int error;
2504 vnode_t *vp;
2505
2506
2507 /*
2508 * Disallow NULL paths
2509 */
2510 if (da->da_name == NULL || *da->da_name == '\0') {
2511 *status = NFSERR_ACCES;
2512 return;
2513 }
2514
2515 vp = nfs_fhtovp(da->da_fhandle, exi);
2516 if (vp == NULL) {
2517 *status = NFSERR_STALE;
2518 return;
2519 }
2520
2521 if (rdonly(exi, req)) {
2522 VN_RELE(vp);
2523 *status = NFSERR_ROFS;
2524 return;
2525 }
2526
2527 /*
2528 * VOP_RMDIR now takes a new third argument (the current
2529 * directory of the process). That's because someone
2530 * wants to return EINVAL if one tries to remove ".".
2531 * Of course, NFS servers have no idea what their
2532 * clients' current directories are. We fake it by
2533 * supplying a vnode known to exist and illegal to
2534 * remove.
2535 */
2536 error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0);
2537
2538 /*
2539 * Force modified data and metadata out to stable storage.
2540 */
2541 (void) VOP_FSYNC(vp, 0, cr, NULL);
2542
2543 VN_RELE(vp);
2544
2545 /*
2546 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2547 * if the directory is not empty. A System V NFS server
2548 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
2549 * over the wire.
2550 */
2551 if (error == EEXIST)
2552 *status = NFSERR_NOTEMPTY;
2553 else
2554 *status = puterrno(error);
2555
2556 }
2557 void *
2558 rfs_rmdir_getfh(struct nfsdiropargs *da)
2559 {
2560 return (da->da_fhandle);
2561 }
2562
2563 /* ARGSUSED */
2564 void
2565 rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
2566 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
2567 {
2568 int error;
2569 int iseof;
2570 struct iovec iov;
2571 struct uio uio;
2572 vnode_t *vp;
2573 char *ndata = NULL;
2574 struct sockaddr *ca;
2575 size_t nents;
2576 int ret;
2577
2578 vp = nfs_fhtovp(&rda->rda_fh, exi);
2579 if (vp == NULL) {
2580 rd->rd_entries = NULL;
2581 rd->rd_status = NFSERR_STALE;
2582 return;
2583 }
2584
2585 if (vp->v_type != VDIR) {
2586 VN_RELE(vp);
2587 rd->rd_entries = NULL;
2588 rd->rd_status = NFSERR_NOTDIR;
2589 return;
2590 }
2591
2592 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2593
2594 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
2595
2596 if (error) {
2597 rd->rd_entries = NULL;
2598 goto bad;
2599 }
2600
2601 if (rda->rda_count == 0) {
2602 rd->rd_entries = NULL;
2603 rd->rd_size = 0;
2604 rd->rd_eof = FALSE;
2605 goto bad;
2606 }
2607
2608 rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);
2609
2610 /*
2611 * Allocate data for entries. This will be freed by rfs_rddirfree.
2612 */
2613 rd->rd_bufsize = (uint_t)rda->rda_count;
2614 rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);
2615
2616 /*
2617 * Set up io vector to read directory data
2618 */
2619 iov.iov_base = (caddr_t)rd->rd_entries;
2620 iov.iov_len = rda->rda_count;
2621 uio.uio_iov = &iov;
2622 uio.uio_iovcnt = 1;
2623 uio.uio_segflg = UIO_SYSSPACE;
2624 uio.uio_extflg = UIO_COPY_CACHED;
2625 uio.uio_loffset = (offset_t)rda->rda_offset;
2626 uio.uio_resid = rda->rda_count;
2627
2628 /*
2629 * read directory
2630 */
2631 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
2632
2633 /*
2634 * Clean up
2635 */
2636 if (!error) {
2637 /*
2638 * set size and eof
2639 */
2640 if (uio.uio_resid == rda->rda_count) {
2641 rd->rd_size = 0;
2642 rd->rd_eof = TRUE;
2643 } else {
2644 rd->rd_size = (uint32_t)(rda->rda_count -
2645 uio.uio_resid);
2646 rd->rd_eof = iseof ? TRUE : FALSE;
2647 }
2648 }
2649
2650 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2651 nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
2652 ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
2653 rda->rda_count, &ndata);
2654
2655 if (ret != 0) {
2656 size_t dropbytes;
2657 /*
2658 * We had to drop one or more entries in order to fit
2659 * during the character conversion. We need to patch
2660 * up the size and eof info.
2661 */
2662 if (rd->rd_eof)
2663 rd->rd_eof = FALSE;
2664 dropbytes = nfscmd_dropped_entrysize(
2665 (struct dirent64 *)rd->rd_entries, nents, ret);
2666 rd->rd_size -= dropbytes;
2667 }
2668 if (ndata == NULL) {
2669 ndata = (char *)rd->rd_entries;
2670 } else if (ndata != (char *)rd->rd_entries) {
2671 kmem_free(rd->rd_entries, rd->rd_bufsize);
2672 rd->rd_entries = (void *)ndata;
2673 rd->rd_bufsize = rda->rda_count;
2674 }
2675
2676 bad:
2677 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2678
2679 #if 0 /* notyet */
2680 /*
2681 * Don't do this. It causes local disk writes when just
2682 * reading the file and the overhead is deemed larger
2683 * than the benefit.
2684 */
2685 /*
2686 * Force modified metadata out to stable storage.
2687 */
2688 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2689 #endif
2690
2691 VN_RELE(vp);
2692
2693 rd->rd_status = puterrno(error);
2694
2695 }
/*
 * Return a pointer to the file handle within the READDIR arguments.
 */
void *
rfs_readdir_getfh(struct nfsrddirargs *rda)
{
	return (&rda->rda_fh);
}
/*
 * Free the directory-entry buffer allocated for a READDIR reply by
 * rfs_readdir().
 */
void
rfs_rddirfree(struct nfsrddirres *rd)
{
	/* rd_entries is NULL when the request failed before allocation. */
	if (rd->rd_entries != NULL)
		kmem_free(rd->rd_entries, rd->rd_bufsize);
}
2707
2708 /* ARGSUSED */
2709 void
2710 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2711 struct svc_req *req, cred_t *cr)
2712 {
2713 int error;
2714 struct statvfs64 sb;
2715 vnode_t *vp;
2716
2717 vp = nfs_fhtovp(fh, exi);
2718 if (vp == NULL) {
2719 fs->fs_status = NFSERR_STALE;
2720 return;
2721 }
2722
2723 error = VFS_STATVFS(vp->v_vfsp, &sb);
2724
2725 if (!error) {
2726 fs->fs_tsize = nfstsize();
2727 fs->fs_bsize = sb.f_frsize;
2728 fs->fs_blocks = sb.f_blocks;
2729 fs->fs_bfree = sb.f_bfree;
2730 fs->fs_bavail = sb.f_bavail;
2731 }
2732
2733 VN_RELE(vp);
2734
2735 fs->fs_status = puterrno(error);
2736
2737 }
/*
 * Return the file handle passed to the STATFS procedure.
 */
void *
rfs_statfs_getfh(fhandle_t *fh)
{
	return (fh);
}
2743
/*
 * Convert the settable attributes from an NFSv2 sattr into a vattr,
 * setting a va_mask bit only for each field the client actually
 * supplied; the protocol marks "don't change" fields with all-ones.
 * Returns 0, or EOVERFLOW when a 32-bit wire time cannot be
 * represented in time_t on this platform.
 */
static int
sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
{
	vap->va_mask = 0;

	/*
	 * There was a sign extension bug in some VFS based systems
	 * which stored the mode as a short.  When it would get
	 * assigned to a u_long, no sign extension would occur.
	 * It needed to, but this wasn't noticed because sa_mode
	 * would then get assigned back to the short, thus ignoring
	 * the upper 16 bits of sa_mode.
	 *
	 * To make this implementation work for both broken
	 * clients and good clients, we check for both versions
	 * of the mode.
	 */
	if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
	    sa->sa_mode != (uint32_t)-1) {
		vap->va_mask |= AT_MODE;
		vap->va_mode = sa->sa_mode;
	}
	if (sa->sa_uid != (uint32_t)-1) {
		vap->va_mask |= AT_UID;
		vap->va_uid = sa->sa_uid;
	}
	if (sa->sa_gid != (uint32_t)-1) {
		vap->va_mask |= AT_GID;
		vap->va_gid = sa->sa_gid;
	}
	if (sa->sa_size != (uint32_t)-1) {
		vap->va_mask |= AT_SIZE;
		vap->va_size = sa->sa_size;
	}
	if (sa->sa_atime.tv_sec != (int32_t)-1 &&
	    sa->sa_atime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_ATIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
		vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
	}
	if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
	    sa->sa_mtime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_MTIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
		vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
	}
	return (0);
}
2810
/*
 * Map vnode types (indexed by vtype_t) to NFSv2 wire file types.
 * Entries of 0 are vnode types with no v2 representation.  The NFSOC
 * entry presumably corresponds to VSOCK — confirm against vtype_t
 * ordering in sys/vnode.h.
 */
static enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};
2814
2815 /*
2816 * check the following fields for overflow: nodeid, size, and time.
2817 * There could be a problem when converting 64-bit LP64 fields
2818 * into 32-bit ones. Return an error if there is an overflow.
2819 */
2820 int
2821 vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
2822 {
2823 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
2824 na->na_type = vt_to_nf[vap->va_type];
2825
2826 if (vap->va_mode == (unsigned short) -1)
2827 na->na_mode = (uint32_t)-1;
2828 else
2829 na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;
2830
2831 if (vap->va_uid == (unsigned short)(-1))
2832 na->na_uid = (uint32_t)(-1);
2833 else if (vap->va_uid == UID_NOBODY)
2834 na->na_uid = (uint32_t)NFS_UID_NOBODY;
2835 else
2836 na->na_uid = vap->va_uid;
2837
2838 if (vap->va_gid == (unsigned short)(-1))
2839 na->na_gid = (uint32_t)-1;
2840 else if (vap->va_gid == GID_NOBODY)
2841 na->na_gid = (uint32_t)NFS_GID_NOBODY;
2842 else
2843 na->na_gid = vap->va_gid;
2844
2845 /*
2846 * Do we need to check fsid for overflow? It is 64-bit in the
2847 * vattr, but are bigger than 32 bit values supported?
2848 */
2849 na->na_fsid = vap->va_fsid;
2850
2851 na->na_nodeid = vap->va_nodeid;
2852
2853 /*
2854 * Check to make sure that the nodeid is representable over the
2855 * wire without losing bits.
2856 */
2857 if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
2858 return (EFBIG);
2859 na->na_nlink = vap->va_nlink;
2860
2861 /*
2862 * Check for big files here, instead of at the caller. See
2863 * comments in cstat for large special file explanation.
2864 */
2865 if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
2866 if ((vap->va_type == VREG) || (vap->va_type == VDIR))
2867 return (EFBIG);
2868 if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
2869 /* UNKNOWN_SIZE | OVERFLOW */
2870 na->na_size = MAXOFF32_T;
2871 } else
2872 na->na_size = vap->va_size;
2873 } else
2874 na->na_size = vap->va_size;
2875
2876 /*
2877 * If the vnode times overflow the 32-bit times that NFS2
2878 * uses on the wire then return an error.
2879 */
2880 if (!NFS_VAP_TIME_OK(vap)) {
2881 return (EOVERFLOW);
2882 }
2883 na->na_atime.tv_sec = vap->va_atime.tv_sec;
2884 na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;
2885
2886 na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
2887 na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;
2888
2889 na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
2890 na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;
2891
2892 /*
2893 * If the dev_t will fit into 16 bits then compress
2894 * it, otherwise leave it alone. See comments in
2895 * nfs_client.c.
2896 */
2897 if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
2898 getmajor(vap->va_rdev) <= SO4_MAXMAJ)
2899 na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
2900 else
2901 (void) cmpldev(&na->na_rdev, vap->va_rdev);
2902
2903 na->na_blocks = vap->va_nblocks;
2904 na->na_blocksize = vap->va_blksize;
2905
2906 /*
2907 * This bit of ugliness is a *TEMPORARY* hack to preserve the
2908 * over-the-wire protocols for named-pipe vnodes. It remaps the
2909 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
2910 *
2911 * BUYER BEWARE:
2912 * If you are porting the NFS to a non-Sun server, you probably
2913 * don't want to include the following block of code. The
2914 * over-the-wire special file types will be changing with the
2915 * NFS Protocol Revision.
2916 */
2917 if (vap->va_type == VFIFO)
2918 NA_SETFIFO(na);
2919 return (0);
2920 }
2921
2922 /*
2923 * acl v2 support: returns approximate permission.
2924 * default: returns minimal permission (more restrictive)
2925 * aclok: returns maximal permission (less restrictive)
2926 * This routine changes the permissions that are alaredy in *va.
2927 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
2928 * CLASS_OBJ is always the same as GROUP_OBJ entry.
2929 */
2930 static void
2931 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
2932 {
2933 vsecattr_t vsa;
2934 int aclcnt;
2935 aclent_t *aclentp;
2936 mode_t mask_perm;
2937 mode_t grp_perm;
2938 mode_t other_perm;
2939 mode_t other_orig;
2940 int error;
2941
2942 /* dont care default acl */
2943 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
2944 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);
2945
2946 if (!error) {
2947 aclcnt = vsa.vsa_aclcnt;
2948 if (aclcnt > MIN_ACL_ENTRIES) {
2949 /* non-trivial ACL */
2950 aclentp = vsa.vsa_aclentp;
2951 if (exi->exi_export.ex_flags & EX_ACLOK) {
2952 /* maximal permissions */
2953 grp_perm = 0;
2954 other_perm = 0;
2955 for (; aclcnt > 0; aclcnt--, aclentp++) {
2956 switch (aclentp->a_type) {
2957 case USER_OBJ:
2958 break;
2959 case USER:
2960 grp_perm |=
2961 aclentp->a_perm << 3;
2962 other_perm |= aclentp->a_perm;
2963 break;
2964 case GROUP_OBJ:
2965 grp_perm |=
2966 aclentp->a_perm << 3;
2967 break;
2968 case GROUP:
2969 other_perm |= aclentp->a_perm;
2970 break;
2971 case OTHER_OBJ:
2972 other_orig = aclentp->a_perm;
2973 break;
2974 case CLASS_OBJ:
2975 mask_perm = aclentp->a_perm;
2976 break;
2977 default:
2978 break;
2979 }
2980 }
2981 grp_perm &= mask_perm << 3;
2982 other_perm &= mask_perm;
2983 other_perm |= other_orig;
2984
2985 } else {
2986 /* minimal permissions */
2987 grp_perm = 070;
2988 other_perm = 07;
2989 for (; aclcnt > 0; aclcnt--, aclentp++) {
2990 switch (aclentp->a_type) {
2991 case USER_OBJ:
2992 break;
2993 case USER:
2994 case CLASS_OBJ:
2995 grp_perm &=
2996 aclentp->a_perm << 3;
2997 other_perm &=
2998 aclentp->a_perm;
2999 break;
3000 case GROUP_OBJ:
3001 grp_perm &=
3002 aclentp->a_perm << 3;
3003 break;
3004 case GROUP:
3005 other_perm &=
3006 aclentp->a_perm;
3007 break;
3008 case OTHER_OBJ:
3009 other_perm &=
3010 aclentp->a_perm;
3011 break;
3012 default:
3013 break;
3014 }
3015 }
3016 }
3017 /* copy to va */
3018 va->va_mode &= ~077;
3019 va->va_mode |= grp_perm | other_perm;
3020 }
3021 if (vsa.vsa_aclcnt)
3022 kmem_free(vsa.vsa_aclentp,
3023 vsa.vsa_aclcnt * sizeof (aclent_t));
3024 }
3025 }
3026
/*
 * One-time NFSv2 server initialization: set up the async-write lock
 * and obtain a filesystem caller id for this server's VOP calls.
 */
void
rfs_srvrinit(void)
{
	mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL);
	nfs2_srv_caller_id = fs_new_caller_id();
}
3033
/*
 * NFSv2 server teardown: destroy the lock created in rfs_srvrinit().
 */
void
rfs_srvrfini(void)
{
	mutex_destroy(&rfs_async_write_lock);
}
3039
3040 static int
3041 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
3042 {
3043 struct clist *wcl;
3044 int wlist_len;
3045 uint32_t count = rr->rr_count;
3046
3047 wcl = ra->ra_wlist;
3048
3049 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3050 return (FALSE);
3051 }
3052
3053 wcl = ra->ra_wlist;
3054 rr->rr_ok.rrok_wlist_len = wlist_len;
3055 rr->rr_ok.rrok_wlist = wcl;
3056
3057 return (TRUE);
3058 }