1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
26 /* All Rights Reserved */
27
28 #include <sys/param.h>
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/cred.h>
32 #include <sys/buf.h>
33 #include <sys/vfs.h>
34 #include <sys/vnode.h>
35 #include <sys/uio.h>
36 #include <sys/errno.h>
37 #include <sys/sysmacros.h>
38 #include <sys/statvfs.h>
39 #include <sys/kmem.h>
40 #include <sys/dirent.h>
41 #include <sys/cmn_err.h>
42 #include <sys/debug.h>
43 #include <sys/systeminfo.h>
44 #include <sys/flock.h>
45 #include <sys/nbmlock.h>
46 #include <sys/policy.h>
47 #include <sys/sdt.h>
48
49 #include <rpc/types.h>
50 #include <rpc/auth.h>
51 #include <rpc/svc.h>
52 #include <rpc/rpc_rdma.h>
53
54 #include <nfs/nfs.h>
55 #include <nfs/export.h>
56 #include <nfs/nfs_cmd.h>
57
58 #include <sys/strsubr.h>
59
60 #include <sys/tsol/label.h>
61 #include <sys/tsol/tndb.h>
62
63 #include <sys/zone.h>
64
65 #include <inet/ip.h>
66 #include <inet/ip6.h>
67
68 /*
69 * These are the interface routines for the server side of the
70 * Network File System. See the NFS version 3 protocol specification
71 * for a description of this interface.
72 */
73
/*
 * File-scope write verifier.
 * NOTE(review): not referenced in the visible part of this file;
 * presumably handed back to clients by the write/commit paths -- confirm.
 */
static writeverf3 write3verf;

/*
 * Forward declarations of the static attribute-conversion helpers and
 * the RDMA read-reply setup routine used by the procedures below.
 */
static int	sattr3_to_vattr(sattr3 *, struct vattr *);
static int	vattr_to_fattr3(struct vattr *, fattr3 *);
static int	vattr_to_wcc_attr(struct vattr *, wcc_attr *);
static void	vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
static void	vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
static int	rdma_setup_read_data3(READ3args *, READ3resok *);

/*
 * Defined outside this file.  When non-zero, rfs3_read() tries to use
 * loaned buffers for replies that are not returned via RDMA.
 */
extern int nfs_loaned_buffers;

/*
 * Caller id stored in caller_context_t.cc_caller_id for the VOP calls
 * made by rfs3_setattr() and rfs3_read().
 */
u_longlong_t nfs3_srv_caller_id;
86
87 /* ARGSUSED */
88 void
89 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
90 struct svc_req *req, cred_t *cr)
91 {
92 int error;
93 vnode_t *vp;
94 struct vattr va;
95
96 vp = nfs3_fhtovp(&args->object, exi);
97
98 DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
99 cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
100
101 if (vp == NULL) {
102 error = ESTALE;
103 goto out;
104 }
105
106 va.va_mask = AT_ALL;
107 error = rfs4_delegated_getattr(vp, &va, 0, cr);
108
109 if (!error) {
110 /* Lie about the object type for a referral */
111 if (vn_is_nfs_reparse(vp, cr))
112 va.va_type = VLNK;
113
114 /* overflow error if time or size is out of range */
115 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
116 if (error)
117 goto out;
118 resp->status = NFS3_OK;
119
120 DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
121 cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
122
123 VN_RELE(vp);
124
125 return;
126 }
127
128 out:
129 if (curthread->t_flag & T_WOULDBLOCK) {
130 curthread->t_flag &= ~T_WOULDBLOCK;
131 resp->status = NFS3ERR_JUKEBOX;
132 } else
133 resp->status = puterrno3(error);
134
135 DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
136 cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
137
138 if (vp != NULL)
139 VN_RELE(vp);
140 }
141
142 void *
143 rfs3_getattr_getfh(GETATTR3args *args)
144 {
145
146 return (&args->object);
147 }
148
/*
 * SETATTR: apply args->new_attributes to the object named by args->object.
 *
 * On success resp->status is NFS3_OK and resp->resok.obj_wcc is built from
 * the attributes sampled before (bvap) and after (avap) the change.  On
 * failure resp->resfail.obj_wcc is built from whichever of the two had
 * been collected by then; either pointer may still be NULL.
 *
 * Exit labels:
 *   out  - translate `error' (or T_WOULDBLOCK) into resp->status
 *   out1 - resp->status has already been set by the caller of the goto
 */
void
rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;
	struct vattr *bvap;	/* attributes before the change, or NULL */
	struct vattr bva;
	struct vattr *avap;	/* attributes after the change, or NULL */
	struct vattr ava;
	int flag;
	int in_crit = 0;	/* non-zero while inside the nbl critical region */
	struct flock64 bf;
	caller_context_t ct;

	bvap = NULL;
	avap = NULL;

	vp = nfs3_fhtovp(&args->object, exi);

	DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);

	if (vp == NULL) {
		error = ESTALE;
		goto out;
	}

	/* ava first carries the requested attributes; it is reused later. */
	error = sattr3_to_vattr(&args->new_attributes, &ava);
	if (error)
		goto out;

	/*
	 * On a labeled system a client whose label is not admin_low must
	 * pass an equality label check against the object.
	 */
	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
			    exi)) {
				resp->status = NFS3ERR_ACCES;
				goto out1;
			}
		}
	}

	/*
	 * We need to specially handle size changes because of
	 * possible conflicting NBMAND locks. Get into critical
	 * region before VOP_GETATTR, so the size attribute is
	 * valid when checking conflicts.
	 *
	 * Also, check to see if the v4 side of the server has
	 * delegated this file. If so, then we return JUKEBOX to
	 * allow the client to retransmit its request.
	 */
	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}
	}

	bva.va_mask = AT_ALL;
	error = rfs4_delegated_getattr(vp, &bva, 0, cr);

	/*
	 * If we can't get the attributes, then we can't do the
	 * right access checking. So, we'll fail the request.
	 */
	if (error)
		goto out;

	bvap = &bva;

	if (rdonly(exi, req) || vn_is_readonly(vp)) {
		resp->status = NFS3ERR_ROFS;
		goto out1;
	}

	/*
	 * Guarded update: refuse the change if the ctime supplied by the
	 * client does not match the object's current ctime.
	 */
	if (args->guard.check &&
	    (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
	    args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
		resp->status = NFS3ERR_NOT_SYNC;
		goto out1;
	}

	/* ATTR_UTIME is passed only when the client supplied the mtime. */
	if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
		flag = ATTR_UTIME;
	else
		flag = 0;

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
	    (exi->exi_export.ex_flags & EX_NOSUID))
		ava.va_mode &= ~(VSUID | VSGID);

	/* Caller context shared by the VOP calls below. */
	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs3_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with modes
	 * which indicate read-only, but with the file opened for
	 * writing. If the client then tries to set the size of
	 * the file, then the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing so,
	 * although it should be legal for it to do so. To get
	 * around this, we do the access checking for ourselves
	 * and then use VOP_SPACE which doesn't do the access
	 * checking which VOP_SETATTR does. VOP_SPACE can only
	 * operate on VREG files, let VOP_SETATTR handle the other
	 * extremely rare cases.
	 * Also the client should not be allowed to change the
	 * size of the file if there is a conflicting non-blocking
	 * mandatory lock in the region of the change.
	 */
	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
		if (in_crit) {
			u_offset_t offset;
			ssize_t length;

			/*
			 * The region to check is the span between the
			 * old and the new size, whichever is larger.
			 */
			if (ava.va_size < bva.va_size) {
				offset = ava.va_size;
				length = bva.va_size - ava.va_size;
			} else {
				offset = bva.va_size;
				length = ava.va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    NULL)) {
				error = EACCES;
				goto out;
			}
		}

		/*
		 * The owner changing the size goes through VOP_SPACE;
		 * AT_SIZE is cleared so VOP_SETATTR does not redo it.
		 */
		if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
			ava.va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)ava.va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;
			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)ava.va_size, cr, &ct);
		}
	}

	/* Apply whatever attributes remain selected in the mask. */
	if (!error && ava.va_mask)
		error = VOP_SETATTR(vp, &ava, flag, cr, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		resp->status = NFS3ERR_JUKEBOX;
		goto out1;
	}

	/* Reuse ava to sample the post-operation attributes. */
	ava.va_mask = AT_ALL;
	avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;

	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

	if (error)
		goto out;

	if (in_crit)
		nbl_end_crit(vp);

	resp->status = NFS3_OK;
	vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);

	DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);

	VN_RELE(vp);

	return;

out:
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
out1:
	DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);

	if (vp != NULL) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
	}
	/* bvap/avap point at local structs, so this is safe after VN_RELE. */
	vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
}
355
356 void *
357 rfs3_setattr_getfh(SETATTR3args *args)
358 {
359
360 return (&args->object);
361 }
362
363 /* ARGSUSED */
364 void
365 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
366 struct svc_req *req, cred_t *cr)
367 {
368 int error;
369 vnode_t *vp;
370 vnode_t *dvp;
371 struct vattr *vap;
372 struct vattr va;
373 struct vattr *dvap;
374 struct vattr dva;
375 nfs_fh3 *fhp;
376 struct sec_ol sec = {0, 0};
377 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
378 struct sockaddr *ca;
379 char *name = NULL;
380
381 dvap = NULL;
382
383 /*
384 * Allow lookups from the root - the default
385 * location of the public filehandle.
386 */
387 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
388 dvp = rootdir;
389 VN_HOLD(dvp);
390
391 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
392 cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
393 } else {
394 dvp = nfs3_fhtovp(&args->what.dir, exi);
395
396 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
397 cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
398
399 if (dvp == NULL) {
400 error = ESTALE;
401 goto out;
402 }
403 }
404
405 dva.va_mask = AT_ALL;
406 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
407
408 if (args->what.name == nfs3nametoolong) {
409 resp->status = NFS3ERR_NAMETOOLONG;
410 goto out1;
411 }
412
413 if (args->what.name == NULL || *(args->what.name) == '\0') {
414 resp->status = NFS3ERR_ACCES;
415 goto out1;
416 }
417
418 fhp = &args->what.dir;
419 if (strcmp(args->what.name, "..") == 0 &&
420 EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
421 resp->status = NFS3ERR_NOENT;
422 goto out1;
423 }
424
425 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
426 name = nfscmd_convname(ca, exi, args->what.name,
427 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
428
429 if (name == NULL) {
430 resp->status = NFS3ERR_ACCES;
431 goto out1;
432 }
433
434 exi_hold(exi);
435
436 /*
437 * If the public filehandle is used then allow
438 * a multi-component lookup
439 */
440 if (PUBLIC_FH3(&args->what.dir)) {
441 struct exportinfo *new;
442
443 publicfh_flag = TRUE;
444
445 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
446 &new, &sec);
447
448 if (error == 0) {
449 exi_rele(exi);
450 exi = new;
451 }
452
453 /*
454 * Since WebNFS may bypass MOUNT, we need to ensure this
455 * request didn't come from an unlabeled admin_low client.
456 */
457 if (is_system_labeled() && error == 0) {
458 int addr_type;
459 void *ipaddr;
460 tsol_tpc_t *tp;
461
462 if (ca->sa_family == AF_INET) {
463 addr_type = IPV4_VERSION;
464 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
465 } else if (ca->sa_family == AF_INET6) {
466 addr_type = IPV6_VERSION;
467 ipaddr = &((struct sockaddr_in6 *)
468 ca)->sin6_addr;
469 }
470 tp = find_tpc(ipaddr, addr_type, B_FALSE);
471 if (tp == NULL || tp->tpc_tp.tp_doi !=
472 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
473 SUN_CIPSO) {
474 VN_RELE(vp);
475 resp->status = NFS3ERR_ACCES;
476 error = 1;
477 }
478 if (tp != NULL)
479 TPC_RELE(tp);
480 }
481 } else {
482 error = VOP_LOOKUP(dvp, name, &vp,
483 NULL, 0, NULL, cr, NULL, NULL, NULL);
484 }
485
486 if (name != args->what.name)
487 kmem_free(name, MAXPATHLEN + 1);
488
489 if (error == 0 && vn_ismntpt(vp)) {
490 error = rfs_cross_mnt(&vp, &exi);
491 if (error)
492 VN_RELE(vp);
493 }
494
495 if (is_system_labeled() && error == 0) {
496 bslabel_t *clabel = req->rq_label;
497
498 ASSERT(clabel != NULL);
499 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
500 "got client label from request(1)", struct svc_req *, req);
501
502 if (!blequal(&l_admin_low->tsl_label, clabel)) {
503 if (!do_rfs_label_check(clabel, dvp,
504 DOMINANCE_CHECK, exi)) {
505 VN_RELE(vp);
506 resp->status = NFS3ERR_ACCES;
507 error = 1;
508 }
509 }
510 }
511
512 dva.va_mask = AT_ALL;
513 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
514
515 if (error)
516 goto out;
517
518 if (sec.sec_flags & SEC_QUERY) {
519 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
520 } else {
521 error = makefh3(&resp->resok.object, vp, exi);
522 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
523 auth_weak = TRUE;
524 }
525
526 if (error) {
527 VN_RELE(vp);
528 goto out;
529 }
530
531 va.va_mask = AT_ALL;
532 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
533
534 exi_rele(exi);
535 VN_RELE(vp);
536
537 resp->status = NFS3_OK;
538 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
539 vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
540
541 /*
542 * If it's public fh, no 0x81, and client's flavor is
543 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
544 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
545 */
546 if (auth_weak)
547 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
548
549 DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
550 cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
551 VN_RELE(dvp);
552
553 return;
554
555 out:
556 /*
557 * The passed argument exportinfo is released by the
558 * caller, common_dispatch
559 */
560 exi_rele(exi);
561
562 if (curthread->t_flag & T_WOULDBLOCK) {
563 curthread->t_flag &= ~T_WOULDBLOCK;
564 resp->status = NFS3ERR_JUKEBOX;
565 } else
566 resp->status = puterrno3(error);
567 out1:
568 DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
569 cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
570
571 if (dvp != NULL)
572 VN_RELE(dvp);
573 vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
574
575 }
576
577 void *
578 rfs3_lookup_getfh(LOOKUP3args *args)
579 {
580
581 return (&args->what.dir);
582 }
583
584 /* ARGSUSED */
585 void
586 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
587 struct svc_req *req, cred_t *cr)
588 {
589 int error;
590 vnode_t *vp;
591 struct vattr *vap;
592 struct vattr va;
593 int checkwriteperm;
594 boolean_t dominant_label = B_FALSE;
595 boolean_t equal_label = B_FALSE;
596 boolean_t admin_low_client;
597
598 vap = NULL;
599
600 vp = nfs3_fhtovp(&args->object, exi);
601
602 DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
603 cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
604
605 if (vp == NULL) {
606 error = ESTALE;
607 goto out;
608 }
609
610 /*
611 * If the file system is exported read only, it is not appropriate
612 * to check write permissions for regular files and directories.
613 * Special files are interpreted by the client, so the underlying
614 * permissions are sent back to the client for interpretation.
615 */
616 if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
617 checkwriteperm = 0;
618 else
619 checkwriteperm = 1;
620
621 /*
622 * We need the mode so that we can correctly determine access
623 * permissions relative to a mandatory lock file. Access to
624 * mandatory lock files is denied on the server, so it might
625 * as well be reflected to the server during the open.
626 */
627 va.va_mask = AT_MODE;
628 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
629 if (error)
630 goto out;
631
632 vap = &va;
633
634 resp->resok.access = 0;
635
636 if (is_system_labeled()) {
637 bslabel_t *clabel = req->rq_label;
638
639 ASSERT(clabel != NULL);
640 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
641 "got client label from request(1)", struct svc_req *, req);
642
643 if (!blequal(&l_admin_low->tsl_label, clabel)) {
644 if ((equal_label = do_rfs_label_check(clabel, vp,
645 EQUALITY_CHECK, exi)) == B_FALSE) {
646 dominant_label = do_rfs_label_check(clabel,
647 vp, DOMINANCE_CHECK, exi);
648 } else
649 dominant_label = B_TRUE;
650 admin_low_client = B_FALSE;
651 } else
652 admin_low_client = B_TRUE;
653 }
654
655 if (args->access & ACCESS3_READ) {
656 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
657 if (error) {
658 if (curthread->t_flag & T_WOULDBLOCK)
659 goto out;
660 } else if (!MANDLOCK(vp, va.va_mode) &&
661 (!is_system_labeled() || admin_low_client ||
662 dominant_label))
663 resp->resok.access |= ACCESS3_READ;
664 }
665 if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
666 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
667 if (error) {
668 if (curthread->t_flag & T_WOULDBLOCK)
669 goto out;
670 } else if (!is_system_labeled() || admin_low_client ||
671 dominant_label)
672 resp->resok.access |= ACCESS3_LOOKUP;
673 }
674 if (checkwriteperm &&
675 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
676 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
677 if (error) {
678 if (curthread->t_flag & T_WOULDBLOCK)
679 goto out;
680 } else if (!MANDLOCK(vp, va.va_mode) &&
681 (!is_system_labeled() || admin_low_client || equal_label)) {
682 resp->resok.access |=
683 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
684 }
685 }
686 if (checkwriteperm &&
687 (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
688 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
689 if (error) {
690 if (curthread->t_flag & T_WOULDBLOCK)
691 goto out;
692 } else if (!is_system_labeled() || admin_low_client ||
693 equal_label)
694 resp->resok.access |= ACCESS3_DELETE;
695 }
696 if (args->access & ACCESS3_EXECUTE) {
697 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
698 if (error) {
699 if (curthread->t_flag & T_WOULDBLOCK)
700 goto out;
701 } else if (!MANDLOCK(vp, va.va_mode) &&
702 (!is_system_labeled() || admin_low_client ||
703 dominant_label))
704 resp->resok.access |= ACCESS3_EXECUTE;
705 }
706
707 va.va_mask = AT_ALL;
708 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
709
710 resp->status = NFS3_OK;
711 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
712
713 DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
714 cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
715
716 VN_RELE(vp);
717
718 return;
719
720 out:
721 if (curthread->t_flag & T_WOULDBLOCK) {
722 curthread->t_flag &= ~T_WOULDBLOCK;
723 resp->status = NFS3ERR_JUKEBOX;
724 } else
725 resp->status = puterrno3(error);
726 DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
727 cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
728 if (vp != NULL)
729 VN_RELE(vp);
730 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
731 }
732
733 void *
734 rfs3_access_getfh(ACCESS3args *args)
735 {
736
737 return (&args->object);
738 }
739
/*
 * READLINK: return the contents of the symbolic link named by
 * args->symlink.  An NFS referral (reparse point) is also accepted and
 * answered with an artificial link built by build_symlink().
 *
 * On success resp->status is NFS3_OK and resp->resok.data points at a
 * MAXPATHLEN + 1 byte kmem buffer holding the NUL-terminated link text;
 * that buffer is released later by rfs3_readlink_free().  On failure
 * only resp->resfail.symlink_attributes is filled in.
 *
 * Exit labels:
 *   out  - translate `error' (or T_WOULDBLOCK) into resp->status
 *   out1 - resp->status has already been set by the caller of the goto
 */
/* ARGSUSED */
void
rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;
	struct vattr *vap;
	struct vattr va;
	struct iovec iov;
	struct uio uio;
	char *data;		/* raw link text, MAXPATHLEN + 1 bytes */
	struct sockaddr *ca;
	char *name = NULL;	/* link text after outbound name conversion */
	int is_referral = 0;

	vap = NULL;

	vp = nfs3_fhtovp(&args->symlink, exi);

	DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, READLINK3args *, args);

	if (vp == NULL) {
		error = ESTALE;
		goto out;
	}

	va.va_mask = AT_ALL;
	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
	if (error)
		goto out;

	vap = &va;

	/* We lied about the object type for a referral */
	if (vn_is_nfs_reparse(vp, cr))
		is_referral = 1;

	if (vp->v_type != VLNK && !is_referral) {
		resp->status = NFS3ERR_INVAL;
		goto out1;
	}

	if (MANDLOCK(vp, va.va_mode)) {
		resp->status = NFS3ERR_ACCES;
		goto out1;
	}

	/*
	 * On a labeled system a client whose label is not admin_low must
	 * pass a dominance label check against the object.
	 */
	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
			    exi)) {
				resp->status = NFS3ERR_ACCES;
				goto out1;
			}
		}
	}

	/* One extra byte beyond MAXPATHLEN leaves room for the NUL. */
	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);

	if (is_referral) {
		char *s;
		size_t strsz;

		/* Get an artificial symlink based on a referral */
		s = build_symlink(vp, cr, &strsz);
		global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
		DTRACE_PROBE2(nfs3serv__func__referral__reflink,
		    vnode_t *, vp, char *, s);
		if (s == NULL)
			error = EINVAL;
		else {
			error = 0;
			(void) strlcpy(data, s, MAXPATHLEN + 1);
			kmem_free(s, strsz);
		}

	} else {

		/* Read at most MAXPATHLEN bytes of link text into data. */
		iov.iov_base = data;
		iov.iov_len = MAXPATHLEN;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_CACHED;
		uio.uio_loffset = 0;
		uio.uio_resid = MAXPATHLEN;

		error = VOP_READLINK(vp, &uio, cr, NULL);

		/* Terminate the text just past the bytes actually read. */
		if (!error)
			*(data + MAXPATHLEN - uio.uio_resid) = '\0';
	}

	/* Refresh the attributes for the reply, whether or not we failed. */
	va.va_mask = AT_ALL;
	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;

	/* Lie about object type again just to be consistent */
	if (is_referral && vap != NULL)
		vap->va_type = VLNK;

#if 0 /* notyet */
	/*
	 * Don't do this.  It causes local disk writes when just
	 * reading the file and the overhead is deemed larger
	 * than the benefit.
	 */
	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
#endif

	if (error) {
		kmem_free(data, MAXPATHLEN + 1);
		goto out;
	}

	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
	    MAXPATHLEN + 1);

	if (name == NULL) {
		/*
		 * Even though the conversion failed, we return
		 * something. We just don't translate it.
		 */
		name = data;
	}

	resp->status = NFS3_OK;
	vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
	resp->resok.data = name;

	DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
	VN_RELE(vp);

	/*
	 * If the conversion produced a separate buffer, the raw text is
	 * no longer needed; otherwise data itself is what the reply
	 * references and it is freed by rfs3_readlink_free().
	 */
	if (name != data)
		kmem_free(data, MAXPATHLEN + 1);

	return;

out:
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
out1:
	DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
	if (vp != NULL)
		VN_RELE(vp);
	vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
}
903
904 void *
905 rfs3_readlink_getfh(READLINK3args *args)
906 {
907
908 return (&args->symlink);
909 }
910
911 void
912 rfs3_readlink_free(READLINK3res *resp)
913 {
914
915 if (resp->status == NFS3_OK)
916 kmem_free(resp->resok.data, MAXPATHLEN + 1);
917 }
918
/*
 * Server routine to handle read
 * May handle RDMA data as well as mblks
 *
 * Reads up to args->count bytes (clamped to rfs3_tsize(req)) starting at
 * args->offset from the regular file named by args->file.  The data is
 * returned in one of three ways:
 *   - RDMA write chunks, when the request carries a write list
 *     (args->wlist);
 *   - loaned buffers, when nfs_loaned_buffers is set, RDMA is not in use
 *     and VOP_REQZCBUF() succeeds;
 *   - otherwise an mblk allocated here.
 * On success any mblk is handed back in resp->resok.data.mp and is freed
 * later by rfs3_read_free().
 *
 * Exit labels:
 *   done - reply already built; just fire the probe and release vp
 *   out  - translate `error' (or T_WOULDBLOCK) into resp->status
 *   out1 - resp->status already set; undo rwlock / nbl region / vp hold
 */
/* ARGSUSED */
void
rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;
	struct vattr *vap;
	struct vattr va;
	struct iovec iov;
	struct uio uio;
	u_offset_t offset;
	mblk_t *mp = NULL;
	int alloc_err = 0;
	int in_crit = 0;	/* non-zero while inside the nbl critical region */
	int need_rwunlock = 0;	/* non-zero once VOP_RWLOCK has been taken */
	caller_context_t ct;
	int rdma_used = 0;	/* reply data goes out via RDMA write chunks */
	int loaned_buffers;	/* try / using loaned buffers for the data */
	struct uio *uiop;	/* uio actually passed to VOP_READ */

	vap = NULL;

	vp = nfs3_fhtovp(&args->file, exi);

	DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, READ3args *, args);

	if (vp == NULL) {
		error = ESTALE;
		goto out;
	}

	/* A write list must be large enough for the requested count. */
	if (args->wlist) {
		if (args->count > clist_len(args->wlist)) {
			error = EINVAL;
			goto out;
		}
		rdma_used = 1;
	}

	/* use loaned buffers for TCP */
	loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;

	/*
	 * On a labeled system a client whose label is not admin_low must
	 * pass a dominance label check against the object.
	 */
	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
			    exi)) {
				resp->status = NFS3ERR_ACCES;
				goto out1;
			}
		}
	}

	/* Caller context shared by the VOP calls below. */
	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs3_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write requests.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		resp->status = NFS3ERR_JUKEBOX;
		goto out1;
	}

	/* From here on the error exits must drop the rwlock. */
	need_rwunlock = 1;

	va.va_mask = AT_ALL;
	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	/*
	 * If we can't get the attributes, then we can't do the
	 * right access checking.  So, we'll fail the request.
	 */
	if (error)
		goto out;

	vap = &va;

	if (vp->v_type != VREG) {
		resp->status = NFS3ERR_INVAL;
		goto out1;
	}

	/*
	 * The owner is not access-checked.  Anyone else needs read
	 * permission or, failing that, execute permission.
	 */
	if (crgetuid(cr) != va.va_uid) {
		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
		if (error) {
			if (curthread->t_flag & T_WOULDBLOCK)
				goto out;
			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
			if (error)
				goto out;
		}
	}

	if (MANDLOCK(vp, va.va_mode)) {
		resp->status = NFS3ERR_ACCES;
		goto out1;
	}

	/* Reading at or past end of file: empty reply with eof set. */
	offset = args->offset;
	if (offset >= va.va_size) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);
		resp->status = NFS3_OK;
		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
		resp->resok.count = 0;
		resp->resok.eof = TRUE;
		resp->resok.data.data_len = 0;
		resp->resok.data.data_val = NULL;
		resp->resok.data.mp = NULL;
		/* RDMA */
		resp->resok.wlist = args->wlist;
		resp->resok.wlist_len = resp->resok.count;
		if (resp->resok.wlist)
			clist_zero_len(resp->resok.wlist);
		goto done;
	}

	/* Zero-length read: empty reply with eof clear. */
	if (args->count == 0) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);
		resp->status = NFS3_OK;
		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
		resp->resok.count = 0;
		resp->resok.eof = FALSE;
		resp->resok.data.data_len = 0;
		resp->resok.data.data_val = NULL;
		resp->resok.data.mp = NULL;
		/* RDMA */
		resp->resok.wlist = args->wlist;
		resp->resok.wlist_len = resp->resok.count;
		if (resp->resok.wlist)
			clist_zero_len(resp->resok.wlist);
		goto done;
	}

	/*
	 * do not allocate more memory than the max. allowed
	 * transfer size
	 */
	if (args->count > rfs3_tsize(req))
		args->count = rfs3_tsize(req);

	if (loaned_buffers) {
		uiop = (uio_t *)rfs_setup_xuio(vp);
		ASSERT(uiop != NULL);
		uiop->uio_segflg = UIO_SYSSPACE;
		uiop->uio_loffset = args->offset;
		uiop->uio_resid = args->count;

		/* Jump to do the read if successful */
		if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
			/*
			 * Need to hold the vnode until after VOP_RETZCBUF()
			 * is called.
			 */
			VN_HOLD(vp);
			goto doio_read;
		}

		DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
		    uiop->uio_loffset, int, uiop->uio_resid);

		uiop->uio_extflg = 0;
		/* failure to setup for zero copy */
		rfs_free_xuio((void *)uiop);
		loaned_buffers = 0;
	}

	/*
	 * If returning data via RDMA Write, then grab the chunk list.
	 * If we aren't returning READ data w/RDMA_WRITE, then grab
	 * a mblk.
	 */
	if (rdma_used) {
		(void) rdma_get_wchunk(req, &iov, args->wlist);
	} else {
		/*
		 * mp will contain the data to be sent out in the read reply.
		 * This will be freed after the reply has been sent out (by the
		 * driver).
		 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
		 * that the call to xdrmblk_putmblk() never fails.
		 */
		mp = allocb_wait(RNDUP(args->count), BPRI_MED, STR_NOSIG,
		    &alloc_err);
		ASSERT(mp != NULL);
		ASSERT(alloc_err == 0);

		iov.iov_base = (caddr_t)mp->b_datap->db_base;
		iov.iov_len = args->count;
	}

	/* Ordinary (non-loaned) path: read through the local uio. */
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = args->offset;
	uio.uio_resid = args->count;
	uiop = &uio;

doio_read:
	error = VOP_READ(vp, uiop, 0, cr, &ct);

	if (error) {
		if (mp)
			freemsg(mp);
		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
			resp->status = NFS3ERR_JUKEBOX;
			goto out1;
		}
		goto out;
	}

	/* make mblk using zc buffers */
	if (loaned_buffers) {
		mp = uio_to_mblk(uiop);
		ASSERT(mp != NULL);
	}

	/*
	 * Refresh the attributes after the read.  A failure here does not
	 * fail the request; the reply simply carries no attributes.
	 */
	va.va_mask = AT_ALL;
	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error)
		vap = NULL;
	else
		vap = &va;

	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);

	if (in_crit)
		nbl_end_crit(vp);

	resp->status = NFS3_OK;
	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
	/* Bytes actually read = requested count minus what is left over. */
	resp->resok.count = args->count - uiop->uio_resid;
	/* eof is reported only if the fresh size is known and was reached. */
	if (!error && offset + resp->resok.count == va.va_size)
		resp->resok.eof = TRUE;
	else
		resp->resok.eof = FALSE;
	resp->resok.data.data_len = resp->resok.count;

	if (mp)
		rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);

	resp->resok.data.mp = mp;
	resp->resok.size = (uint_t)args->count;

	if (rdma_used) {
		resp->resok.data.data_val = (caddr_t)iov.iov_base;
		if (!rdma_setup_read_data3(args, &(resp->resok))) {
			resp->status = NFS3ERR_INVAL;
		}
	} else {
		resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
		(resp->resok).wlist = NULL;
	}

done:
	DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, READ3res *, resp);

	VN_RELE(vp);

	return;

out:
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
out1:
	DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, READ3res *, resp);

	if (vp != NULL) {
		if (need_rwunlock)
			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
	}
	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
}
1234
1235 void
1236 rfs3_read_free(READ3res *resp)
1237 {
1238 mblk_t *mp;
1239
1240 if (resp->status == NFS3_OK) {
1241 mp = resp->resok.data.mp;
1242 if (mp != NULL)
1243 freemsg(mp);
1244 }
1245 }
1246
1247 void *
1248 rfs3_read_getfh(READ3args *args)
1249 {
1250
1251 return (&args->file);
1252 }
1253
/*
 * Size of the on-stack iovec array used by rfs3_write().  An incoming
 * mblk chain with more links than this gets a kmem_alloc()ed array.
 */
1254 #define MAX_IOVECS 12
1255
/*
 * DEBUG-only counters maintained by rfs3_write(): "hits" counts mblk
 * chains that fit in the on-stack array, "misses" counts those that
 * needed a separately allocated one.
 */
1256 #ifdef DEBUG
1257 static int rfs3_write_hits = 0;
1258 static int rfs3_write_misses = 0;
1259 #endif
1260
1261 void
1262 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1263 struct svc_req *req, cred_t *cr)
1264 {
1265 int error;
1266 vnode_t *vp;
1267 struct vattr *bvap = NULL;
1268 struct vattr bva;
1269 struct vattr *avap = NULL;
1270 struct vattr ava;
1271 u_offset_t rlimit;
1272 struct uio uio;
1273 struct iovec iov[MAX_IOVECS];
1274 mblk_t *m;
1275 struct iovec *iovp;
1276 int iovcnt;
1277 int ioflag;
1278 cred_t *savecred;
1279 int in_crit = 0;
1280 int rwlock_ret = -1;
1281 caller_context_t ct;
1282
1283 vp = nfs3_fhtovp(&args->file, exi);
1284
1285 DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
1286 cred_t *, cr, vnode_t *, vp, WRITE3args *, args);
1287
1288 if (vp == NULL) {
1289 error = ESTALE;
1290 goto err;
1291 }
1292
1293 if (is_system_labeled()) {
1294 bslabel_t *clabel = req->rq_label;
1295
1296 ASSERT(clabel != NULL);
1297 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1298 "got client label from request(1)", struct svc_req *, req);
1299
1300 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1301 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1302 exi)) {
1303 resp->status = NFS3ERR_ACCES;
1304 goto err1;
1305 }
1306 }
1307 }
1308
1309 ct.cc_sysid = 0;
1310 ct.cc_pid = 0;
1311 ct.cc_caller_id = nfs3_srv_caller_id;
1312 ct.cc_flags = CC_DONTBLOCK;
1313
1314 /*
1315 * We have to enter the critical region before calling VOP_RWLOCK
1316 * to avoid a deadlock with ufs.
1317 */
1318 if (nbl_need_check(vp)) {
1319 nbl_start_crit(vp, RW_READER);
1320 in_crit = 1;
1321 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1322 NULL)) {
1323 error = EACCES;
1324 goto err;
1325 }
1326 }
1327
1328 rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1329
1330 /* check if a monitor detected a delegation conflict */
1331 if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1332 resp->status = NFS3ERR_JUKEBOX;
1333 rwlock_ret = -1;
1334 goto err1;
1335 }
1336
1337
1338 bva.va_mask = AT_ALL;
1339 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1340
1341 /*
1342 * If we can't get the attributes, then we can't do the
1343 * right access checking. So, we'll fail the request.
1344 */
1345 if (error)
1346 goto err;
1347
1348 bvap = &bva;
1349 avap = bvap;
1350
1351 if (args->count != args->data.data_len) {
1352 resp->status = NFS3ERR_INVAL;
1353 goto err1;
1354 }
1355
1356 if (rdonly(exi, req)) {
1357 resp->status = NFS3ERR_ROFS;
1358 goto err1;
1359 }
1360
1361 if (vp->v_type != VREG) {
1362 resp->status = NFS3ERR_INVAL;
1363 goto err1;
1364 }
1365
1366 if (crgetuid(cr) != bva.va_uid &&
1367 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1368 goto err;
1369
1370 if (MANDLOCK(vp, bva.va_mode)) {
1371 resp->status = NFS3ERR_ACCES;
1372 goto err1;
1373 }
1374
1375 if (args->count == 0) {
1376 resp->status = NFS3_OK;
1377 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1378 resp->resok.count = 0;
1379 resp->resok.committed = args->stable;
1380 resp->resok.verf = write3verf;
1381 goto out;
1382 }
1383
1384 if (args->mblk != NULL) {
1385 iovcnt = 0;
1386 for (m = args->mblk; m != NULL; m = m->b_cont)
1387 iovcnt++;
1388 if (iovcnt <= MAX_IOVECS) {
1389 #ifdef DEBUG
1390 rfs3_write_hits++;
1391 #endif
1392 iovp = iov;
1393 } else {
1394 #ifdef DEBUG
1395 rfs3_write_misses++;
1396 #endif
1397 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1398 }
1399 mblk_to_iov(args->mblk, iovcnt, iovp);
1400
1401 } else if (args->rlist != NULL) {
1402 iovcnt = 1;
1403 iovp = iov;
1404 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1405 iovp->iov_len = args->count;
1406 } else {
1407 iovcnt = 1;
1408 iovp = iov;
1409 iovp->iov_base = args->data.data_val;
1410 iovp->iov_len = args->count;
1411 }
1412
1413 uio.uio_iov = iovp;
1414 uio.uio_iovcnt = iovcnt;
1415
1416 uio.uio_segflg = UIO_SYSSPACE;
1417 uio.uio_extflg = UIO_COPY_DEFAULT;
1418 uio.uio_loffset = args->offset;
1419 uio.uio_resid = args->count;
1420 uio.uio_llimit = curproc->p_fsz_ctl;
1421 rlimit = uio.uio_llimit - args->offset;
1422 if (rlimit < (u_offset_t)uio.uio_resid)
1423 uio.uio_resid = (int)rlimit;
1424
1425 if (args->stable == UNSTABLE)
1426 ioflag = 0;
1427 else if (args->stable == FILE_SYNC)
1428 ioflag = FSYNC;
1429 else if (args->stable == DATA_SYNC)
1430 ioflag = FDSYNC;
1431 else {
1432 if (iovp != iov)
1433 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1434 resp->status = NFS3ERR_INVAL;
1435 goto err1;
1436 }
1437
1438 /*
1439 * We're changing creds because VM may fault and we need
1440 * the cred of the current thread to be used if quota
1441 * checking is enabled.
1442 */
1443 savecred = curthread->t_cred;
1444 curthread->t_cred = cr;
1445 error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1446 curthread->t_cred = savecred;
1447
1448 if (iovp != iov)
1449 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1450
1451 /* check if a monitor detected a delegation conflict */
1452 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1453 resp->status = NFS3ERR_JUKEBOX;
1454 goto err1;
1455 }
1456
1457 ava.va_mask = AT_ALL;
1458 avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1459
1460 if (error)
1461 goto err;
1462
1463 /*
1464 * If we were unable to get the V_WRITELOCK_TRUE, then we
1465 * may not have accurate after attrs, so check if
1466 * we have both attributes, they have a non-zero va_seq, and
1467 * va_seq has changed by exactly one,
1468 * if not, turn off the before attr.
1469 */
1470 if (rwlock_ret != V_WRITELOCK_TRUE) {
1471 if (bvap == NULL || avap == NULL ||
1472 bvap->va_seq == 0 || avap->va_seq == 0 ||
1473 avap->va_seq != (bvap->va_seq + 1)) {
1474 bvap = NULL;
1475 }
1476 }
1477
1478 resp->status = NFS3_OK;
1479 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1480 resp->resok.count = args->count - uio.uio_resid;
1481 resp->resok.committed = args->stable;
1482 resp->resok.verf = write3verf;
1483 goto out;
1484
1485 err:
1486 if (curthread->t_flag & T_WOULDBLOCK) {
1487 curthread->t_flag &= ~T_WOULDBLOCK;
1488 resp->status = NFS3ERR_JUKEBOX;
1489 } else
1490 resp->status = puterrno3(error);
1491 err1:
1492 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1493 out:
1494 DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
1495 cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);
1496
1497 if (vp != NULL) {
1498 if (rwlock_ret != -1)
1499 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1500 if (in_crit)
1501 nbl_end_crit(vp);
1502 VN_RELE(vp);
1503 }
1504 }
1505
1506 void *
1507 rfs3_write_getfh(WRITE3args *args)
1508 {
1509
1510 return (&args->file);
1511 }
1512
/*
 * NFSv3 CREATE: create a regular file named args->where.name in the
 * directory identified by args->where.dir, honouring the UNCHECKED,
 * GUARDED and EXCLUSIVE creation modes.
 *
 * On success resp->status is NFS3_OK and resok carries the file handle
 * (flagged as present only when makefh3() succeeds), the attributes of
 * the object and the wcc data of the directory.  On failure
 * resfail.dir_wcc is filled in; the status is set explicitly, derived
 * from the errno value through puterrno3(), or NFS3ERR_JUKEBOX when the
 * thread has T_WOULDBLOCK set.
 *
 * Every exit releases the directory vnode, the converted name buffer
 * (when one was allocated) and any hold / critical region taken on a
 * pre-existing file (tvp).
 */
1513 void
1514 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1515 struct svc_req *req, cred_t *cr)
1516 {
1517 int error;
1518 int in_crit = 0;
1519 vnode_t *vp;
1520 vnode_t *tvp = NULL;
1521 vnode_t *dvp;
1522 struct vattr *vap;
1523 struct vattr va;
1524 struct vattr *dbvap;
1525 struct vattr dbva;
1526 struct vattr *davap;
1527 struct vattr dava;
1528 enum vcexcl excl;
1529 nfstime3 *mtime;
1530 len_t reqsize;
1531 bool_t trunc;
1532 struct sockaddr *ca;
1533 char *name = NULL;
1534
1535 dbvap = NULL;
1536 davap = NULL;
1537
1538 dvp = nfs3_fhtovp(&args->where.dir, exi);
1539
1540 DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
1541 cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);
1542
1543 if (dvp == NULL) {
1544 error = ESTALE;
1545 goto out;
1546 }
1547
/*
 * Directory attributes before the operation.  They also serve as the
 * "after" attributes until the directory is re-read following
 * VOP_CREATE.
 */
1548 dbva.va_mask = AT_ALL;
1549 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1550 davap = dbvap;
1551
1552 if (args->where.name == nfs3nametoolong) {
1553 resp->status = NFS3ERR_NAMETOOLONG;
1554 goto out1;
1555 }
1556
1557 if (args->where.name == NULL || *(args->where.name) == '\0') {
1558 resp->status = NFS3ERR_ACCES;
1559 goto out1;
1560 }
1561
1562 if (rdonly(exi, req)) {
1563 resp->status = NFS3ERR_ROFS;
1564 goto out1;
1565 }
1566
1567 if (is_system_labeled()) {
1568 bslabel_t *clabel = req->rq_label;
1569
1570 ASSERT(clabel != NULL);
1571 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1572 "got client label from request(1)", struct svc_req *, req);
1573
1574 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1575 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1576 exi)) {
1577 resp->status = NFS3ERR_ACCES;
1578 goto out1;
1579 }
1580 }
1581 }
1582
/*
 * Convert the name sent by the client.  The result is either
 * args->where.name itself or a separate MAXPATHLEN + 1 byte buffer,
 * which is freed further down.
 */
1583 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1584 name = nfscmd_convname(ca, exi, args->where.name,
1585 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1586
1587 if (name == NULL) {
1588 /* This is really a Solaris EILSEQ */
1589 resp->status = NFS3ERR_INVAL;
1590 goto out1;
1591 }
1592
/*
 * For an EXCLUSIVE create the client's verifier is stored as the mtime
 * of the new file; it is compared again below to recognize a duplicate
 * of the same request.
 */
1593 if (args->how.mode == EXCLUSIVE) {
1594 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1595 va.va_type = VREG;
1596 va.va_mode = (mode_t)0;
1597 /*
1598 * Ensure no time overflows and that types match
1599 */
1600 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1601 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1602 va.va_mtime.tv_nsec = mtime->nseconds;
1603 excl = EXCL;
1604 } else {
1605 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1606 &va);
1607 if (error)
1608 goto out;
1609 va.va_mask |= AT_TYPE;
1610 va.va_type = VREG;
1611 if (args->how.mode == GUARDED)
1612 excl = EXCL;
1613 else {
1614 excl = NONEXCL;
1615
1616 /*
1617 * During creation of file in non-exclusive mode
1618 * if size of file is being set then make sure
1619 * that if the file already exists that no conflicting
1620 * non-blocking mandatory locks exists in the region
1621 * being modified. If there are conflicting locks fail
1622 * the operation with EACCES.
1623 */
1624 if (va.va_mask & AT_SIZE) {
1625 struct vattr tva;
1626
1627 /*
1628 * Does file already exist?
1629 */
1630 error = VOP_LOOKUP(dvp, name, &tvp,
1631 NULL, 0, NULL, cr, NULL, NULL, NULL);
1632
1633 /*
1634 * Check to see if the file has been delegated
1635 * to a v4 client. If so, then begin recall of
1636 * the delegation and return JUKEBOX to allow
1637 * the client to retransmit its request.
1638 */
1639
1640 trunc = va.va_size == 0;
1641 if (!error &&
1642 rfs4_check_delegated(FWRITE, tvp, trunc)) {
1643 resp->status = NFS3ERR_JUKEBOX;
1644 goto out1;
1645 }
1646
1647 /*
1648 * Check for NBMAND lock conflicts
1649 */
1650 if (!error && nbl_need_check(tvp)) {
1651 u_offset_t offset;
1652 ssize_t len;
1653
1654 nbl_start_crit(tvp, RW_READER);
1655 in_crit = 1;
1656
1657 tva.va_mask = AT_SIZE;
1658 error = VOP_GETATTR(tvp, &tva, 0, cr,
1659 NULL);
1660 /*
1661 * Can't check for conflicts, so return
1662 * error.
1663 */
1664 if (error)
1665 goto out;
1666
/*
 * The affected region lies between the current size and the requested
 * size: it starts at the smaller of the two and is as long as their
 * difference.
 */
1667 offset = tva.va_size < va.va_size ?
1668 tva.va_size : va.va_size;
1669 len = tva.va_size < va.va_size ?
1670 va.va_size - tva.va_size :
1671 tva.va_size - va.va_size;
1672 if (nbl_conflict(tvp, NBL_WRITE,
1673 offset, len, 0, NULL)) {
1674 error = EACCES;
1675 goto out;
1676 }
1677 } else if (tvp) {
1678 VN_RELE(tvp);
1679 tvp = NULL;
1680 }
1681 }
1682 }
/*
 * Remember the requested size; va is reused for VOP_GETATTR further
 * down and no longer holds it by then.
 */
1683 if (va.va_mask & AT_SIZE)
1684 reqsize = va.va_size;
1685 }
1686
1687 /*
1688 * Must specify the mode.
1689 */
1690 if (!(va.va_mask & AT_MODE)) {
1691 resp->status = NFS3ERR_INVAL;
1692 goto out1;
1693 }
1694
1695 /*
1696 * If the filesystem is exported with nosuid, then mask off
1697 * the setuid and setgid bits.
1698 */
1699 if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1700 va.va_mode &= ~(VSUID | VSGID);
1701
1702 tryagain:
1703 /*
1704 * The file open mode used is VWRITE. If the client needs
1705 * some other semantic, then it should do the access checking
1706 * itself. It would have been nice to have the file open mode
1707 * passed as part of the arguments.
1708 */
1709 error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1710 &vp, cr, 0, NULL, NULL);
1711
/* Re-read the directory attributes for the "after" half of the wcc data. */
1712 dava.va_mask = AT_ALL;
1713 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1714
1715 if (error) {
1716 /*
1717 * If we got something other than file already exists
1718 * then just return this error. Otherwise, we got
1719 * EEXIST. If we were doing a GUARDED create, then
1720 * just return this error. Otherwise, we need to
1721 * make sure that this wasn't a duplicate of an
1722 * exclusive create request.
1723 *
1724 * The assumption is made that a non-exclusive create
1725 * request will never return EEXIST.
1726 */
1727 if (error != EEXIST || args->how.mode == GUARDED)
1728 goto out;
1729 /*
1730 * Lookup the file so that we can get a vnode for it.
1731 */
1732 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1733 NULL, cr, NULL, NULL, NULL);
1734 if (error) {
1735 /*
1736 * We couldn't find the file that we thought that
1737 * we just created. So, we'll just try creating
1738 * it again.
1739 */
1740 if (error == ENOENT)
1741 goto tryagain;
1742 goto out;
1743 }
1744
1745 /*
1746 * If the file is delegated to a v4 client, go ahead
1747 * and initiate recall, this create is a hint that a
1748 * conflicting v3 open has occurred.
1749 */
1750
1751 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1752 VN_RELE(vp);
1753 resp->status = NFS3ERR_JUKEBOX;
1754 goto out1;
1755 }
1756
1757 va.va_mask = AT_ALL;
1758 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1759
/*
 * An existing file only satisfies an EXCLUSIVE create when its mtime
 * matches the verifier stored by the earlier request; otherwise the
 * request fails with EEXIST.
 */
1760 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1761 /* % with INT32_MAX to prevent overflows */
1762 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1763 vap->va_mtime.tv_sec !=
1764 (mtime->seconds % INT32_MAX) ||
1765 vap->va_mtime.tv_nsec != mtime->nseconds)) {
1766 VN_RELE(vp);
1767 error = EEXIST;
1768 goto out;
1769 }
1770 } else {
1771
1772 if ((args->how.mode == UNCHECKED ||
1773 args->how.mode == GUARDED) &&
1774 args->how.createhow3_u.obj_attributes.size.set_it &&
1775 va.va_size == 0)
1776 trunc = TRUE;
1777 else
1778 trunc = FALSE;
1779
1780 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1781 VN_RELE(vp);
1782 resp->status = NFS3ERR_JUKEBOX;
1783 goto out1;
1784 }
1785
1786 va.va_mask = AT_ALL;
1787 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1788
1789 /*
1790 * We need to check to make sure that the file got
1791 * created to the indicated size. If not, we do a
1792 * setattr to try to change the size, but we don't
1793 * try too hard. This shouldn't be a problem as most
1794 * clients will only specify a size of zero which
1795 * local file systems handle. However, even if
1796 * the client does specify a non-zero size, it can
1797 * still recover by checking the size of the file
1798 * after it has created it and then issue a setattr
1799 * request of its own to set the size of the file.
1800 */
1801 if (vap != NULL &&
1802 (args->how.mode == UNCHECKED ||
1803 args->how.mode == GUARDED) &&
1804 args->how.createhow3_u.obj_attributes.size.set_it &&
1805 vap->va_size != reqsize) {
1806 va.va_mask = AT_SIZE;
1807 va.va_size = reqsize;
1808 (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1809 va.va_mask = AT_ALL;
1810 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1811 }
1812 }
1813
1814 if (name != args->where.name)
1815 kmem_free(name, MAXPATHLEN + 1);
1816
/* A makefh3() failure is not fatal: the reply simply omits the handle. */
1817 error = makefh3(&resp->resok.obj.handle, vp, exi);
1818 if (error)
1819 resp->resok.obj.handle_follows = FALSE;
1820 else
1821 resp->resok.obj.handle_follows = TRUE;
1822
1823 /*
1824 * Force modified data and metadata out to stable storage.
1825 */
1826 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1827 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1828
1829 VN_RELE(vp);
/*
 * Leave the critical region (if entered) and drop the hold taken on the
 * pre-existing file during the lock conflict check.
 */
1830 if (tvp != NULL) {
1831 if (in_crit)
1832 nbl_end_crit(tvp);
1833 VN_RELE(tvp);
1834 }
1835
1836 resp->status = NFS3_OK;
1837 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1838 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1839
1840 DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1841 cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1842
1843 VN_RELE(dvp);
1844 return;
1845
/*
 * Failure exits.  "out" converts the errno value in error (or a pending
 * T_WOULDBLOCK) into an NFS status; "out1" is entered directly when the
 * status has already been set.
 */
1846 out:
1847 if (curthread->t_flag & T_WOULDBLOCK) {
1848 curthread->t_flag &= ~T_WOULDBLOCK;
1849 resp->status = NFS3ERR_JUKEBOX;
1850 } else
1851 resp->status = puterrno3(error);
1852 out1:
1853 DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1854 cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1855
1856 if (name != NULL && name != args->where.name)
1857 kmem_free(name, MAXPATHLEN + 1);
1858
1859 if (tvp != NULL) {
1860 if (in_crit)
1861 nbl_end_crit(tvp);
1862 VN_RELE(tvp);
1863 }
1864 if (dvp != NULL)
1865 VN_RELE(dvp);
1866 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1867 }
1868
1869 void *
1870 rfs3_create_getfh(CREATE3args *args)
1871 {
1872
1873 return (&args->where.dir);
1874 }
1875
1876 void
1877 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1878 struct svc_req *req, cred_t *cr)
1879 {
1880 int error;
1881 vnode_t *vp = NULL;
1882 vnode_t *dvp;
1883 struct vattr *vap;
1884 struct vattr va;
1885 struct vattr *dbvap;
1886 struct vattr dbva;
1887 struct vattr *davap;
1888 struct vattr dava;
1889 struct sockaddr *ca;
1890 char *name = NULL;
1891
1892 dbvap = NULL;
1893 davap = NULL;
1894
1895 dvp = nfs3_fhtovp(&args->where.dir, exi);
1896
1897 DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
1898 cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
1899
1900 if (dvp == NULL) {
1901 error = ESTALE;
1902 goto out;
1903 }
1904
1905 dbva.va_mask = AT_ALL;
1906 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1907 davap = dbvap;
1908
1909 if (args->where.name == nfs3nametoolong) {
1910 resp->status = NFS3ERR_NAMETOOLONG;
1911 goto out1;
1912 }
1913
1914 if (args->where.name == NULL || *(args->where.name) == '\0') {
1915 resp->status = NFS3ERR_ACCES;
1916 goto out1;
1917 }
1918
1919 if (rdonly(exi, req)) {
1920 resp->status = NFS3ERR_ROFS;
1921 goto out1;
1922 }
1923
1924 if (is_system_labeled()) {
1925 bslabel_t *clabel = req->rq_label;
1926
1927 ASSERT(clabel != NULL);
1928 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1929 "got client label from request(1)", struct svc_req *, req);
1930
1931 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1932 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1933 exi)) {
1934 resp->status = NFS3ERR_ACCES;
1935 goto out1;
1936 }
1937 }
1938 }
1939
1940 error = sattr3_to_vattr(&args->attributes, &va);
1941 if (error)
1942 goto out;
1943
1944 if (!(va.va_mask & AT_MODE)) {
1945 resp->status = NFS3ERR_INVAL;
1946 goto out1;
1947 }
1948
1949 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1950 name = nfscmd_convname(ca, exi, args->where.name,
1951 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1952
1953 if (name == NULL) {
1954 resp->status = NFS3ERR_INVAL;
1955 goto out1;
1956 }
1957
1958 va.va_mask |= AT_TYPE;
1959 va.va_type = VDIR;
1960
1961 error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
1962
1963 if (name != args->where.name)
1964 kmem_free(name, MAXPATHLEN + 1);
1965
1966 dava.va_mask = AT_ALL;
1967 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1968
1969 /*
1970 * Force modified data and metadata out to stable storage.
1971 */
1972 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1973
1974 if (error)
1975 goto out;
1976
1977 error = makefh3(&resp->resok.obj.handle, vp, exi);
1978 if (error)
1979 resp->resok.obj.handle_follows = FALSE;
1980 else
1981 resp->resok.obj.handle_follows = TRUE;
1982
1983 va.va_mask = AT_ALL;
1984 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1985
1986 /*
1987 * Force modified data and metadata out to stable storage.
1988 */
1989 (void) VOP_FSYNC(vp, 0, cr, NULL);
1990
1991 VN_RELE(vp);
1992
1993 resp->status = NFS3_OK;
1994 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1995 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1996
1997 DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
1998 cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
1999 VN_RELE(dvp);
2000
2001 return;
2002
2003 out:
2004 if (curthread->t_flag & T_WOULDBLOCK) {
2005 curthread->t_flag &= ~T_WOULDBLOCK;
2006 resp->status = NFS3ERR_JUKEBOX;
2007 } else
2008 resp->status = puterrno3(error);
2009 out1:
2010 DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
2011 cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
2012 if (dvp != NULL)
2013 VN_RELE(dvp);
2014 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2015 }
2016
2017 void *
2018 rfs3_mkdir_getfh(MKDIR3args *args)
2019 {
2020
2021 return (&args->where.dir);
2022 }
2023
2024 void
2025 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2026 struct svc_req *req, cred_t *cr)
2027 {
2028 int error;
2029 vnode_t *vp;
2030 vnode_t *dvp;
2031 struct vattr *vap;
2032 struct vattr va;
2033 struct vattr *dbvap;
2034 struct vattr dbva;
2035 struct vattr *davap;
2036 struct vattr dava;
2037 struct sockaddr *ca;
2038 char *name = NULL;
2039 char *symdata = NULL;
2040
2041 dbvap = NULL;
2042 davap = NULL;
2043
2044 dvp = nfs3_fhtovp(&args->where.dir, exi);
2045
2046 DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
2047 cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
2048
2049 if (dvp == NULL) {
2050 error = ESTALE;
2051 goto err;
2052 }
2053
2054 dbva.va_mask = AT_ALL;
2055 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2056 davap = dbvap;
2057
2058 if (args->where.name == nfs3nametoolong) {
2059 resp->status = NFS3ERR_NAMETOOLONG;
2060 goto err1;
2061 }
2062
2063 if (args->where.name == NULL || *(args->where.name) == '\0') {
2064 resp->status = NFS3ERR_ACCES;
2065 goto err1;
2066 }
2067
2068 if (rdonly(exi, req)) {
2069 resp->status = NFS3ERR_ROFS;
2070 goto err1;
2071 }
2072
2073 if (is_system_labeled()) {
2074 bslabel_t *clabel = req->rq_label;
2075
2076 ASSERT(clabel != NULL);
2077 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2078 "got client label from request(1)", struct svc_req *, req);
2079
2080 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2081 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2082 exi)) {
2083 resp->status = NFS3ERR_ACCES;
2084 goto err1;
2085 }
2086 }
2087 }
2088
2089 error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2090 if (error)
2091 goto err;
2092
2093 if (!(va.va_mask & AT_MODE)) {
2094 resp->status = NFS3ERR_INVAL;
2095 goto err1;
2096 }
2097
2098 if (args->symlink.symlink_data == nfs3nametoolong) {
2099 resp->status = NFS3ERR_NAMETOOLONG;
2100 goto err1;
2101 }
2102
2103 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2104 name = nfscmd_convname(ca, exi, args->where.name,
2105 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2106
2107 if (name == NULL) {
2108 /* This is really a Solaris EILSEQ */
2109 resp->status = NFS3ERR_INVAL;
2110 goto err1;
2111 }
2112
2113 symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2114 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2115 if (symdata == NULL) {
2116 /* This is really a Solaris EILSEQ */
2117 resp->status = NFS3ERR_INVAL;
2118 goto err1;
2119 }
2120
2121
2122 va.va_mask |= AT_TYPE;
2123 va.va_type = VLNK;
2124
2125 error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2126
2127 dava.va_mask = AT_ALL;
2128 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2129
2130 if (error)
2131 goto err;
2132
2133 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2134 NULL, NULL, NULL);
2135
2136 /*
2137 * Force modified data and metadata out to stable storage.
2138 */
2139 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2140
2141
2142 resp->status = NFS3_OK;
2143 if (error) {
2144 resp->resok.obj.handle_follows = FALSE;
2145 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2146 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2147 goto out;
2148 }
2149
2150 error = makefh3(&resp->resok.obj.handle, vp, exi);
2151 if (error)
2152 resp->resok.obj.handle_follows = FALSE;
2153 else
2154 resp->resok.obj.handle_follows = TRUE;
2155
2156 va.va_mask = AT_ALL;
2157 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2158
2159 /*
2160 * Force modified data and metadata out to stable storage.
2161 */
2162 (void) VOP_FSYNC(vp, 0, cr, NULL);
2163
2164 VN_RELE(vp);
2165
2166 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2167 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2168 goto out;
2169
2170 err:
2171 if (curthread->t_flag & T_WOULDBLOCK) {
2172 curthread->t_flag &= ~T_WOULDBLOCK;
2173 resp->status = NFS3ERR_JUKEBOX;
2174 } else
2175 resp->status = puterrno3(error);
2176 err1:
2177 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2178 out:
2179 if (name != NULL && name != args->where.name)
2180 kmem_free(name, MAXPATHLEN + 1);
2181 if (symdata != NULL && symdata != args->symlink.symlink_data)
2182 kmem_free(symdata, MAXPATHLEN + 1);
2183
2184 DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
2185 cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
2186
2187 if (dvp != NULL)
2188 VN_RELE(dvp);
2189 }
2190
2191 void *
2192 rfs3_symlink_getfh(SYMLINK3args *args)
2193 {
2194
2195 return (&args->where.dir);
2196 }
2197
2198 void
2199 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2200 struct svc_req *req, cred_t *cr)
2201 {
2202 int error;
2203 vnode_t *vp;
2204 vnode_t *realvp;
2205 vnode_t *dvp;
2206 struct vattr *vap;
2207 struct vattr va;
2208 struct vattr *dbvap;
2209 struct vattr dbva;
2210 struct vattr *davap;
2211 struct vattr dava;
2212 int mode;
2213 enum vcexcl excl;
2214 struct sockaddr *ca;
2215 char *name = NULL;
2216
2217 dbvap = NULL;
2218 davap = NULL;
2219
2220 dvp = nfs3_fhtovp(&args->where.dir, exi);
2221
2222 DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
2223 cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
2224
2225 if (dvp == NULL) {
2226 error = ESTALE;
2227 goto out;
2228 }
2229
2230 dbva.va_mask = AT_ALL;
2231 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2232 davap = dbvap;
2233
2234 if (args->where.name == nfs3nametoolong) {
2235 resp->status = NFS3ERR_NAMETOOLONG;
2236 goto out1;
2237 }
2238
2239 if (args->where.name == NULL || *(args->where.name) == '\0') {
2240 resp->status = NFS3ERR_ACCES;
2241 goto out1;
2242 }
2243
2244 if (rdonly(exi, req)) {
2245 resp->status = NFS3ERR_ROFS;
2246 goto out1;
2247 }
2248
2249 if (is_system_labeled()) {
2250 bslabel_t *clabel = req->rq_label;
2251
2252 ASSERT(clabel != NULL);
2253 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2254 "got client label from request(1)", struct svc_req *, req);
2255
2256 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2257 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2258 exi)) {
2259 resp->status = NFS3ERR_ACCES;
2260 goto out1;
2261 }
2262 }
2263 }
2264
2265 switch (args->what.type) {
2266 case NF3CHR:
2267 case NF3BLK:
2268 error = sattr3_to_vattr(
2269 &args->what.mknoddata3_u.device.dev_attributes, &va);
2270 if (error)
2271 goto out;
2272 if (secpolicy_sys_devices(cr) != 0) {
2273 resp->status = NFS3ERR_PERM;
2274 goto out1;
2275 }
2276 if (args->what.type == NF3CHR)
2277 va.va_type = VCHR;
2278 else
2279 va.va_type = VBLK;
2280 va.va_rdev = makedevice(
2281 args->what.mknoddata3_u.device.spec.specdata1,
2282 args->what.mknoddata3_u.device.spec.specdata2);
2283 va.va_mask |= AT_TYPE | AT_RDEV;
2284 break;
2285 case NF3SOCK:
2286 error = sattr3_to_vattr(
2287 &args->what.mknoddata3_u.pipe_attributes, &va);
2288 if (error)
2289 goto out;
2290 va.va_type = VSOCK;
2291 va.va_mask |= AT_TYPE;
2292 break;
2293 case NF3FIFO:
2294 error = sattr3_to_vattr(
2295 &args->what.mknoddata3_u.pipe_attributes, &va);
2296 if (error)
2297 goto out;
2298 va.va_type = VFIFO;
2299 va.va_mask |= AT_TYPE;
2300 break;
2301 default:
2302 resp->status = NFS3ERR_BADTYPE;
2303 goto out1;
2304 }
2305
2306 /*
2307 * Must specify the mode.
2308 */
2309 if (!(va.va_mask & AT_MODE)) {
2310 resp->status = NFS3ERR_INVAL;
2311 goto out1;
2312 }
2313
2314 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2315 name = nfscmd_convname(ca, exi, args->where.name,
2316 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2317
2318 if (name == NULL) {
2319 resp->status = NFS3ERR_INVAL;
2320 goto out1;
2321 }
2322
2323 excl = EXCL;
2324
2325 mode = 0;
2326
2327 error = VOP_CREATE(dvp, name, &va, excl, mode,
2328 &vp, cr, 0, NULL, NULL);
2329
2330 if (name != args->where.name)
2331 kmem_free(name, MAXPATHLEN + 1);
2332
2333 dava.va_mask = AT_ALL;
2334 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2335
2336 /*
2337 * Force modified data and metadata out to stable storage.
2338 */
2339 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2340
2341 if (error)
2342 goto out;
2343
2344 resp->status = NFS3_OK;
2345
2346 error = makefh3(&resp->resok.obj.handle, vp, exi);
2347 if (error)
2348 resp->resok.obj.handle_follows = FALSE;
2349 else
2350 resp->resok.obj.handle_follows = TRUE;
2351
2352 va.va_mask = AT_ALL;
2353 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2354
2355 /*
2356 * Force modified metadata out to stable storage.
2357 *
2358 * if a underlying vp exists, pass it to VOP_FSYNC
2359 */
2360 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2361 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2362 else
2363 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2364
2365 VN_RELE(vp);
2366
2367 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2368 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2369 DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2370 cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2371 VN_RELE(dvp);
2372 return;
2373
2374 out:
2375 if (curthread->t_flag & T_WOULDBLOCK) {
2376 curthread->t_flag &= ~T_WOULDBLOCK;
2377 resp->status = NFS3ERR_JUKEBOX;
2378 } else
2379 resp->status = puterrno3(error);
2380 out1:
2381 DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2382 cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2383 if (dvp != NULL)
2384 VN_RELE(dvp);
2385 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2386 }
2387
2388 void *
2389 rfs3_mknod_getfh(MKNOD3args *args)
2390 {
2391
2392 return (&args->where.dir);
2393 }
2394
2395 void
2396 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2397 struct svc_req *req, cred_t *cr)
2398 {
2399 int error = 0;
2400 vnode_t *vp;
2401 struct vattr *bvap;
2402 struct vattr bva;
2403 struct vattr *avap;
2404 struct vattr ava;
2405 vnode_t *targvp = NULL;
2406 struct sockaddr *ca;
2407 char *name = NULL;
2408
2409 bvap = NULL;
2410 avap = NULL;
2411
2412 vp = nfs3_fhtovp(&args->object.dir, exi);
2413
2414 DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
2415 cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
2416
2417 if (vp == NULL) {
2418 error = ESTALE;
2419 goto err;
2420 }
2421
2422 bva.va_mask = AT_ALL;
2423 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2424 avap = bvap;
2425
2426 if (vp->v_type != VDIR) {
2427 resp->status = NFS3ERR_NOTDIR;
2428 goto err1;
2429 }
2430
2431 if (args->object.name == nfs3nametoolong) {
2432 resp->status = NFS3ERR_NAMETOOLONG;
2433 goto err1;
2434 }
2435
2436 if (args->object.name == NULL || *(args->object.name) == '\0') {
2437 resp->status = NFS3ERR_ACCES;
2438 goto err1;
2439 }
2440
2441 if (rdonly(exi, req)) {
2442 resp->status = NFS3ERR_ROFS;
2443 goto err1;
2444 }
2445
2446 if (is_system_labeled()) {
2447 bslabel_t *clabel = req->rq_label;
2448
2449 ASSERT(clabel != NULL);
2450 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2451 "got client label from request(1)", struct svc_req *, req);
2452
2453 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2454 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2455 exi)) {
2456 resp->status = NFS3ERR_ACCES;
2457 goto err1;
2458 }
2459 }
2460 }
2461
2462 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2463 name = nfscmd_convname(ca, exi, args->object.name,
2464 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2465
2466 if (name == NULL) {
2467 resp->status = NFS3ERR_INVAL;
2468 goto err1;
2469 }
2470
2471 /*
2472 * Check for a conflict with a non-blocking mandatory share
2473 * reservation and V4 delegations
2474 */
2475 error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2476 NULL, cr, NULL, NULL, NULL);
2477 if (error != 0)
2478 goto err;
2479
2480 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2481 resp->status = NFS3ERR_JUKEBOX;
2482 goto err1;
2483 }
2484
2485 if (!nbl_need_check(targvp)) {
2486 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2487 } else {
2488 nbl_start_crit(targvp, RW_READER);
2489 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2490 error = EACCES;
2491 } else {
2492 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2493 }
2494 nbl_end_crit(targvp);
2495 }
2496 VN_RELE(targvp);
2497 targvp = NULL;
2498
2499 ava.va_mask = AT_ALL;
2500 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2501
2502 /*
2503 * Force modified data and metadata out to stable storage.
2504 */
2505 (void) VOP_FSYNC(vp, 0, cr, NULL);
2506
2507 if (error)
2508 goto err;
2509
2510 resp->status = NFS3_OK;
2511 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2512 goto out;
2513
2514 err:
2515 if (curthread->t_flag & T_WOULDBLOCK) {
2516 curthread->t_flag &= ~T_WOULDBLOCK;
2517 resp->status = NFS3ERR_JUKEBOX;
2518 } else
2519 resp->status = puterrno3(error);
2520 err1:
2521 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2522 out:
2523 DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
2524 cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
2525
2526 if (name != NULL && name != args->object.name)
2527 kmem_free(name, MAXPATHLEN + 1);
2528
2529 if (vp != NULL)
2530 VN_RELE(vp);
2531 }
2532
2533 void *
2534 rfs3_remove_getfh(REMOVE3args *args)
2535 {
2536
2537 return (&args->object.dir);
2538 }
2539
2540 void
2541 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2542 struct svc_req *req, cred_t *cr)
2543 {
2544 int error;
2545 vnode_t *vp;
2546 struct vattr *bvap;
2547 struct vattr bva;
2548 struct vattr *avap;
2549 struct vattr ava;
2550 struct sockaddr *ca;
2551 char *name = NULL;
2552
2553 bvap = NULL;
2554 avap = NULL;
2555
2556 vp = nfs3_fhtovp(&args->object.dir, exi);
2557
2558 DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
2559 cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
2560
2561 if (vp == NULL) {
2562 error = ESTALE;
2563 goto err;
2564 }
2565
2566 bva.va_mask = AT_ALL;
2567 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2568 avap = bvap;
2569
2570 if (vp->v_type != VDIR) {
2571 resp->status = NFS3ERR_NOTDIR;
2572 goto err1;
2573 }
2574
2575 if (args->object.name == nfs3nametoolong) {
2576 resp->status = NFS3ERR_NAMETOOLONG;
2577 goto err1;
2578 }
2579
2580 if (args->object.name == NULL || *(args->object.name) == '\0') {
2581 resp->status = NFS3ERR_ACCES;
2582 goto err1;
2583 }
2584
2585 if (rdonly(exi, req)) {
2586 resp->status = NFS3ERR_ROFS;
2587 goto err1;
2588 }
2589
2590 if (is_system_labeled()) {
2591 bslabel_t *clabel = req->rq_label;
2592
2593 ASSERT(clabel != NULL);
2594 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2595 "got client label from request(1)", struct svc_req *, req);
2596
2597 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2598 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2599 exi)) {
2600 resp->status = NFS3ERR_ACCES;
2601 goto err1;
2602 }
2603 }
2604 }
2605
2606 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2607 name = nfscmd_convname(ca, exi, args->object.name,
2608 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2609
2610 if (name == NULL) {
2611 resp->status = NFS3ERR_INVAL;
2612 goto err1;
2613 }
2614
2615 error = VOP_RMDIR(vp, name, rootdir, cr, NULL, 0);
2616
2617 if (name != args->object.name)
2618 kmem_free(name, MAXPATHLEN + 1);
2619
2620 ava.va_mask = AT_ALL;
2621 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2622
2623 /*
2624 * Force modified data and metadata out to stable storage.
2625 */
2626 (void) VOP_FSYNC(vp, 0, cr, NULL);
2627
2628 if (error) {
2629 /*
2630 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2631 * if the directory is not empty. A System V NFS server
2632 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2633 * over the wire.
2634 */
2635 if (error == EEXIST)
2636 error = ENOTEMPTY;
2637 goto err;
2638 }
2639
2640 resp->status = NFS3_OK;
2641 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2642 goto out;
2643
2644 err:
2645 if (curthread->t_flag & T_WOULDBLOCK) {
2646 curthread->t_flag &= ~T_WOULDBLOCK;
2647 resp->status = NFS3ERR_JUKEBOX;
2648 } else
2649 resp->status = puterrno3(error);
2650 err1:
2651 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2652 out:
2653 DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
2654 cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
2655 if (vp != NULL)
2656 VN_RELE(vp);
2657
2658 }
2659
2660 void *
2661 rfs3_rmdir_getfh(RMDIR3args *args)
2662 {
2663
2664 return (&args->object.dir);
2665 }
2666
2667 void
2668 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2669 struct svc_req *req, cred_t *cr)
2670 {
2671 int error = 0;
2672 vnode_t *fvp;
2673 vnode_t *tvp;
2674 vnode_t *targvp;
2675 struct vattr *fbvap;
2676 struct vattr fbva;
2677 struct vattr *favap;
2678 struct vattr fava;
2679 struct vattr *tbvap;
2680 struct vattr tbva;
2681 struct vattr *tavap;
2682 struct vattr tava;
2683 nfs_fh3 *fh3;
2684 struct exportinfo *to_exi;
2685 vnode_t *srcvp = NULL;
2686 bslabel_t *clabel;
2687 struct sockaddr *ca;
2688 char *name = NULL;
2689 char *toname = NULL;
2690
2691 fbvap = NULL;
2692 favap = NULL;
2693 tbvap = NULL;
2694 tavap = NULL;
2695 tvp = NULL;
2696
2697 fvp = nfs3_fhtovp(&args->from.dir, exi);
2698
2699 DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
2700 cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
2701
2702 if (fvp == NULL) {
2703 error = ESTALE;
2704 goto err;
2705 }
2706
2707 if (is_system_labeled()) {
2708 clabel = req->rq_label;
2709 ASSERT(clabel != NULL);
2710 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2711 "got client label from request(1)", struct svc_req *, req);
2712
2713 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2714 if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2715 exi)) {
2716 resp->status = NFS3ERR_ACCES;
2717 goto err1;
2718 }
2719 }
2720 }
2721
2722 fbva.va_mask = AT_ALL;
2723 fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2724 favap = fbvap;
2725
2726 fh3 = &args->to.dir;
2727 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2728 if (to_exi == NULL) {
2729 resp->status = NFS3ERR_ACCES;
2730 goto err1;
2731 }
2732 exi_rele(to_exi);
2733
2734 if (to_exi != exi) {
2735 resp->status = NFS3ERR_XDEV;
2736 goto err1;
2737 }
2738
2739 tvp = nfs3_fhtovp(&args->to.dir, exi);
2740 if (tvp == NULL) {
2741 error = ESTALE;
2742 goto err;
2743 }
2744
2745 tbva.va_mask = AT_ALL;
2746 tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2747 tavap = tbvap;
2748
2749 if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2750 resp->status = NFS3ERR_NOTDIR;
2751 goto err1;
2752 }
2753
2754 if (args->from.name == nfs3nametoolong ||
2755 args->to.name == nfs3nametoolong) {
2756 resp->status = NFS3ERR_NAMETOOLONG;
2757 goto err1;
2758 }
2759 if (args->from.name == NULL || *(args->from.name) == '\0' ||
2760 args->to.name == NULL || *(args->to.name) == '\0') {
2761 resp->status = NFS3ERR_ACCES;
2762 goto err1;
2763 }
2764
2765 if (rdonly(exi, req)) {
2766 resp->status = NFS3ERR_ROFS;
2767 goto err1;
2768 }
2769
2770 if (is_system_labeled()) {
2771 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2772 if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2773 exi)) {
2774 resp->status = NFS3ERR_ACCES;
2775 goto err1;
2776 }
2777 }
2778 }
2779
2780 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2781 name = nfscmd_convname(ca, exi, args->from.name,
2782 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2783
2784 if (name == NULL) {
2785 resp->status = NFS3ERR_INVAL;
2786 goto err1;
2787 }
2788
2789 toname = nfscmd_convname(ca, exi, args->to.name,
2790 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2791
2792 if (toname == NULL) {
2793 resp->status = NFS3ERR_INVAL;
2794 goto err1;
2795 }
2796
2797 /*
2798 * Check for a conflict with a non-blocking mandatory share
2799 * reservation or V4 delegations.
2800 */
2801 error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2802 NULL, cr, NULL, NULL, NULL);
2803 if (error != 0)
2804 goto err;
2805
2806 /*
2807 * If we rename a delegated file we should recall the
2808 * delegation, since future opens should fail or would
2809 * refer to a new file.
2810 */
2811 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2812 resp->status = NFS3ERR_JUKEBOX;
2813 goto err1;
2814 }
2815
2816 /*
2817 * Check for renaming over a delegated file. Check rfs4_deleg_policy
2818 * first to avoid VOP_LOOKUP if possible.
2819 */
2820 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2821 VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2822 NULL, NULL, NULL) == 0) {
2823
2824 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2825 VN_RELE(targvp);
2826 resp->status = NFS3ERR_JUKEBOX;
2827 goto err1;
2828 }
2829 VN_RELE(targvp);
2830 }
2831
2832 if (!nbl_need_check(srcvp)) {
2833 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2834 } else {
2835 nbl_start_crit(srcvp, RW_READER);
2836 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2837 error = EACCES;
2838 else
2839 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2840 nbl_end_crit(srcvp);
2841 }
2842 if (error == 0)
2843 vn_renamepath(tvp, srcvp, args->to.name,
2844 strlen(args->to.name));
2845 VN_RELE(srcvp);
2846 srcvp = NULL;
2847
2848 fava.va_mask = AT_ALL;
2849 favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2850 tava.va_mask = AT_ALL;
2851 tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2852
2853 /*
2854 * Force modified data and metadata out to stable storage.
2855 */
2856 (void) VOP_FSYNC(fvp, 0, cr, NULL);
2857 (void) VOP_FSYNC(tvp, 0, cr, NULL);
2858
2859 if (error)
2860 goto err;
2861
2862 resp->status = NFS3_OK;
2863 vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2864 vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2865 goto out;
2866
2867 err:
2868 if (curthread->t_flag & T_WOULDBLOCK) {
2869 curthread->t_flag &= ~T_WOULDBLOCK;
2870 resp->status = NFS3ERR_JUKEBOX;
2871 } else {
2872 resp->status = puterrno3(error);
2873 }
2874 err1:
2875 vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2876 vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2877
2878 out:
2879 if (name != NULL && name != args->from.name)
2880 kmem_free(name, MAXPATHLEN + 1);
2881 if (toname != NULL && toname != args->to.name)
2882 kmem_free(toname, MAXPATHLEN + 1);
2883
2884 DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
2885 cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
2886 if (fvp != NULL)
2887 VN_RELE(fvp);
2888 if (tvp != NULL)
2889 VN_RELE(tvp);
2890 }
2891
2892 void *
2893 rfs3_rename_getfh(RENAME3args *args)
2894 {
2895
2896 return (&args->from.dir);
2897 }
2898
2899 void
2900 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2901 struct svc_req *req, cred_t *cr)
2902 {
2903 int error;
2904 vnode_t *vp;
2905 vnode_t *dvp;
2906 struct vattr *vap;
2907 struct vattr va;
2908 struct vattr *bvap;
2909 struct vattr bva;
2910 struct vattr *avap;
2911 struct vattr ava;
2912 nfs_fh3 *fh3;
2913 struct exportinfo *to_exi;
2914 bslabel_t *clabel;
2915 struct sockaddr *ca;
2916 char *name = NULL;
2917
2918 vap = NULL;
2919 bvap = NULL;
2920 avap = NULL;
2921 dvp = NULL;
2922
2923 vp = nfs3_fhtovp(&args->file, exi);
2924
2925 DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
2926 cred_t *, cr, vnode_t *, vp, LINK3args *, args);
2927
2928 if (vp == NULL) {
2929 error = ESTALE;
2930 goto out;
2931 }
2932
2933 va.va_mask = AT_ALL;
2934 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2935
2936 fh3 = &args->link.dir;
2937 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2938 if (to_exi == NULL) {
2939 resp->status = NFS3ERR_ACCES;
2940 goto out1;
2941 }
2942 exi_rele(to_exi);
2943
2944 if (to_exi != exi) {
2945 resp->status = NFS3ERR_XDEV;
2946 goto out1;
2947 }
2948
2949 if (is_system_labeled()) {
2950 clabel = req->rq_label;
2951
2952 ASSERT(clabel != NULL);
2953 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
2954 "got client label from request(1)", struct svc_req *, req);
2955
2956 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2957 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2958 exi)) {
2959 resp->status = NFS3ERR_ACCES;
2960 goto out1;
2961 }
2962 }
2963 }
2964
2965 dvp = nfs3_fhtovp(&args->link.dir, exi);
2966 if (dvp == NULL) {
2967 error = ESTALE;
2968 goto out;
2969 }
2970
2971 bva.va_mask = AT_ALL;
2972 bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
2973
2974 if (dvp->v_type != VDIR) {
2975 resp->status = NFS3ERR_NOTDIR;
2976 goto out1;
2977 }
2978
2979 if (args->link.name == nfs3nametoolong) {
2980 resp->status = NFS3ERR_NAMETOOLONG;
2981 goto out1;
2982 }
2983
2984 if (args->link.name == NULL || *(args->link.name) == '\0') {
2985 resp->status = NFS3ERR_ACCES;
2986 goto out1;
2987 }
2988
2989 if (rdonly(exi, req)) {
2990 resp->status = NFS3ERR_ROFS;
2991 goto out1;
2992 }
2993
2994 if (is_system_labeled()) {
2995 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
2996 "got client label from request(1)", struct svc_req *, req);
2997
2998 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2999 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3000 exi)) {
3001 resp->status = NFS3ERR_ACCES;
3002 goto out1;
3003 }
3004 }
3005 }
3006
3007 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3008 name = nfscmd_convname(ca, exi, args->link.name,
3009 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3010
3011 if (name == NULL) {
3012 resp->status = NFS3ERR_SERVERFAULT;
3013 goto out1;
3014 }
3015
3016 error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3017
3018 va.va_mask = AT_ALL;
3019 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3020 ava.va_mask = AT_ALL;
3021 avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3022
3023 /*
3024 * Force modified data and metadata out to stable storage.
3025 */
3026 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3027 (void) VOP_FSYNC(dvp, 0, cr, NULL);
3028
3029 if (error)
3030 goto out;
3031
3032 VN_RELE(dvp);
3033
3034 resp->status = NFS3_OK;
3035 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3036 vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3037
3038 DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3039 cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3040
3041 VN_RELE(vp);
3042
3043 return;
3044
3045 out:
3046 if (curthread->t_flag & T_WOULDBLOCK) {
3047 curthread->t_flag &= ~T_WOULDBLOCK;
3048 resp->status = NFS3ERR_JUKEBOX;
3049 } else
3050 resp->status = puterrno3(error);
3051 out1:
3052 if (name != NULL && name != args->link.name)
3053 kmem_free(name, MAXPATHLEN + 1);
3054
3055 DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3056 cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3057
3058 if (vp != NULL)
3059 VN_RELE(vp);
3060 if (dvp != NULL)
3061 VN_RELE(dvp);
3062 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3063 vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3064 }
3065
3066 void *
3067 rfs3_link_getfh(LINK3args *args)
3068 {
3069
3070 return (&args->file);
3071 }
3072
/*
 * Size, in bytes, of a READDIR response that carries the directory
 * attributes plus exactly one directory entry whose name is `length'
 * bytes long.  A request whose count is at least this large is
 * guaranteed room for at least one entry if one exists, so no
 * NFS3ERR_TOOSMALL check is needed for it.  A request with a smaller
 * count has to be checked to see whether that error must be returned.
 *
 * The fixed part is made up of the following XDR units:
 *
 *	status			- 1
 *	attr. flag		- 1
 *	cookie verifier		- 2
 *	attributes		- NFS3_SIZEOF_FATTR3
 *	boolean			- 1
 *	file id			- 2
 *	directory name length	- 1
 *	cookie			- 2
 *	end of list		- 1
 *	end of file		- 1
 *
 * each worth BYTES_PER_XDR_UNIT bytes, to which the entry name is added,
 * rounded up to a whole number of XDR units.
 */

#define	NFS3_READDIR_MIN_COUNT(length)	\
	(roundup((length), BYTES_PER_XDR_UNIT) + \
	BYTES_PER_XDR_UNIT * \
	(1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1))
3101
3102 /* ARGSUSED */
3103 void
3104 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3105 struct svc_req *req, cred_t *cr)
3106 {
3107 int error;
3108 vnode_t *vp;
3109 struct vattr *vap;
3110 struct vattr va;
3111 struct iovec iov;
3112 struct uio uio;
3113 char *data;
3114 int iseof;
3115 int bufsize;
3116 int namlen;
3117 uint_t count;
3118 struct sockaddr *ca;
3119
3120 vap = NULL;
3121
3122 vp = nfs3_fhtovp(&args->dir, exi);
3123
3124 DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
3125 cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
3126
3127 if (vp == NULL) {
3128 error = ESTALE;
3129 goto out;
3130 }
3131
3132 if (is_system_labeled()) {
3133 bslabel_t *clabel = req->rq_label;
3134
3135 ASSERT(clabel != NULL);
3136 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3137 "got client label from request(1)", struct svc_req *, req);
3138
3139 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3140 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3141 exi)) {
3142 resp->status = NFS3ERR_ACCES;
3143 goto out1;
3144 }
3145 }
3146 }
3147
3148 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3149
3150 va.va_mask = AT_ALL;
3151 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3152
3153 if (vp->v_type != VDIR) {
3154 resp->status = NFS3ERR_NOTDIR;
3155 goto out1;
3156 }
3157
3158 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3159 if (error)
3160 goto out;
3161
3162 /*
3163 * Now don't allow arbitrary count to alloc;
3164 * allow the maximum not to exceed rfs3_tsize()
3165 */
3166 if (args->count > rfs3_tsize(req))
3167 args->count = rfs3_tsize(req);
3168
3169 /*
3170 * Make sure that there is room to read at least one entry
3171 * if any are available.
3172 */
3173 if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3174 count = DIRENT64_RECLEN(MAXNAMELEN);
3175 else
3176 count = args->count;
3177
3178 data = kmem_alloc(count, KM_SLEEP);
3179
3180 iov.iov_base = data;
3181 iov.iov_len = count;
3182 uio.uio_iov = &iov;
3183 uio.uio_iovcnt = 1;
3184 uio.uio_segflg = UIO_SYSSPACE;
3185 uio.uio_extflg = UIO_COPY_CACHED;
3186 uio.uio_loffset = (offset_t)args->cookie;
3187 uio.uio_resid = count;
3188
3189 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3190
3191 va.va_mask = AT_ALL;
3192 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3193
3194 if (error) {
3195 kmem_free(data, count);
3196 goto out;
3197 }
3198
3199 /*
3200 * If the count was not large enough to be able to guarantee
3201 * to be able to return at least one entry, then need to
3202 * check to see if NFS3ERR_TOOSMALL should be returned.
3203 */
3204 if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3205 /*
3206 * bufsize is used to keep track of the size of the response.
3207 * It is primed with:
3208 * 1 for the status +
3209 * 1 for the dir_attributes.attributes boolean +
3210 * 2 for the cookie verifier
3211 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3212 * to bytes. If there are directory attributes to be
3213 * returned, then:
3214 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3215 * time BYTES_PER_XDR_UNIT is added to account for them.
3216 */
3217 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3218 if (vap != NULL)
3219 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3220 /*
3221 * An entry is composed of:
3222 * 1 for the true/false list indicator +
3223 * 2 for the fileid +
3224 * 1 for the length of the name +
3225 * 2 for the cookie +
3226 * all times BYTES_PER_XDR_UNIT to convert from
3227 * XDR units to bytes, plus the length of the name
3228 * rounded up to the nearest BYTES_PER_XDR_UNIT.
3229 */
3230 if (count != uio.uio_resid) {
3231 namlen = strlen(((struct dirent64 *)data)->d_name);
3232 bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3233 roundup(namlen, BYTES_PER_XDR_UNIT);
3234 }
3235 /*
3236 * We need to check to see if the number of bytes left
3237 * to go into the buffer will actually fit into the
3238 * buffer. This is calculated as the size of this
3239 * entry plus:
3240 * 1 for the true/false list indicator +
3241 * 1 for the eof indicator
3242 * times BYTES_PER_XDR_UNIT to convert from from
3243 * XDR units to bytes.
3244 */
3245 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3246 if (bufsize > args->count) {
3247 kmem_free(data, count);
3248 resp->status = NFS3ERR_TOOSMALL;
3249 goto out1;
3250 }
3251 }
3252
3253 /*
3254 * Have a valid readir buffer for the native character
3255 * set. Need to check if a conversion is necessary and
3256 * potentially rewrite the whole buffer. Note that if the
3257 * conversion expands names enough, the structure may not
3258 * fit. In this case, we need to drop entries until if fits
3259 * and patch the counts in order that the next readdir will
3260 * get the correct entries.
3261 */
3262 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3263 data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3264
3265
3266 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3267
3268 #if 0 /* notyet */
3269 /*
3270 * Don't do this. It causes local disk writes when just
3271 * reading the file and the overhead is deemed larger
3272 * than the benefit.
3273 */
3274 /*
3275 * Force modified metadata out to stable storage.
3276 */
3277 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3278 #endif
3279
3280 resp->status = NFS3_OK;
3281 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3282 resp->resok.cookieverf = 0;
3283 resp->resok.reply.entries = (entry3 *)data;
3284 resp->resok.reply.eof = iseof;
3285 resp->resok.size = count - uio.uio_resid;
3286 resp->resok.count = args->count;
3287 resp->resok.freecount = count;
3288
3289 DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3290 cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3291
3292 VN_RELE(vp);
3293
3294 return;
3295
3296 out:
3297 if (curthread->t_flag & T_WOULDBLOCK) {
3298 curthread->t_flag &= ~T_WOULDBLOCK;
3299 resp->status = NFS3ERR_JUKEBOX;
3300 } else
3301 resp->status = puterrno3(error);
3302 out1:
3303 DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3304 cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3305
3306 if (vp != NULL) {
3307 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3308 VN_RELE(vp);
3309 }
3310 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3311 }
3312
3313 void *
3314 rfs3_readdir_getfh(READDIR3args *args)
3315 {
3316
3317 return (&args->dir);
3318 }
3319
3320 void
3321 rfs3_readdir_free(READDIR3res *resp)
3322 {
3323
3324 if (resp->status == NFS3_OK)
3325 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3326 }
3327
/*
 * Advance from one dirent64 record to the record that follows it in the
 * same buffer, i.e. d_reclen bytes further on.  Any earlier definition
 * of nextdp is discarded first (#undef of an undefined name is a no-op).
 */
#undef	nextdp
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3332
/*
 * This macro computes the size, in bytes, of the part of a READDIRPLUS
 * response taken up by one directory entry, including its attributes
 * and file handle.  If the space left in the response is at least this
 * large, then we are guaranteed to be able to return at least one more
 * directory entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 *	boolean			- 1 * BYTES_PER_XDR_UNIT
 *	file id			- 2 * BYTES_PER_XDR_UNIT
 *	directory name length	- 1 * BYTES_PER_XDR_UNIT
 *	cookie			- 2 * BYTES_PER_XDR_UNIT
 *	attribute flag		- 1 * BYTES_PER_XDR_UNIT
 *	attributes		- NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 *	status byte for file handle - 1 * BYTES_PER_XDR_UNIT
 *	length of a file handle	- 1 * BYTES_PER_XDR_UNIT
 *	Maximum length of a file handle (NFS3_MAXFHSIZE)
 *	name length of the entry, rounded up to a whole XDR unit
 *
 * The argument is parenthesized so that an expression can be passed
 * safely, as NFS3_READDIR_MIN_COUNT already does for its argument.
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)	\
	((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
	BYTES_PER_XDR_UNIT + \
	NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3356
/*
 * Initial size, in bytes, of each VOP_READDIR() request issued by
 * rfs3_readdirplus(); that function further limits it to the space
 * remaining in its directory buffer.
 */
static int rfs3_readdir_unit = MAXBSIZE;
3358
3359 /* ARGSUSED */
3360 void
3361 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3362 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
3363 {
3364 int error;
3365 vnode_t *vp;
3366 struct vattr *vap;
3367 struct vattr va;
3368 struct iovec iov;
3369 struct uio uio;
3370 char *data;
3371 int iseof;
3372 struct dirent64 *dp;
3373 vnode_t *nvp;
3374 struct vattr *nvap;
3375 struct vattr nva;
3376 entryplus3_info *infop = NULL;
3377 int size = 0;
3378 int nents = 0;
3379 int bufsize = 0;
3380 int entrysize = 0;
3381 int tofit = 0;
3382 int rd_unit = rfs3_readdir_unit;
3383 int prev_len;
3384 int space_left;
3385 int i;
3386 uint_t *namlen = NULL;
3387 char *ndata = NULL;
3388 struct sockaddr *ca;
3389 size_t ret;
3390
3391 vap = NULL;
3392
3393 vp = nfs3_fhtovp(&args->dir, exi);
3394
3395 DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
3396 cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
3397
3398 if (vp == NULL) {
3399 error = ESTALE;
3400 goto out;
3401 }
3402
3403 if (is_system_labeled()) {
3404 bslabel_t *clabel = req->rq_label;
3405
3406 ASSERT(clabel != NULL);
3407 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3408 char *, "got client label from request(1)",
3409 struct svc_req *, req);
3410
3411 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3412 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3413 exi)) {
3414 resp->status = NFS3ERR_ACCES;
3415 goto out1;
3416 }
3417 }
3418 }
3419
3420 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3421
3422 va.va_mask = AT_ALL;
3423 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3424
3425 if (vp->v_type != VDIR) {
3426 error = ENOTDIR;
3427 goto out;
3428 }
3429
3430 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3431 if (error)
3432 goto out;
3433
3434 /*
3435 * Don't allow arbitrary counts for allocation
3436 */
3437 if (args->maxcount > rfs3_tsize(req))
3438 args->maxcount = rfs3_tsize(req);
3439
3440 /*
3441 * Make sure that there is room to read at least one entry
3442 * if any are available
3443 */
3444 args->dircount = MIN(args->dircount, args->maxcount);
3445
3446 if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3447 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3448
3449 /*
3450 * This allocation relies on a minimum directory entry
3451 * being roughly 24 bytes. Therefore, the namlen array
3452 * will have enough space based on the maximum number of
3453 * entries to read.
3454 */
3455 namlen = kmem_alloc(args->dircount, KM_SLEEP);
3456
3457 space_left = args->dircount;
3458 data = kmem_alloc(args->dircount, KM_SLEEP);
3459 dp = (struct dirent64 *)data;
3460 uio.uio_iov = &iov;
3461 uio.uio_iovcnt = 1;
3462 uio.uio_segflg = UIO_SYSSPACE;
3463 uio.uio_extflg = UIO_COPY_CACHED;
3464 uio.uio_loffset = (offset_t)args->cookie;
3465
3466 /*
3467 * bufsize is used to keep track of the size of the response as we
3468 * get post op attributes and filehandles for each entry. This is
3469 * an optimization as the server may have read more entries than will
3470 * fit in the buffer specified by maxcount. We stop calculating
3471 * post op attributes and filehandles once we have exceeded maxcount.
3472 * This will minimize the effect of truncation.
3473 *
3474 * It is primed with:
3475 * 1 for the status +
3476 * 1 for the dir_attributes.attributes boolean +
3477 * 2 for the cookie verifier
3478 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3479 * to bytes. If there are directory attributes to be
3480 * returned, then:
3481 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3482 * time BYTES_PER_XDR_UNIT is added to account for them.
3483 */
3484 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3485 if (vap != NULL)
3486 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3487
3488 getmoredents:
3489 /*
3490 * Here we make a check so that our read unit is not larger than
3491 * the space left in the buffer.
3492 */
3493 rd_unit = MIN(rd_unit, space_left);
3494 iov.iov_base = (char *)dp;
3495 iov.iov_len = rd_unit;
3496 uio.uio_resid = rd_unit;
3497 prev_len = rd_unit;
3498
3499 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3500
3501 if (error) {
3502 kmem_free(data, args->dircount);
3503 goto out;
3504 }
3505
3506 if (uio.uio_resid == prev_len && !iseof) {
3507 if (nents == 0) {
3508 kmem_free(data, args->dircount);
3509 resp->status = NFS3ERR_TOOSMALL;
3510 goto out1;
3511 }
3512
3513 /*
3514 * We could not get any more entries, so get the attributes
3515 * and filehandle for the entries already obtained.
3516 */
3517 goto good;
3518 }
3519
3520 /*
3521 * We estimate the size of the response by assuming the
3522 * entry exists and attributes and filehandle are also valid
3523 */
3524 for (size = prev_len - uio.uio_resid;
3525 size > 0;
3526 size -= dp->d_reclen, dp = nextdp(dp)) {
3527
3528 if (dp->d_ino == 0) {
3529 nents++;
3530 continue;
3531 }
3532
3533 namlen[nents] = strlen(dp->d_name);
3534 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3535
3536 /*
3537 * We need to check to see if the number of bytes left
3538 * to go into the buffer will actually fit into the
3539 * buffer. This is calculated as the size of this
3540 * entry plus:
3541 * 1 for the true/false list indicator +
3542 * 1 for the eof indicator
3543 * times BYTES_PER_XDR_UNIT to convert from XDR units
3544 * to bytes.
3545 *
3546 * Also check the dircount limit against the first entry read
3547 *
3548 */
3549 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3550 if (bufsize + tofit > args->maxcount) {
3551 /*
3552 * We make a check here to see if this was the
3553 * first entry being measured. If so, then maxcount
3554 * was too small to begin with and so we need to
3555 * return with NFS3ERR_TOOSMALL.
3556 */
3557 if (nents == 0) {
3558 kmem_free(data, args->dircount);
3559 resp->status = NFS3ERR_TOOSMALL;
3560 goto out1;
3561 }
3562 iseof = FALSE;
3563 goto good;
3564 }
3565 bufsize += entrysize;
3566 nents++;
3567 }
3568
3569 /*
3570 * If there is enough room to fit at least 1 more entry including
3571 * post op attributes and filehandle in the buffer AND that we haven't
3572 * exceeded dircount then go back and get some more.
3573 */
3574 if (!iseof &&
3575 (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3576 space_left -= (prev_len - uio.uio_resid);
3577 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3578 goto getmoredents;
3579
3580 /* else, fall through */
3581 }
3582 good:
3583 va.va_mask = AT_ALL;
3584 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3585
3586 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3587
3588 infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3589 resp->resok.infop = infop;
3590
3591 dp = (struct dirent64 *)data;
3592 for (i = 0; i < nents; i++) {
3593
3594 if (dp->d_ino == 0) {
3595 infop[i].attr.attributes = FALSE;
3596 infop[i].fh.handle_follows = FALSE;
3597 dp = nextdp(dp);
3598 continue;
3599 }
3600
3601 infop[i].namelen = namlen[i];
3602
3603 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3604 NULL, NULL, NULL);
3605 if (error) {
3606 infop[i].attr.attributes = FALSE;
3607 infop[i].fh.handle_follows = FALSE;
3608 dp = nextdp(dp);
3609 continue;
3610 }
3611
3612 nva.va_mask = AT_ALL;
3613 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3614
3615 /* Lie about the object type for a referral */
3616 if (vn_is_nfs_reparse(nvp, cr))
3617 nvap->va_type = VLNK;
3618
3619 if (vn_ismntpt(nvp)) {
3620 infop[i].attr.attributes = FALSE;
3621 infop[i].fh.handle_follows = FALSE;
3622 } else {
3623 vattr_to_post_op_attr(nvap, &infop[i].attr);
3624
3625 error = makefh3(&infop[i].fh.handle, nvp, exi);
3626 if (!error)
3627 infop[i].fh.handle_follows = TRUE;
3628 else
3629 infop[i].fh.handle_follows = FALSE;
3630 }
3631
3632 VN_RELE(nvp);
3633 dp = nextdp(dp);
3634 }
3635
3636 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3637 ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3638 if (ndata == NULL)
3639 ndata = data;
3640
3641 if (ret > 0) {
3642 /*
3643 * We had to drop one or more entries in order to fit
3644 * during the character conversion. We need to patch
3645 * up the size and eof info.
3646 */
3647 if (iseof)
3648 iseof = FALSE;
3649
3650 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3651 nents, ret);
3652 }
3653
3654
3655 #if 0 /* notyet */
3656 /*
3657 * Don't do this. It causes local disk writes when just
3658 * reading the file and the overhead is deemed larger
3659 * than the benefit.
3660 */
3661 /*
3662 * Force modified metadata out to stable storage.
3663 */
3664 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3665 #endif
3666
3667 kmem_free(namlen, args->dircount);
3668
3669 resp->status = NFS3_OK;
3670 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3671 resp->resok.cookieverf = 0;
3672 resp->resok.reply.entries = (entryplus3 *)ndata;
3673 resp->resok.reply.eof = iseof;
3674 resp->resok.size = nents;
3675 resp->resok.count = args->dircount - ret;
3676 resp->resok.maxcount = args->maxcount;
3677
3678 DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3679 cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3680 if (ndata != data)
3681 kmem_free(data, args->dircount);
3682
3683
3684 VN_RELE(vp);
3685
3686 return;
3687
3688 out:
3689 if (curthread->t_flag & T_WOULDBLOCK) {
3690 curthread->t_flag &= ~T_WOULDBLOCK;
3691 resp->status = NFS3ERR_JUKEBOX;
3692 } else {
3693 resp->status = puterrno3(error);
3694 }
3695 out1:
3696 DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3697 cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3698
3699 if (vp != NULL) {
3700 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3701 VN_RELE(vp);
3702 }
3703
3704 if (namlen != NULL)
3705 kmem_free(namlen, args->dircount);
3706
3707 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3708 }
3709
3710 void *
3711 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3712 {
3713
3714 return (&args->dir);
3715 }
3716
3717 void
3718 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3719 {
3720
3721 if (resp->status == NFS3_OK) {
3722 kmem_free(resp->resok.reply.entries, resp->resok.count);
3723 kmem_free(resp->resok.infop,
3724 resp->resok.size * sizeof (struct entryplus3_info));
3725 }
3726 }
3727
3728 /* ARGSUSED */
3729 void
3730 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3731 struct svc_req *req, cred_t *cr)
3732 {
3733 int error;
3734 vnode_t *vp;
3735 struct vattr *vap;
3736 struct vattr va;
3737 struct statvfs64 sb;
3738
3739 vap = NULL;
3740
3741 vp = nfs3_fhtovp(&args->fsroot, exi);
3742
3743 DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
3744 cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
3745
3746 if (vp == NULL) {
3747 error = ESTALE;
3748 goto out;
3749 }
3750
3751 if (is_system_labeled()) {
3752 bslabel_t *clabel = req->rq_label;
3753
3754 ASSERT(clabel != NULL);
3755 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3756 "got client label from request(1)", struct svc_req *, req);
3757
3758 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3759 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3760 exi)) {
3761 resp->status = NFS3ERR_ACCES;
3762 goto out1;
3763 }
3764 }
3765 }
3766
3767 error = VFS_STATVFS(vp->v_vfsp, &sb);
3768
3769 va.va_mask = AT_ALL;
3770 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3771
3772 if (error)
3773 goto out;
3774
3775 resp->status = NFS3_OK;
3776 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3777 if (sb.f_blocks != (fsblkcnt64_t)-1)
3778 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3779 else
3780 resp->resok.tbytes = (size3)sb.f_blocks;
3781 if (sb.f_bfree != (fsblkcnt64_t)-1)
3782 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3783 else
3784 resp->resok.fbytes = (size3)sb.f_bfree;
3785 if (sb.f_bavail != (fsblkcnt64_t)-1)
3786 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3787 else
3788 resp->resok.abytes = (size3)sb.f_bavail;
3789 resp->resok.tfiles = (size3)sb.f_files;
3790 resp->resok.ffiles = (size3)sb.f_ffree;
3791 resp->resok.afiles = (size3)sb.f_favail;
3792 resp->resok.invarsec = 0;
3793
3794 DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3795 cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3796 VN_RELE(vp);
3797
3798 return;
3799
3800 out:
3801 if (curthread->t_flag & T_WOULDBLOCK) {
3802 curthread->t_flag &= ~T_WOULDBLOCK;
3803 resp->status = NFS3ERR_JUKEBOX;
3804 } else
3805 resp->status = puterrno3(error);
3806 out1:
3807 DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3808 cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3809
3810 if (vp != NULL)
3811 VN_RELE(vp);
3812 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3813 }
3814
3815 void *
3816 rfs3_fsstat_getfh(FSSTAT3args *args)
3817 {
3818
3819 return (&args->fsroot);
3820 }
3821
3822 void
3823 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3824 struct svc_req *req, cred_t *cr)
3825 {
3826 vnode_t *vp;
3827 struct vattr *vap;
3828 struct vattr va;
3829 uint32_t xfer_size;
3830 ulong_t l = 0;
3831 int error;
3832
3833 vp = nfs3_fhtovp(&args->fsroot, exi);
3834
3835 DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
3836 cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
3837
3838 if (vp == NULL) {
3839 if (curthread->t_flag & T_WOULDBLOCK) {
3840 curthread->t_flag &= ~T_WOULDBLOCK;
3841 resp->status = NFS3ERR_JUKEBOX;
3842 } else
3843 resp->status = NFS3ERR_STALE;
3844 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3845 goto out;
3846 }
3847
3848 if (is_system_labeled()) {
3849 bslabel_t *clabel = req->rq_label;
3850
3851 ASSERT(clabel != NULL);
3852 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3853 "got client label from request(1)", struct svc_req *, req);
3854
3855 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3856 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3857 exi)) {
3858 resp->status = NFS3ERR_STALE;
3859 vattr_to_post_op_attr(NULL,
3860 &resp->resfail.obj_attributes);
3861 goto out;
3862 }
3863 }
3864 }
3865
3866 va.va_mask = AT_ALL;
3867 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3868
3869 resp->status = NFS3_OK;
3870 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3871 xfer_size = rfs3_tsize(req);
3872 resp->resok.rtmax = xfer_size;
3873 resp->resok.rtpref = xfer_size;
3874 resp->resok.rtmult = DEV_BSIZE;
3875 resp->resok.wtmax = xfer_size;
3876 resp->resok.wtpref = xfer_size;
3877 resp->resok.wtmult = DEV_BSIZE;
3878 resp->resok.dtpref = MAXBSIZE;
3879
3880 /*
3881 * Large file spec: want maxfilesize based on limit of
3882 * underlying filesystem. We can guess 2^31-1 if need be.
3883 */
3884 error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3885 if (error) {
3886 resp->status = puterrno3(error);
3887 goto out;
3888 }
3889
3890 /*
3891 * If the underlying file system does not support _PC_FILESIZEBITS,
3892 * return a reasonable default. Note that error code on VOP_PATHCONF
3893 * will be 0, even if the underlying file system does not support
3894 * _PC_FILESIZEBITS.
3895 */
3896 if (l == (ulong_t)-1) {
3897 resp->resok.maxfilesize = MAXOFF32_T;
3898 } else {
3899 if (l >= (sizeof (uint64_t) * 8))
3900 resp->resok.maxfilesize = INT64_MAX;
3901 else
3902 resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3903 }
3904
3905 resp->resok.time_delta.seconds = 0;
3906 resp->resok.time_delta.nseconds = 1000;
3907 resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3908 FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3909
3910 DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3911 cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
3912
3913 VN_RELE(vp);
3914
3915 return;
3916
3917 out:
3918 DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3919 cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
3920 if (vp != NULL)
3921 VN_RELE(vp);
3922 }
3923
3924 void *
3925 rfs3_fsinfo_getfh(FSINFO3args *args)
3926 {
3927
3928 return (&args->fsroot);
3929 }
3930
3931 /* ARGSUSED */
3932 void
3933 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3934 struct svc_req *req, cred_t *cr)
3935 {
3936 int error;
3937 vnode_t *vp;
3938 struct vattr *vap;
3939 struct vattr va;
3940 ulong_t val;
3941
3942 vap = NULL;
3943
3944 vp = nfs3_fhtovp(&args->object, exi);
3945
3946 DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
3947 cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
3948
3949 if (vp == NULL) {
3950 error = ESTALE;
3951 goto out;
3952 }
3953
3954 if (is_system_labeled()) {
3955 bslabel_t *clabel = req->rq_label;
3956
3957 ASSERT(clabel != NULL);
3958 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
3959 "got client label from request(1)", struct svc_req *, req);
3960
3961 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3962 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3963 exi)) {
3964 resp->status = NFS3ERR_ACCES;
3965 goto out1;
3966 }
3967 }
3968 }
3969
3970 va.va_mask = AT_ALL;
3971 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3972
3973 error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
3974 if (error)
3975 goto out;
3976 resp->resok.info.link_max = (uint32)val;
3977
3978 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
3979 if (error)
3980 goto out;
3981 resp->resok.info.name_max = (uint32)val;
3982
3983 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
3984 if (error)
3985 goto out;
3986 if (val == 1)
3987 resp->resok.info.no_trunc = TRUE;
3988 else
3989 resp->resok.info.no_trunc = FALSE;
3990
3991 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
3992 if (error)
3993 goto out;
3994 if (val == 1)
3995 resp->resok.info.chown_restricted = TRUE;
3996 else
3997 resp->resok.info.chown_restricted = FALSE;
3998
3999 resp->status = NFS3_OK;
4000 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4001 resp->resok.info.case_insensitive = FALSE;
4002 resp->resok.info.case_preserving = TRUE;
4003 DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4004 cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4005 VN_RELE(vp);
4006 return;
4007
4008 out:
4009 if (curthread->t_flag & T_WOULDBLOCK) {
4010 curthread->t_flag &= ~T_WOULDBLOCK;
4011 resp->status = NFS3ERR_JUKEBOX;
4012 } else
4013 resp->status = puterrno3(error);
4014 out1:
4015 DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4016 cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4017 if (vp != NULL)
4018 VN_RELE(vp);
4019 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4020 }
4021
4022 void *
4023 rfs3_pathconf_getfh(PATHCONF3args *args)
4024 {
4025
4026 return (&args->object);
4027 }
4028
4029 void
4030 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4031 struct svc_req *req, cred_t *cr)
4032 {
4033 int error;
4034 vnode_t *vp;
4035 struct vattr *bvap;
4036 struct vattr bva;
4037 struct vattr *avap;
4038 struct vattr ava;
4039
4040 bvap = NULL;
4041 avap = NULL;
4042
4043 vp = nfs3_fhtovp(&args->file, exi);
4044
4045 DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
4046 cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
4047
4048 if (vp == NULL) {
4049 error = ESTALE;
4050 goto out;
4051 }
4052
4053 bva.va_mask = AT_ALL;
4054 error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4055
4056 /*
4057 * If we can't get the attributes, then we can't do the
4058 * right access checking. So, we'll fail the request.
4059 */
4060 if (error)
4061 goto out;
4062
4063 bvap = &bva;
4064
4065 if (rdonly(exi, req)) {
4066 resp->status = NFS3ERR_ROFS;
4067 goto out1;
4068 }
4069
4070 if (vp->v_type != VREG) {
4071 resp->status = NFS3ERR_INVAL;
4072 goto out1;
4073 }
4074
4075 if (is_system_labeled()) {
4076 bslabel_t *clabel = req->rq_label;
4077
4078 ASSERT(clabel != NULL);
4079 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4080 "got client label from request(1)", struct svc_req *, req);
4081
4082 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4083 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4084 exi)) {
4085 resp->status = NFS3ERR_ACCES;
4086 goto out1;
4087 }
4088 }
4089 }
4090
4091 if (crgetuid(cr) != bva.va_uid &&
4092 (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4093 goto out;
4094
4095 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4096
4097 ava.va_mask = AT_ALL;
4098 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4099
4100 if (error)
4101 goto out;
4102
4103 resp->status = NFS3_OK;
4104 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4105 resp->resok.verf = write3verf;
4106
4107 DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4108 cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4109
4110 VN_RELE(vp);
4111
4112 return;
4113
4114 out:
4115 if (curthread->t_flag & T_WOULDBLOCK) {
4116 curthread->t_flag &= ~T_WOULDBLOCK;
4117 resp->status = NFS3ERR_JUKEBOX;
4118 } else
4119 resp->status = puterrno3(error);
4120 out1:
4121 DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4122 cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4123
4124 if (vp != NULL)
4125 VN_RELE(vp);
4126 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4127 }
4128
4129 void *
4130 rfs3_commit_getfh(COMMIT3args *args)
4131 {
4132
4133 return (&args->file);
4134 }
4135
4136 static int
4137 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4138 {
4139
4140 vap->va_mask = 0;
4141
4142 if (sap->mode.set_it) {
4143 vap->va_mode = (mode_t)sap->mode.mode;
4144 vap->va_mask |= AT_MODE;
4145 }
4146 if (sap->uid.set_it) {
4147 vap->va_uid = (uid_t)sap->uid.uid;
4148 vap->va_mask |= AT_UID;
4149 }
4150 if (sap->gid.set_it) {
4151 vap->va_gid = (gid_t)sap->gid.gid;
4152 vap->va_mask |= AT_GID;
4153 }
4154 if (sap->size.set_it) {
4155 if (sap->size.size > (size3)((u_longlong_t)-1))
4156 return (EINVAL);
4157 vap->va_size = sap->size.size;
4158 vap->va_mask |= AT_SIZE;
4159 }
4160 if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4161 #ifndef _LP64
4162 /* check time validity */
4163 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4164 return (EOVERFLOW);
4165 #endif
4166 /*
4167 * nfs protocol defines times as unsigned so don't extend sign,
4168 * unless sysadmin set nfs_allow_preepoch_time.
4169 */
4170 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4171 sap->atime.atime.seconds);
4172 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4173 vap->va_mask |= AT_ATIME;
4174 } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4175 gethrestime(&vap->va_atime);
4176 vap->va_mask |= AT_ATIME;
4177 }
4178 if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4179 #ifndef _LP64
4180 /* check time validity */
4181 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4182 return (EOVERFLOW);
4183 #endif
4184 /*
4185 * nfs protocol defines times as unsigned so don't extend sign,
4186 * unless sysadmin set nfs_allow_preepoch_time.
4187 */
4188 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4189 sap->mtime.mtime.seconds);
4190 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4191 vap->va_mask |= AT_MTIME;
4192 } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4193 gethrestime(&vap->va_mtime);
4194 vap->va_mask |= AT_MTIME;
4195 }
4196
4197 return (0);
4198 }
4199
4200 static ftype3 vt_to_nf3[] = {
4201 0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4202 };
4203
4204 static int
4205 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4206 {
4207
4208 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4209 /* Return error if time or size overflow */
4210 if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4211 return (EOVERFLOW);
4212 }
4213 fap->type = vt_to_nf3[vap->va_type];
4214 fap->mode = (mode3)(vap->va_mode & MODEMASK);
4215 fap->nlink = (uint32)vap->va_nlink;
4216 if (vap->va_uid == UID_NOBODY)
4217 fap->uid = (uid3)NFS_UID_NOBODY;
4218 else
4219 fap->uid = (uid3)vap->va_uid;
4220 if (vap->va_gid == GID_NOBODY)
4221 fap->gid = (gid3)NFS_GID_NOBODY;
4222 else
4223 fap->gid = (gid3)vap->va_gid;
4224 fap->size = (size3)vap->va_size;
4225 fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4226 fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4227 fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4228 fap->fsid = (uint64)vap->va_fsid;
4229 fap->fileid = (fileid3)vap->va_nodeid;
4230 fap->atime.seconds = vap->va_atime.tv_sec;
4231 fap->atime.nseconds = vap->va_atime.tv_nsec;
4232 fap->mtime.seconds = vap->va_mtime.tv_sec;
4233 fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4234 fap->ctime.seconds = vap->va_ctime.tv_sec;
4235 fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4236 return (0);
4237 }
4238
4239 static int
4240 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4241 {
4242
4243 /* Return error if time or size overflow */
4244 if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4245 NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4246 NFS3_SIZE_OK(vap->va_size))) {
4247 return (EOVERFLOW);
4248 }
4249 wccap->size = (size3)vap->va_size;
4250 wccap->mtime.seconds = vap->va_mtime.tv_sec;
4251 wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4252 wccap->ctime.seconds = vap->va_ctime.tv_sec;
4253 wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4254 return (0);
4255 }
4256
4257 static void
4258 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4259 {
4260
4261 /* don't return attrs if time overflow */
4262 if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4263 poap->attributes = TRUE;
4264 } else
4265 poap->attributes = FALSE;
4266 }
4267
4268 void
4269 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4270 {
4271
4272 /* don't return attrs if time overflow */
4273 if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4274 poap->attributes = TRUE;
4275 } else
4276 poap->attributes = FALSE;
4277 }
4278
4279 static void
4280 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4281 {
4282
4283 vattr_to_pre_op_attr(bvap, &wccp->before);
4284 vattr_to_post_op_attr(avap, &wccp->after);
4285 }
4286
4287 void
4288 rfs3_srvrinit(void)
4289 {
4290 struct rfs3_verf_overlay {
4291 uint_t id; /* a "unique" identifier */
4292 int ts; /* a unique timestamp */
4293 } *verfp;
4294 timestruc_t now;
4295
4296 /*
4297 * The following algorithm attempts to find a unique verifier
4298 * to be used as the write verifier returned from the server
4299 * to the client. It is important that this verifier change
4300 * whenever the server reboots. Of secondary importance, it
4301 * is important for the verifier to be unique between two
4302 * different servers.
4303 *
4304 * Thus, an attempt is made to use the system hostid and the
4305 * current time in seconds when the nfssrv kernel module is
4306 * loaded. It is assumed that an NFS server will not be able
4307 * to boot and then to reboot in less than a second. If the
4308 * hostid has not been set, then the current high resolution
4309 * time is used. This will ensure different verifiers each
4310 * time the server reboots and minimize the chances that two
4311 * different servers will have the same verifier.
4312 */
4313
4314 #ifndef lint
4315 /*
4316 * We ASSERT that this constant logic expression is
4317 * always true because in the past, it wasn't.
4318 */
4319 ASSERT(sizeof (*verfp) <= sizeof (write3verf));
4320 #endif
4321
4322 gethrestime(&now);
4323 verfp = (struct rfs3_verf_overlay *)&write3verf;
4324 verfp->ts = (int)now.tv_sec;
4325 verfp->id = zone_get_hostid(NULL);
4326
4327 if (verfp->id == 0)
4328 verfp->id = (uint_t)now.tv_nsec;
4329
4330 nfs3_srv_caller_id = fs_new_caller_id();
4331
4332 }
4333
4334 static int
4335 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4336 {
4337 struct clist *wcl;
4338 int wlist_len;
4339 count3 count = rok->count;
4340
4341 wcl = args->wlist;
4342 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
4343 return (FALSE);
4344 }
4345
4346 wcl = args->wlist;
4347 rok->wlist_len = wlist_len;
4348 rok->wlist = wcl;
4349 return (TRUE);
4350 }
4351
/*
 * NFSv3 server teardown hook, the counterpart of rfs3_srvrinit().
 * It currently has no work to do.
 */
void
rfs3_srvrfini(void)
{
	/* Nothing to do */
}