1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
23 *
24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
25 */
26
27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29
30 #include <sys/param.h>
31 #include <sys/types.h>
32 #include <sys/systm.h>
33 #include <sys/cred.h>
34 #include <sys/buf.h>
35 #include <sys/vfs.h>
36 #include <sys/vnode.h>
37 #include <sys/uio.h>
38 #include <sys/errno.h>
39 #include <sys/sysmacros.h>
40 #include <sys/statvfs.h>
41 #include <sys/kmem.h>
42 #include <sys/dirent.h>
43 #include <sys/cmn_err.h>
44 #include <sys/debug.h>
45 #include <sys/systeminfo.h>
46 #include <sys/flock.h>
47 #include <sys/nbmlock.h>
48 #include <sys/policy.h>
49 #include <sys/sdt.h>
50
51 #include <rpc/types.h>
52 #include <rpc/auth.h>
53 #include <rpc/svc.h>
54 #include <rpc/rpc_rdma.h>
55
56 #include <nfs/nfs.h>
57 #include <nfs/export.h>
58 #include <nfs/nfs_cmd.h>
59
60 #include <sys/strsubr.h>
61
62 #include <sys/tsol/label.h>
63 #include <sys/tsol/tndb.h>
64
65 #include <sys/zone.h>
66
67 #include <inet/ip.h>
68 #include <inet/ip6.h>
69
70 /*
71 * These are the interface routines for the server side of the
72 * Network File System. See the NFS version 3 protocol specification
73 * for a description of this interface.
74 */
75
76 static writeverf3 write3verf;
77
78 static int sattr3_to_vattr(sattr3 *, struct vattr *);
79 static int vattr_to_fattr3(struct vattr *, fattr3 *);
80 static int vattr_to_wcc_attr(struct vattr *, wcc_attr *);
81 static void vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
82 static void vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
83 static int rdma_setup_read_data3(READ3args *, READ3resok *);
84
85 extern int nfs_loaned_buffers;
86
87 u_longlong_t nfs3_srv_caller_id;
88
89 /* ARGSUSED */
90 void
91 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
92 struct svc_req *req, cred_t *cr)
93 {
94 int error;
95 vnode_t *vp;
96 struct vattr va;
97
98 vp = nfs3_fhtovp(&args->object, exi);
99
100 DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
101 cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
102
103 if (vp == NULL) {
104 error = ESTALE;
105 goto out;
106 }
107
108 va.va_mask = AT_ALL;
109 error = rfs4_delegated_getattr(vp, &va, 0, cr);
110
111 if (!error) {
112 /* Lie about the object type for a referral */
113 if (vn_is_nfs_reparse(vp, cr))
114 va.va_type = VLNK;
115
116 /* overflow error if time or size is out of range */
117 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
118 if (error)
119 goto out;
120 resp->status = NFS3_OK;
121
122 DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
123 cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
124
125 VN_RELE(vp);
126
127 return;
128 }
129
130 out:
131 if (curthread->t_flag & T_WOULDBLOCK) {
132 curthread->t_flag &= ~T_WOULDBLOCK;
133 resp->status = NFS3ERR_JUKEBOX;
134 } else
135 resp->status = puterrno3(error);
136
137 DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
138 cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
139
140 if (vp != NULL)
141 VN_RELE(vp);
142 }
143
144 void *
145 rfs3_getattr_getfh(GETATTR3args *args)
146 {
147
148 return (&args->object);
149 }
150
151 void
152 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
153 struct svc_req *req, cred_t *cr)
154 {
155 int error;
156 vnode_t *vp;
157 struct vattr *bvap;
158 struct vattr bva;
159 struct vattr *avap;
160 struct vattr ava;
161 int flag;
162 int in_crit = 0;
163 struct flock64 bf;
164 caller_context_t ct;
165
166 bvap = NULL;
167 avap = NULL;
168
169 vp = nfs3_fhtovp(&args->object, exi);
170
171 DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
172 cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
173
174 if (vp == NULL) {
175 error = ESTALE;
176 goto out;
177 }
178
179 error = sattr3_to_vattr(&args->new_attributes, &ava);
180 if (error)
181 goto out;
182
183 if (is_system_labeled()) {
184 bslabel_t *clabel = req->rq_label;
185
186 ASSERT(clabel != NULL);
187 DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
188 "got client label from request(1)", struct svc_req *, req);
189
190 if (!blequal(&l_admin_low->tsl_label, clabel)) {
191 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
192 exi)) {
193 resp->status = NFS3ERR_ACCES;
194 goto out1;
195 }
196 }
197 }
198
199 /*
200 * We need to specially handle size changes because of
201 * possible conflicting NBMAND locks. Get into critical
202 * region before VOP_GETATTR, so the size attribute is
203 * valid when checking conflicts.
204 *
205 * Also, check to see if the v4 side of the server has
206 * delegated this file. If so, then we return JUKEBOX to
207 * allow the client to retrasmit its request.
208 */
209 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
210 if (nbl_need_check(vp)) {
211 nbl_start_crit(vp, RW_READER);
212 in_crit = 1;
213 }
214 }
215
216 bva.va_mask = AT_ALL;
217 error = rfs4_delegated_getattr(vp, &bva, 0, cr);
218
219 /*
220 * If we can't get the attributes, then we can't do the
221 * right access checking. So, we'll fail the request.
222 */
223 if (error)
224 goto out;
225
226 bvap = &bva;
227
228 if (rdonly(exi, req) || vn_is_readonly(vp)) {
229 resp->status = NFS3ERR_ROFS;
230 goto out1;
231 }
232
233 if (args->guard.check &&
234 (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
235 args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
236 resp->status = NFS3ERR_NOT_SYNC;
237 goto out1;
238 }
239
240 if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
241 flag = ATTR_UTIME;
242 else
243 flag = 0;
244
245 /*
246 * If the filesystem is exported with nosuid, then mask off
247 * the setuid and setgid bits.
248 */
249 if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
250 (exi->exi_export.ex_flags & EX_NOSUID))
251 ava.va_mode &= ~(VSUID | VSGID);
252
253 ct.cc_sysid = 0;
254 ct.cc_pid = 0;
255 ct.cc_caller_id = nfs3_srv_caller_id;
256 ct.cc_flags = CC_DONTBLOCK;
257
258 /*
259 * We need to specially handle size changes because it is
260 * possible for the client to create a file with modes
261 * which indicate read-only, but with the file opened for
262 * writing. If the client then tries to set the size of
263 * the file, then the normal access checking done in
264 * VOP_SETATTR would prevent the client from doing so,
265 * although it should be legal for it to do so. To get
266 * around this, we do the access checking for ourselves
267 * and then use VOP_SPACE which doesn't do the access
268 * checking which VOP_SETATTR does. VOP_SPACE can only
269 * operate on VREG files, let VOP_SETATTR handle the other
270 * extremely rare cases.
271 * Also the client should not be allowed to change the
272 * size of the file if there is a conflicting non-blocking
273 * mandatory lock in the region the change.
274 */
275 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
276 if (in_crit) {
277 u_offset_t offset;
278 ssize_t length;
279
280 if (ava.va_size < bva.va_size) {
281 offset = ava.va_size;
282 length = bva.va_size - ava.va_size;
283 } else {
284 offset = bva.va_size;
285 length = ava.va_size - bva.va_size;
286 }
287 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
288 NULL)) {
289 error = EACCES;
290 goto out;
291 }
292 }
293
294 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
295 ava.va_mask &= ~AT_SIZE;
296 bf.l_type = F_WRLCK;
297 bf.l_whence = 0;
298 bf.l_start = (off64_t)ava.va_size;
299 bf.l_len = 0;
300 bf.l_sysid = 0;
301 bf.l_pid = 0;
302 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
303 (offset_t)ava.va_size, cr, &ct);
304 }
305 }
306
307 if (!error && ava.va_mask)
308 error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
309
310 /* check if a monitor detected a delegation conflict */
311 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
312 resp->status = NFS3ERR_JUKEBOX;
313 goto out1;
314 }
315
316 ava.va_mask = AT_ALL;
317 avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
318
319 /*
320 * Force modified metadata out to stable storage.
321 */
322 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
323
324 if (error)
325 goto out;
326
327 if (in_crit)
328 nbl_end_crit(vp);
329
330 resp->status = NFS3_OK;
331 vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
332
333 DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
334 cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
335
336 VN_RELE(vp);
337
338 return;
339
340 out:
341 if (curthread->t_flag & T_WOULDBLOCK) {
342 curthread->t_flag &= ~T_WOULDBLOCK;
343 resp->status = NFS3ERR_JUKEBOX;
344 } else
345 resp->status = puterrno3(error);
346 out1:
347 DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
348 cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
349
350 if (vp != NULL) {
351 if (in_crit)
352 nbl_end_crit(vp);
353 VN_RELE(vp);
354 }
355 vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
356 }
357
358 void *
359 rfs3_setattr_getfh(SETATTR3args *args)
360 {
361
362 return (&args->object);
363 }
364
365 /* ARGSUSED */
366 void
367 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
368 struct svc_req *req, cred_t *cr)
369 {
370 int error;
371 vnode_t *vp;
372 vnode_t *dvp;
373 struct vattr *vap;
374 struct vattr va;
375 struct vattr *dvap;
376 struct vattr dva;
377 nfs_fh3 *fhp;
378 struct sec_ol sec = {0, 0};
379 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
380 struct sockaddr *ca;
381 char *name = NULL;
382
383 dvap = NULL;
384
385 /* Take an extra hold here in case of 'exi' switching */
386 if (exi != NULL)
387 exi_hold(exi);
388
389 /*
390 * Allow lookups from the root - the default
391 * location of the public filehandle.
392 */
393 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
394 dvp = rootdir;
395 VN_HOLD(dvp);
396
397 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
398 cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
399 } else {
400 dvp = nfs3_fhtovp(&args->what.dir, exi);
401
402 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
403 cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
404
405 if (dvp == NULL) {
406 error = ESTALE;
407 goto out;
408 }
409 }
410
411 dva.va_mask = AT_ALL;
412 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
413
414 if (args->what.name == nfs3nametoolong) {
415 resp->status = NFS3ERR_NAMETOOLONG;
416 goto out1;
417 }
418
419 if (args->what.name == NULL || *(args->what.name) == '\0') {
420 resp->status = NFS3ERR_ACCES;
421 goto out1;
422 }
423
424 fhp = &args->what.dir;
425 if (strcmp(args->what.name, "..") == 0 &&
426 EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
427 resp->status = NFS3ERR_NOENT;
428 goto out1;
429 }
430
431 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
432 name = nfscmd_convname(ca, exi, args->what.name,
433 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
434
435 if (name == NULL) {
436 resp->status = NFS3ERR_ACCES;
437 goto out1;
438 }
439
440 /*
441 * If the public filehandle is used then allow
442 * a multi-component lookup
443 */
444 if (PUBLIC_FH3(&args->what.dir)) {
445 struct exportinfo *new;
446
447 publicfh_flag = TRUE;
448
449 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
450 &new, &sec);
451
452 if (error == 0) {
453 exi_rele(exi);
454 exi = new;
455 }
456
457 /*
458 * Since WebNFS may bypass MOUNT, we need to ensure this
459 * request didn't come from an unlabeled admin_low client.
460 */
461 if (is_system_labeled() && error == 0) {
462 int addr_type;
463 void *ipaddr;
464 tsol_tpc_t *tp;
465
466 if (ca->sa_family == AF_INET) {
467 addr_type = IPV4_VERSION;
468 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
469 } else if (ca->sa_family == AF_INET6) {
470 addr_type = IPV6_VERSION;
471 ipaddr = &((struct sockaddr_in6 *)
472 ca)->sin6_addr;
473 }
474 tp = find_tpc(ipaddr, addr_type, B_FALSE);
475 if (tp == NULL || tp->tpc_tp.tp_doi !=
476 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
477 SUN_CIPSO) {
478 VN_RELE(vp);
479 resp->status = NFS3ERR_ACCES;
480 error = 1;
481 }
482 if (tp != NULL)
483 TPC_RELE(tp);
484 }
485 } else {
486 error = VOP_LOOKUP(dvp, name, &vp,
487 NULL, 0, NULL, cr, NULL, NULL, NULL);
488 }
489
490 if (name != args->what.name)
491 kmem_free(name, MAXPATHLEN + 1);
492
493 if (is_system_labeled() && error == 0) {
494 bslabel_t *clabel = req->rq_label;
495
496 ASSERT(clabel != NULL);
497 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
498 "got client label from request(1)", struct svc_req *, req);
499
500 if (!blequal(&l_admin_low->tsl_label, clabel)) {
501 if (!do_rfs_label_check(clabel, dvp,
502 DOMINANCE_CHECK, exi)) {
503 VN_RELE(vp);
504 resp->status = NFS3ERR_ACCES;
505 error = 1;
506 }
507 }
508 }
509
510 dva.va_mask = AT_ALL;
511 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
512
513 if (error)
514 goto out;
515
516 if (sec.sec_flags & SEC_QUERY) {
517 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
518 } else {
519 error = makefh3(&resp->resok.object, vp, exi);
520 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
521 auth_weak = TRUE;
522 }
523
524 if (error) {
525 VN_RELE(vp);
526 goto out;
527 }
528
529 va.va_mask = AT_ALL;
530 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
531
532 exi_rele(exi);
533 VN_RELE(vp);
534
535 resp->status = NFS3_OK;
536 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
537 vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
538
539 /*
540 * If it's public fh, no 0x81, and client's flavor is
541 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
542 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
543 */
544 if (auth_weak)
545 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
546
547 DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
548 cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
549 VN_RELE(dvp);
550
551 return;
552
553 out:
554 if (curthread->t_flag & T_WOULDBLOCK) {
555 curthread->t_flag &= ~T_WOULDBLOCK;
556 resp->status = NFS3ERR_JUKEBOX;
557 } else
558 resp->status = puterrno3(error);
559 out1:
560 if (exi != NULL)
561 exi_rele(exi);
562
563 DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
564 cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
565
566 if (dvp != NULL)
567 VN_RELE(dvp);
568 vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
569
570 }
571
572 void *
573 rfs3_lookup_getfh(LOOKUP3args *args)
574 {
575
576 return (&args->what.dir);
577 }
578
579 /* ARGSUSED */
580 void
581 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
582 struct svc_req *req, cred_t *cr)
583 {
584 int error;
585 vnode_t *vp;
586 struct vattr *vap;
587 struct vattr va;
588 int checkwriteperm;
589 boolean_t dominant_label = B_FALSE;
590 boolean_t equal_label = B_FALSE;
591 boolean_t admin_low_client;
592
593 vap = NULL;
594
595 vp = nfs3_fhtovp(&args->object, exi);
596
597 DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
598 cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
599
600 if (vp == NULL) {
601 error = ESTALE;
602 goto out;
603 }
604
605 /*
606 * If the file system is exported read only, it is not appropriate
607 * to check write permissions for regular files and directories.
608 * Special files are interpreted by the client, so the underlying
609 * permissions are sent back to the client for interpretation.
610 */
611 if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
612 checkwriteperm = 0;
613 else
614 checkwriteperm = 1;
615
616 /*
617 * We need the mode so that we can correctly determine access
618 * permissions relative to a mandatory lock file. Access to
619 * mandatory lock files is denied on the server, so it might
620 * as well be reflected to the server during the open.
621 */
622 va.va_mask = AT_MODE;
623 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
624 if (error)
625 goto out;
626
627 vap = &va;
628
629 resp->resok.access = 0;
630
631 if (is_system_labeled()) {
632 bslabel_t *clabel = req->rq_label;
633
634 ASSERT(clabel != NULL);
635 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
636 "got client label from request(1)", struct svc_req *, req);
637
638 if (!blequal(&l_admin_low->tsl_label, clabel)) {
639 if ((equal_label = do_rfs_label_check(clabel, vp,
640 EQUALITY_CHECK, exi)) == B_FALSE) {
641 dominant_label = do_rfs_label_check(clabel,
642 vp, DOMINANCE_CHECK, exi);
643 } else
644 dominant_label = B_TRUE;
645 admin_low_client = B_FALSE;
646 } else
647 admin_low_client = B_TRUE;
648 }
649
650 if (args->access & ACCESS3_READ) {
651 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
652 if (error) {
653 if (curthread->t_flag & T_WOULDBLOCK)
654 goto out;
655 } else if (!MANDLOCK(vp, va.va_mode) &&
656 (!is_system_labeled() || admin_low_client ||
657 dominant_label))
658 resp->resok.access |= ACCESS3_READ;
659 }
660 if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
661 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
662 if (error) {
663 if (curthread->t_flag & T_WOULDBLOCK)
664 goto out;
665 } else if (!is_system_labeled() || admin_low_client ||
666 dominant_label)
667 resp->resok.access |= ACCESS3_LOOKUP;
668 }
669 if (checkwriteperm &&
670 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
671 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
672 if (error) {
673 if (curthread->t_flag & T_WOULDBLOCK)
674 goto out;
675 } else if (!MANDLOCK(vp, va.va_mode) &&
676 (!is_system_labeled() || admin_low_client || equal_label)) {
677 resp->resok.access |=
678 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
679 }
680 }
681 if (checkwriteperm &&
682 (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
683 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
684 if (error) {
685 if (curthread->t_flag & T_WOULDBLOCK)
686 goto out;
687 } else if (!is_system_labeled() || admin_low_client ||
688 equal_label)
689 resp->resok.access |= ACCESS3_DELETE;
690 }
691 if (args->access & ACCESS3_EXECUTE) {
692 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
693 if (error) {
694 if (curthread->t_flag & T_WOULDBLOCK)
695 goto out;
696 } else if (!MANDLOCK(vp, va.va_mode) &&
697 (!is_system_labeled() || admin_low_client ||
698 dominant_label))
699 resp->resok.access |= ACCESS3_EXECUTE;
700 }
701
702 va.va_mask = AT_ALL;
703 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
704
705 resp->status = NFS3_OK;
706 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
707
708 DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
709 cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
710
711 VN_RELE(vp);
712
713 return;
714
715 out:
716 if (curthread->t_flag & T_WOULDBLOCK) {
717 curthread->t_flag &= ~T_WOULDBLOCK;
718 resp->status = NFS3ERR_JUKEBOX;
719 } else
720 resp->status = puterrno3(error);
721 DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
722 cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
723 if (vp != NULL)
724 VN_RELE(vp);
725 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
726 }
727
728 void *
729 rfs3_access_getfh(ACCESS3args *args)
730 {
731
732 return (&args->object);
733 }
734
735 /* ARGSUSED */
736 void
737 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
738 struct svc_req *req, cred_t *cr)
739 {
740 int error;
741 vnode_t *vp;
742 struct vattr *vap;
743 struct vattr va;
744 struct iovec iov;
745 struct uio uio;
746 char *data;
747 struct sockaddr *ca;
748 char *name = NULL;
749 int is_referral = 0;
750
751 vap = NULL;
752
753 vp = nfs3_fhtovp(&args->symlink, exi);
754
755 DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
756 cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
757
758 if (vp == NULL) {
759 error = ESTALE;
760 goto out;
761 }
762
763 va.va_mask = AT_ALL;
764 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
765 if (error)
766 goto out;
767
768 vap = &va;
769
770 /* We lied about the object type for a referral */
771 if (vn_is_nfs_reparse(vp, cr))
772 is_referral = 1;
773
774 if (vp->v_type != VLNK && !is_referral) {
775 resp->status = NFS3ERR_INVAL;
776 goto out1;
777 }
778
779 if (MANDLOCK(vp, va.va_mode)) {
780 resp->status = NFS3ERR_ACCES;
781 goto out1;
782 }
783
784 if (is_system_labeled()) {
785 bslabel_t *clabel = req->rq_label;
786
787 ASSERT(clabel != NULL);
788 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
789 "got client label from request(1)", struct svc_req *, req);
790
791 if (!blequal(&l_admin_low->tsl_label, clabel)) {
792 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
793 exi)) {
794 resp->status = NFS3ERR_ACCES;
795 goto out1;
796 }
797 }
798 }
799
800 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
801
802 if (is_referral) {
803 char *s;
804 size_t strsz;
805
806 /* Get an artificial symlink based on a referral */
807 s = build_symlink(vp, cr, &strsz);
808 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
809 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
810 vnode_t *, vp, char *, s);
811 if (s == NULL)
812 error = EINVAL;
813 else {
814 error = 0;
815 (void) strlcpy(data, s, MAXPATHLEN + 1);
816 kmem_free(s, strsz);
817 }
818
819 } else {
820
821 iov.iov_base = data;
822 iov.iov_len = MAXPATHLEN;
823 uio.uio_iov = &iov;
824 uio.uio_iovcnt = 1;
825 uio.uio_segflg = UIO_SYSSPACE;
826 uio.uio_extflg = UIO_COPY_CACHED;
827 uio.uio_loffset = 0;
828 uio.uio_resid = MAXPATHLEN;
829
830 error = VOP_READLINK(vp, &uio, cr, NULL);
831
832 if (!error)
833 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
834 }
835
836 va.va_mask = AT_ALL;
837 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
838
839 /* Lie about object type again just to be consistent */
840 if (is_referral && vap != NULL)
841 vap->va_type = VLNK;
842
843 #if 0 /* notyet */
844 /*
845 * Don't do this. It causes local disk writes when just
846 * reading the file and the overhead is deemed larger
847 * than the benefit.
848 */
849 /*
850 * Force modified metadata out to stable storage.
851 */
852 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
853 #endif
854
855 if (error) {
856 kmem_free(data, MAXPATHLEN + 1);
857 goto out;
858 }
859
860 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
861 name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
862 MAXPATHLEN + 1);
863
864 if (name == NULL) {
865 /*
866 * Even though the conversion failed, we return
867 * something. We just don't translate it.
868 */
869 name = data;
870 }
871
872 resp->status = NFS3_OK;
873 vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
874 resp->resok.data = name;
875
876 DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
877 cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
878 VN_RELE(vp);
879
880 if (name != data)
881 kmem_free(data, MAXPATHLEN + 1);
882
883 return;
884
885 out:
886 if (curthread->t_flag & T_WOULDBLOCK) {
887 curthread->t_flag &= ~T_WOULDBLOCK;
888 resp->status = NFS3ERR_JUKEBOX;
889 } else
890 resp->status = puterrno3(error);
891 out1:
892 DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
893 cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
894 if (vp != NULL)
895 VN_RELE(vp);
896 vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
897 }
898
899 void *
900 rfs3_readlink_getfh(READLINK3args *args)
901 {
902
903 return (&args->symlink);
904 }
905
906 void
907 rfs3_readlink_free(READLINK3res *resp)
908 {
909
910 if (resp->status == NFS3_OK)
911 kmem_free(resp->resok.data, MAXPATHLEN + 1);
912 }
913
914 /*
915 * Server routine to handle read
916 * May handle RDMA data as well as mblks
917 */
918 /* ARGSUSED */
919 void
920 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
921 struct svc_req *req, cred_t *cr)
922 {
923 int error;
924 vnode_t *vp;
925 struct vattr *vap;
926 struct vattr va;
927 struct iovec iov;
928 struct uio uio;
929 u_offset_t offset;
930 mblk_t *mp = NULL;
931 int alloc_err = 0;
932 int in_crit = 0;
933 int need_rwunlock = 0;
934 caller_context_t ct;
935 int rdma_used = 0;
936 int loaned_buffers;
937 struct uio *uiop;
938
939 vap = NULL;
940
941 vp = nfs3_fhtovp(&args->file, exi);
942
943 DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
944 cred_t *, cr, vnode_t *, vp, READ3args *, args);
945
946 if (vp == NULL) {
947 error = ESTALE;
948 goto out;
949 }
950
951 if (args->wlist) {
952 if (args->count > clist_len(args->wlist)) {
953 error = EINVAL;
954 goto out;
955 }
956 rdma_used = 1;
957 }
958
959 /* use loaned buffers for TCP */
960 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
961
962 if (is_system_labeled()) {
963 bslabel_t *clabel = req->rq_label;
964
965 ASSERT(clabel != NULL);
966 DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
967 "got client label from request(1)", struct svc_req *, req);
968
969 if (!blequal(&l_admin_low->tsl_label, clabel)) {
970 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
971 exi)) {
972 resp->status = NFS3ERR_ACCES;
973 goto out1;
974 }
975 }
976 }
977
978 ct.cc_sysid = 0;
979 ct.cc_pid = 0;
980 ct.cc_caller_id = nfs3_srv_caller_id;
981 ct.cc_flags = CC_DONTBLOCK;
982
983 /*
984 * Enter the critical region before calling VOP_RWLOCK
985 * to avoid a deadlock with write requests.
986 */
987 if (nbl_need_check(vp)) {
988 nbl_start_crit(vp, RW_READER);
989 in_crit = 1;
990 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
991 NULL)) {
992 error = EACCES;
993 goto out;
994 }
995 }
996
997 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
998
999 /* check if a monitor detected a delegation conflict */
1000 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1001 resp->status = NFS3ERR_JUKEBOX;
1002 goto out1;
1003 }
1004
1005 need_rwunlock = 1;
1006
1007 va.va_mask = AT_ALL;
1008 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1009
1010 /*
1011 * If we can't get the attributes, then we can't do the
1012 * right access checking. So, we'll fail the request.
1013 */
1014 if (error)
1015 goto out;
1016
1017 vap = &va;
1018
1019 if (vp->v_type != VREG) {
1020 resp->status = NFS3ERR_INVAL;
1021 goto out1;
1022 }
1023
1024 if (crgetuid(cr) != va.va_uid) {
1025 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1026 if (error) {
1027 if (curthread->t_flag & T_WOULDBLOCK)
1028 goto out;
1029 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1030 if (error)
1031 goto out;
1032 }
1033 }
1034
1035 if (MANDLOCK(vp, va.va_mode)) {
1036 resp->status = NFS3ERR_ACCES;
1037 goto out1;
1038 }
1039
1040 offset = args->offset;
1041 if (offset >= va.va_size) {
1042 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1043 if (in_crit)
1044 nbl_end_crit(vp);
1045 resp->status = NFS3_OK;
1046 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1047 resp->resok.count = 0;
1048 resp->resok.eof = TRUE;
1049 resp->resok.data.data_len = 0;
1050 resp->resok.data.data_val = NULL;
1051 resp->resok.data.mp = NULL;
1052 /* RDMA */
1053 resp->resok.wlist = args->wlist;
1054 resp->resok.wlist_len = resp->resok.count;
1055 if (resp->resok.wlist)
1056 clist_zero_len(resp->resok.wlist);
1057 goto done;
1058 }
1059
1060 if (args->count == 0) {
1061 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1062 if (in_crit)
1063 nbl_end_crit(vp);
1064 resp->status = NFS3_OK;
1065 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1066 resp->resok.count = 0;
1067 resp->resok.eof = FALSE;
1068 resp->resok.data.data_len = 0;
1069 resp->resok.data.data_val = NULL;
1070 resp->resok.data.mp = NULL;
1071 /* RDMA */
1072 resp->resok.wlist = args->wlist;
1073 resp->resok.wlist_len = resp->resok.count;
1074 if (resp->resok.wlist)
1075 clist_zero_len(resp->resok.wlist);
1076 goto done;
1077 }
1078
1079 /*
1080 * do not allocate memory more the max. allowed
1081 * transfer size
1082 */
1083 if (args->count > rfs3_tsize(req))
1084 args->count = rfs3_tsize(req);
1085
1086 if (loaned_buffers) {
1087 uiop = (uio_t *)rfs_setup_xuio(vp);
1088 ASSERT(uiop != NULL);
1089 uiop->uio_segflg = UIO_SYSSPACE;
1090 uiop->uio_loffset = args->offset;
1091 uiop->uio_resid = args->count;
1092
1093 /* Jump to do the read if successful */
1094 if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1095 /*
1096 * Need to hold the vnode until after VOP_RETZCBUF()
1097 * is called.
1098 */
1099 VN_HOLD(vp);
1100 goto doio_read;
1101 }
1102
1103 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1104 uiop->uio_loffset, int, uiop->uio_resid);
1105
1106 uiop->uio_extflg = 0;
1107 /* failure to setup for zero copy */
1108 rfs_free_xuio((void *)uiop);
1109 loaned_buffers = 0;
1110 }
1111
1112 /*
1113 * If returning data via RDMA Write, then grab the chunk list.
1114 * If we aren't returning READ data w/RDMA_WRITE, then grab
1115 * a mblk.
1116 */
1117 if (rdma_used) {
1118 (void) rdma_get_wchunk(req, &iov, args->wlist);
1119 } else {
1120 /*
1121 * mp will contain the data to be sent out in the read reply.
1122 * This will be freed after the reply has been sent out (by the
1123 * driver).
1124 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
1125 * that the call to xdrmblk_putmblk() never fails.
1126 */
1127 mp = allocb_wait(RNDUP(args->count), BPRI_MED, STR_NOSIG,
1128 &alloc_err);
1129 ASSERT(mp != NULL);
1130 ASSERT(alloc_err == 0);
1131
1132 iov.iov_base = (caddr_t)mp->b_datap->db_base;
1133 iov.iov_len = args->count;
1134 }
1135
1136 uio.uio_iov = &iov;
1137 uio.uio_iovcnt = 1;
1138 uio.uio_segflg = UIO_SYSSPACE;
1139 uio.uio_extflg = UIO_COPY_CACHED;
1140 uio.uio_loffset = args->offset;
1141 uio.uio_resid = args->count;
1142 uiop = &uio;
1143
1144 doio_read:
1145 error = VOP_READ(vp, uiop, 0, cr, &ct);
1146
1147 if (error) {
1148 if (mp)
1149 freemsg(mp);
1150 /* check if a monitor detected a delegation conflict */
1151 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1152 resp->status = NFS3ERR_JUKEBOX;
1153 goto out1;
1154 }
1155 goto out;
1156 }
1157
1158 /* make mblk using zc buffers */
1159 if (loaned_buffers) {
1160 mp = uio_to_mblk(uiop);
1161 ASSERT(mp != NULL);
1162 }
1163
1164 va.va_mask = AT_ALL;
1165 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1166
1167 if (error)
1168 vap = NULL;
1169 else
1170 vap = &va;
1171
1172 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1173
1174 if (in_crit)
1175 nbl_end_crit(vp);
1176
1177 resp->status = NFS3_OK;
1178 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1179 resp->resok.count = args->count - uiop->uio_resid;
1180 if (!error && offset + resp->resok.count == va.va_size)
1181 resp->resok.eof = TRUE;
1182 else
1183 resp->resok.eof = FALSE;
1184 resp->resok.data.data_len = resp->resok.count;
1185
1186 if (mp)
1187 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1188
1189 resp->resok.data.mp = mp;
1190 resp->resok.size = (uint_t)args->count;
1191
1192 if (rdma_used) {
1193 resp->resok.data.data_val = (caddr_t)iov.iov_base;
1194 if (!rdma_setup_read_data3(args, &(resp->resok))) {
1195 resp->status = NFS3ERR_INVAL;
1196 }
1197 } else {
1198 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1199 (resp->resok).wlist = NULL;
1200 }
1201
1202 done:
1203 DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1204 cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1205
1206 VN_RELE(vp);
1207
1208 return;
1209
1210 out:
1211 if (curthread->t_flag & T_WOULDBLOCK) {
1212 curthread->t_flag &= ~T_WOULDBLOCK;
1213 resp->status = NFS3ERR_JUKEBOX;
1214 } else
1215 resp->status = puterrno3(error);
1216 out1:
1217 DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1218 cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1219
1220 if (vp != NULL) {
1221 if (need_rwunlock)
1222 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1223 if (in_crit)
1224 nbl_end_crit(vp);
1225 VN_RELE(vp);
1226 }
1227 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1228 }
1229
1230 void
1231 rfs3_read_free(READ3res *resp)
1232 {
1233 mblk_t *mp;
1234
1235 if (resp->status == NFS3_OK) {
1236 mp = resp->resok.data.mp;
1237 if (mp != NULL)
1238 freemsg(mp);
1239 }
1240 }
1241
1242 void *
1243 rfs3_read_getfh(READ3args *args)
1244 {
1245
1246 return (&args->file);
1247 }
1248
/*
 * Size of the on-stack iovec array used by rfs3_write().  An mblk chain
 * with more links than this gets a kmem-allocated iovec array instead.
 */
#define	MAX_IOVECS	12

#ifdef DEBUG
/*
 * Debug-only counters maintained by rfs3_write(): "hits" counts mblk
 * chains that fit in the on-stack iovec array, "misses" those that did not.
 */
static int rfs3_write_misses = 0;
static int rfs3_write_hits = 0;
#endif
1255
1256 void
1257 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1258 struct svc_req *req, cred_t *cr)
1259 {
1260 int error;
1261 vnode_t *vp;
1262 struct vattr *bvap = NULL;
1263 struct vattr bva;
1264 struct vattr *avap = NULL;
1265 struct vattr ava;
1266 u_offset_t rlimit;
1267 struct uio uio;
1268 struct iovec iov[MAX_IOVECS];
1269 mblk_t *m;
1270 struct iovec *iovp;
1271 int iovcnt;
1272 int ioflag;
1273 cred_t *savecred;
1274 int in_crit = 0;
1275 int rwlock_ret = -1;
1276 caller_context_t ct;
1277
1278 vp = nfs3_fhtovp(&args->file, exi);
1279
1280 DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
1281 cred_t *, cr, vnode_t *, vp, WRITE3args *, args);
1282
1283 if (vp == NULL) {
1284 error = ESTALE;
1285 goto err;
1286 }
1287
1288 if (is_system_labeled()) {
1289 bslabel_t *clabel = req->rq_label;
1290
1291 ASSERT(clabel != NULL);
1292 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1293 "got client label from request(1)", struct svc_req *, req);
1294
1295 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1296 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1297 exi)) {
1298 resp->status = NFS3ERR_ACCES;
1299 goto err1;
1300 }
1301 }
1302 }
1303
1304 ct.cc_sysid = 0;
1305 ct.cc_pid = 0;
1306 ct.cc_caller_id = nfs3_srv_caller_id;
1307 ct.cc_flags = CC_DONTBLOCK;
1308
1309 /*
1310 * We have to enter the critical region before calling VOP_RWLOCK
1311 * to avoid a deadlock with ufs.
1312 */
1313 if (nbl_need_check(vp)) {
1314 nbl_start_crit(vp, RW_READER);
1315 in_crit = 1;
1316 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1317 NULL)) {
1318 error = EACCES;
1319 goto err;
1320 }
1321 }
1322
1323 rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1324
1325 /* check if a monitor detected a delegation conflict */
1326 if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1327 resp->status = NFS3ERR_JUKEBOX;
1328 rwlock_ret = -1;
1329 goto err1;
1330 }
1331
1332
1333 bva.va_mask = AT_ALL;
1334 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1335
1336 /*
1337 * If we can't get the attributes, then we can't do the
1338 * right access checking. So, we'll fail the request.
1339 */
1340 if (error)
1341 goto err;
1342
1343 bvap = &bva;
1344 avap = bvap;
1345
1346 if (args->count != args->data.data_len) {
1347 resp->status = NFS3ERR_INVAL;
1348 goto err1;
1349 }
1350
1351 if (rdonly(exi, req)) {
1352 resp->status = NFS3ERR_ROFS;
1353 goto err1;
1354 }
1355
1356 if (vp->v_type != VREG) {
1357 resp->status = NFS3ERR_INVAL;
1358 goto err1;
1359 }
1360
1361 if (crgetuid(cr) != bva.va_uid &&
1362 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1363 goto err;
1364
1365 if (MANDLOCK(vp, bva.va_mode)) {
1366 resp->status = NFS3ERR_ACCES;
1367 goto err1;
1368 }
1369
1370 if (args->count == 0) {
1371 resp->status = NFS3_OK;
1372 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1373 resp->resok.count = 0;
1374 resp->resok.committed = args->stable;
1375 resp->resok.verf = write3verf;
1376 goto out;
1377 }
1378
1379 if (args->mblk != NULL) {
1380 iovcnt = 0;
1381 for (m = args->mblk; m != NULL; m = m->b_cont)
1382 iovcnt++;
1383 if (iovcnt <= MAX_IOVECS) {
1384 #ifdef DEBUG
1385 rfs3_write_hits++;
1386 #endif
1387 iovp = iov;
1388 } else {
1389 #ifdef DEBUG
1390 rfs3_write_misses++;
1391 #endif
1392 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1393 }
1394 mblk_to_iov(args->mblk, iovcnt, iovp);
1395
1396 } else if (args->rlist != NULL) {
1397 iovcnt = 1;
1398 iovp = iov;
1399 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1400 iovp->iov_len = args->count;
1401 } else {
1402 iovcnt = 1;
1403 iovp = iov;
1404 iovp->iov_base = args->data.data_val;
1405 iovp->iov_len = args->count;
1406 }
1407
1408 uio.uio_iov = iovp;
1409 uio.uio_iovcnt = iovcnt;
1410
1411 uio.uio_segflg = UIO_SYSSPACE;
1412 uio.uio_extflg = UIO_COPY_DEFAULT;
1413 uio.uio_loffset = args->offset;
1414 uio.uio_resid = args->count;
1415 uio.uio_llimit = curproc->p_fsz_ctl;
1416 rlimit = uio.uio_llimit - args->offset;
1417 if (rlimit < (u_offset_t)uio.uio_resid)
1418 uio.uio_resid = (int)rlimit;
1419
1420 if (args->stable == UNSTABLE)
1421 ioflag = 0;
1422 else if (args->stable == FILE_SYNC)
1423 ioflag = FSYNC;
1424 else if (args->stable == DATA_SYNC)
1425 ioflag = FDSYNC;
1426 else {
1427 if (iovp != iov)
1428 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1429 resp->status = NFS3ERR_INVAL;
1430 goto err1;
1431 }
1432
1433 /*
1434 * We're changing creds because VM may fault and we need
1435 * the cred of the current thread to be used if quota
1436 * checking is enabled.
1437 */
1438 savecred = curthread->t_cred;
1439 curthread->t_cred = cr;
1440 error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1441 curthread->t_cred = savecred;
1442
1443 if (iovp != iov)
1444 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1445
1446 /* check if a monitor detected a delegation conflict */
1447 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1448 resp->status = NFS3ERR_JUKEBOX;
1449 goto err1;
1450 }
1451
1452 ava.va_mask = AT_ALL;
1453 avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1454
1455 if (error)
1456 goto err;
1457
1458 /*
1459 * If we were unable to get the V_WRITELOCK_TRUE, then we
1460 * may not have accurate after attrs, so check if
1461 * we have both attributes, they have a non-zero va_seq, and
1462 * va_seq has changed by exactly one,
1463 * if not, turn off the before attr.
1464 */
1465 if (rwlock_ret != V_WRITELOCK_TRUE) {
1466 if (bvap == NULL || avap == NULL ||
1467 bvap->va_seq == 0 || avap->va_seq == 0 ||
1468 avap->va_seq != (bvap->va_seq + 1)) {
1469 bvap = NULL;
1470 }
1471 }
1472
1473 resp->status = NFS3_OK;
1474 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1475 resp->resok.count = args->count - uio.uio_resid;
1476 resp->resok.committed = args->stable;
1477 resp->resok.verf = write3verf;
1478 goto out;
1479
1480 err:
1481 if (curthread->t_flag & T_WOULDBLOCK) {
1482 curthread->t_flag &= ~T_WOULDBLOCK;
1483 resp->status = NFS3ERR_JUKEBOX;
1484 } else
1485 resp->status = puterrno3(error);
1486 err1:
1487 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1488 out:
1489 DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
1490 cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);
1491
1492 if (vp != NULL) {
1493 if (rwlock_ret != -1)
1494 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1495 if (in_crit)
1496 nbl_end_crit(vp);
1497 VN_RELE(vp);
1498 }
1499 }
1500
1501 void *
1502 rfs3_write_getfh(WRITE3args *args)
1503 {
1504
1505 return (&args->file);
1506 }
1507
1508 void
1509 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1510 struct svc_req *req, cred_t *cr)
1511 {
1512 int error;
1513 int in_crit = 0;
1514 vnode_t *vp;
1515 vnode_t *tvp = NULL;
1516 vnode_t *dvp;
1517 struct vattr *vap;
1518 struct vattr va;
1519 struct vattr *dbvap;
1520 struct vattr dbva;
1521 struct vattr *davap;
1522 struct vattr dava;
1523 enum vcexcl excl;
1524 nfstime3 *mtime;
1525 len_t reqsize;
1526 bool_t trunc;
1527 struct sockaddr *ca;
1528 char *name = NULL;
1529
1530 dbvap = NULL;
1531 davap = NULL;
1532
1533 dvp = nfs3_fhtovp(&args->where.dir, exi);
1534
1535 DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
1536 cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);
1537
1538 if (dvp == NULL) {
1539 error = ESTALE;
1540 goto out;
1541 }
1542
1543 dbva.va_mask = AT_ALL;
1544 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1545 davap = dbvap;
1546
1547 if (args->where.name == nfs3nametoolong) {
1548 resp->status = NFS3ERR_NAMETOOLONG;
1549 goto out1;
1550 }
1551
1552 if (args->where.name == NULL || *(args->where.name) == '\0') {
1553 resp->status = NFS3ERR_ACCES;
1554 goto out1;
1555 }
1556
1557 if (rdonly(exi, req)) {
1558 resp->status = NFS3ERR_ROFS;
1559 goto out1;
1560 }
1561
1562 if (is_system_labeled()) {
1563 bslabel_t *clabel = req->rq_label;
1564
1565 ASSERT(clabel != NULL);
1566 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1567 "got client label from request(1)", struct svc_req *, req);
1568
1569 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1570 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1571 exi)) {
1572 resp->status = NFS3ERR_ACCES;
1573 goto out1;
1574 }
1575 }
1576 }
1577
1578 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1579 name = nfscmd_convname(ca, exi, args->where.name,
1580 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1581
1582 if (name == NULL) {
1583 /* This is really a Solaris EILSEQ */
1584 resp->status = NFS3ERR_INVAL;
1585 goto out1;
1586 }
1587
1588 if (args->how.mode == EXCLUSIVE) {
1589 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1590 va.va_type = VREG;
1591 va.va_mode = (mode_t)0;
1592 /*
1593 * Ensure no time overflows and that types match
1594 */
1595 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1596 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1597 va.va_mtime.tv_nsec = mtime->nseconds;
1598 excl = EXCL;
1599 } else {
1600 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1601 &va);
1602 if (error)
1603 goto out;
1604 va.va_mask |= AT_TYPE;
1605 va.va_type = VREG;
1606 if (args->how.mode == GUARDED)
1607 excl = EXCL;
1608 else {
1609 excl = NONEXCL;
1610
1611 /*
1612 * During creation of file in non-exclusive mode
1613 * if size of file is being set then make sure
1614 * that if the file already exists that no conflicting
1615 * non-blocking mandatory locks exists in the region
1616 * being modified. If there are conflicting locks fail
1617 * the operation with EACCES.
1618 */
1619 if (va.va_mask & AT_SIZE) {
1620 struct vattr tva;
1621
1622 /*
1623 * Does file already exist?
1624 */
1625 error = VOP_LOOKUP(dvp, name, &tvp,
1626 NULL, 0, NULL, cr, NULL, NULL, NULL);
1627
1628 /*
1629 * Check to see if the file has been delegated
1630 * to a v4 client. If so, then begin recall of
1631 * the delegation and return JUKEBOX to allow
1632 * the client to retrasmit its request.
1633 */
1634
1635 trunc = va.va_size == 0;
1636 if (!error &&
1637 rfs4_check_delegated(FWRITE, tvp, trunc)) {
1638 resp->status = NFS3ERR_JUKEBOX;
1639 goto out1;
1640 }
1641
1642 /*
1643 * Check for NBMAND lock conflicts
1644 */
1645 if (!error && nbl_need_check(tvp)) {
1646 u_offset_t offset;
1647 ssize_t len;
1648
1649 nbl_start_crit(tvp, RW_READER);
1650 in_crit = 1;
1651
1652 tva.va_mask = AT_SIZE;
1653 error = VOP_GETATTR(tvp, &tva, 0, cr,
1654 NULL);
1655 /*
1656 * Can't check for conflicts, so return
1657 * error.
1658 */
1659 if (error)
1660 goto out;
1661
1662 offset = tva.va_size < va.va_size ?
1663 tva.va_size : va.va_size;
1664 len = tva.va_size < va.va_size ?
1665 va.va_size - tva.va_size :
1666 tva.va_size - va.va_size;
1667 if (nbl_conflict(tvp, NBL_WRITE,
1668 offset, len, 0, NULL)) {
1669 error = EACCES;
1670 goto out;
1671 }
1672 } else if (tvp) {
1673 VN_RELE(tvp);
1674 tvp = NULL;
1675 }
1676 }
1677 }
1678 if (va.va_mask & AT_SIZE)
1679 reqsize = va.va_size;
1680 }
1681
1682 /*
1683 * Must specify the mode.
1684 */
1685 if (!(va.va_mask & AT_MODE)) {
1686 resp->status = NFS3ERR_INVAL;
1687 goto out1;
1688 }
1689
1690 /*
1691 * If the filesystem is exported with nosuid, then mask off
1692 * the setuid and setgid bits.
1693 */
1694 if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1695 va.va_mode &= ~(VSUID | VSGID);
1696
1697 tryagain:
1698 /*
1699 * The file open mode used is VWRITE. If the client needs
1700 * some other semantic, then it should do the access checking
1701 * itself. It would have been nice to have the file open mode
1702 * passed as part of the arguments.
1703 */
1704 error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1705 &vp, cr, 0, NULL, NULL);
1706
1707 dava.va_mask = AT_ALL;
1708 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1709
1710 if (error) {
1711 /*
1712 * If we got something other than file already exists
1713 * then just return this error. Otherwise, we got
1714 * EEXIST. If we were doing a GUARDED create, then
1715 * just return this error. Otherwise, we need to
1716 * make sure that this wasn't a duplicate of an
1717 * exclusive create request.
1718 *
1719 * The assumption is made that a non-exclusive create
1720 * request will never return EEXIST.
1721 */
1722 if (error != EEXIST || args->how.mode == GUARDED)
1723 goto out;
1724 /*
1725 * Lookup the file so that we can get a vnode for it.
1726 */
1727 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1728 NULL, cr, NULL, NULL, NULL);
1729 if (error) {
1730 /*
1731 * We couldn't find the file that we thought that
1732 * we just created. So, we'll just try creating
1733 * it again.
1734 */
1735 if (error == ENOENT)
1736 goto tryagain;
1737 goto out;
1738 }
1739
1740 /*
1741 * If the file is delegated to a v4 client, go ahead
1742 * and initiate recall, this create is a hint that a
1743 * conflicting v3 open has occurred.
1744 */
1745
1746 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1747 VN_RELE(vp);
1748 resp->status = NFS3ERR_JUKEBOX;
1749 goto out1;
1750 }
1751
1752 va.va_mask = AT_ALL;
1753 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1754
1755 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1756 /* % with INT32_MAX to prevent overflows */
1757 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1758 vap->va_mtime.tv_sec !=
1759 (mtime->seconds % INT32_MAX) ||
1760 vap->va_mtime.tv_nsec != mtime->nseconds)) {
1761 VN_RELE(vp);
1762 error = EEXIST;
1763 goto out;
1764 }
1765 } else {
1766
1767 if ((args->how.mode == UNCHECKED ||
1768 args->how.mode == GUARDED) &&
1769 args->how.createhow3_u.obj_attributes.size.set_it &&
1770 va.va_size == 0)
1771 trunc = TRUE;
1772 else
1773 trunc = FALSE;
1774
1775 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1776 VN_RELE(vp);
1777 resp->status = NFS3ERR_JUKEBOX;
1778 goto out1;
1779 }
1780
1781 va.va_mask = AT_ALL;
1782 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1783
1784 /*
1785 * We need to check to make sure that the file got
1786 * created to the indicated size. If not, we do a
1787 * setattr to try to change the size, but we don't
1788 * try too hard. This shouldn't a problem as most
1789 * clients will only specifiy a size of zero which
1790 * local file systems handle. However, even if
1791 * the client does specify a non-zero size, it can
1792 * still recover by checking the size of the file
1793 * after it has created it and then issue a setattr
1794 * request of its own to set the size of the file.
1795 */
1796 if (vap != NULL &&
1797 (args->how.mode == UNCHECKED ||
1798 args->how.mode == GUARDED) &&
1799 args->how.createhow3_u.obj_attributes.size.set_it &&
1800 vap->va_size != reqsize) {
1801 va.va_mask = AT_SIZE;
1802 va.va_size = reqsize;
1803 (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1804 va.va_mask = AT_ALL;
1805 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1806 }
1807 }
1808
1809 if (name != args->where.name)
1810 kmem_free(name, MAXPATHLEN + 1);
1811
1812 error = makefh3(&resp->resok.obj.handle, vp, exi);
1813 if (error)
1814 resp->resok.obj.handle_follows = FALSE;
1815 else
1816 resp->resok.obj.handle_follows = TRUE;
1817
1818 /*
1819 * Force modified data and metadata out to stable storage.
1820 */
1821 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1822 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1823
1824 VN_RELE(vp);
1825 if (tvp != NULL) {
1826 if (in_crit)
1827 nbl_end_crit(tvp);
1828 VN_RELE(tvp);
1829 }
1830
1831 resp->status = NFS3_OK;
1832 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1833 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1834
1835 DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1836 cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1837
1838 VN_RELE(dvp);
1839 return;
1840
1841 out:
1842 if (curthread->t_flag & T_WOULDBLOCK) {
1843 curthread->t_flag &= ~T_WOULDBLOCK;
1844 resp->status = NFS3ERR_JUKEBOX;
1845 } else
1846 resp->status = puterrno3(error);
1847 out1:
1848 DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1849 cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1850
1851 if (name != NULL && name != args->where.name)
1852 kmem_free(name, MAXPATHLEN + 1);
1853
1854 if (tvp != NULL) {
1855 if (in_crit)
1856 nbl_end_crit(tvp);
1857 VN_RELE(tvp);
1858 }
1859 if (dvp != NULL)
1860 VN_RELE(dvp);
1861 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1862 }
1863
1864 void *
1865 rfs3_create_getfh(CREATE3args *args)
1866 {
1867
1868 return (&args->where.dir);
1869 }
1870
1871 void
1872 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1873 struct svc_req *req, cred_t *cr)
1874 {
1875 int error;
1876 vnode_t *vp = NULL;
1877 vnode_t *dvp;
1878 struct vattr *vap;
1879 struct vattr va;
1880 struct vattr *dbvap;
1881 struct vattr dbva;
1882 struct vattr *davap;
1883 struct vattr dava;
1884 struct sockaddr *ca;
1885 char *name = NULL;
1886
1887 dbvap = NULL;
1888 davap = NULL;
1889
1890 dvp = nfs3_fhtovp(&args->where.dir, exi);
1891
1892 DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
1893 cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
1894
1895 if (dvp == NULL) {
1896 error = ESTALE;
1897 goto out;
1898 }
1899
1900 dbva.va_mask = AT_ALL;
1901 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1902 davap = dbvap;
1903
1904 if (args->where.name == nfs3nametoolong) {
1905 resp->status = NFS3ERR_NAMETOOLONG;
1906 goto out1;
1907 }
1908
1909 if (args->where.name == NULL || *(args->where.name) == '\0') {
1910 resp->status = NFS3ERR_ACCES;
1911 goto out1;
1912 }
1913
1914 if (rdonly(exi, req)) {
1915 resp->status = NFS3ERR_ROFS;
1916 goto out1;
1917 }
1918
1919 if (is_system_labeled()) {
1920 bslabel_t *clabel = req->rq_label;
1921
1922 ASSERT(clabel != NULL);
1923 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1924 "got client label from request(1)", struct svc_req *, req);
1925
1926 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1927 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1928 exi)) {
1929 resp->status = NFS3ERR_ACCES;
1930 goto out1;
1931 }
1932 }
1933 }
1934
1935 error = sattr3_to_vattr(&args->attributes, &va);
1936 if (error)
1937 goto out;
1938
1939 if (!(va.va_mask & AT_MODE)) {
1940 resp->status = NFS3ERR_INVAL;
1941 goto out1;
1942 }
1943
1944 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1945 name = nfscmd_convname(ca, exi, args->where.name,
1946 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1947
1948 if (name == NULL) {
1949 resp->status = NFS3ERR_INVAL;
1950 goto out1;
1951 }
1952
1953 va.va_mask |= AT_TYPE;
1954 va.va_type = VDIR;
1955
1956 error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
1957
1958 if (name != args->where.name)
1959 kmem_free(name, MAXPATHLEN + 1);
1960
1961 dava.va_mask = AT_ALL;
1962 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1963
1964 /*
1965 * Force modified data and metadata out to stable storage.
1966 */
1967 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1968
1969 if (error)
1970 goto out;
1971
1972 error = makefh3(&resp->resok.obj.handle, vp, exi);
1973 if (error)
1974 resp->resok.obj.handle_follows = FALSE;
1975 else
1976 resp->resok.obj.handle_follows = TRUE;
1977
1978 va.va_mask = AT_ALL;
1979 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1980
1981 /*
1982 * Force modified data and metadata out to stable storage.
1983 */
1984 (void) VOP_FSYNC(vp, 0, cr, NULL);
1985
1986 VN_RELE(vp);
1987
1988 resp->status = NFS3_OK;
1989 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1990 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1991
1992 DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
1993 cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
1994 VN_RELE(dvp);
1995
1996 return;
1997
1998 out:
1999 if (curthread->t_flag & T_WOULDBLOCK) {
2000 curthread->t_flag &= ~T_WOULDBLOCK;
2001 resp->status = NFS3ERR_JUKEBOX;
2002 } else
2003 resp->status = puterrno3(error);
2004 out1:
2005 DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
2006 cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
2007 if (dvp != NULL)
2008 VN_RELE(dvp);
2009 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2010 }
2011
2012 void *
2013 rfs3_mkdir_getfh(MKDIR3args *args)
2014 {
2015
2016 return (&args->where.dir);
2017 }
2018
2019 void
2020 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2021 struct svc_req *req, cred_t *cr)
2022 {
2023 int error;
2024 vnode_t *vp;
2025 vnode_t *dvp;
2026 struct vattr *vap;
2027 struct vattr va;
2028 struct vattr *dbvap;
2029 struct vattr dbva;
2030 struct vattr *davap;
2031 struct vattr dava;
2032 struct sockaddr *ca;
2033 char *name = NULL;
2034 char *symdata = NULL;
2035
2036 dbvap = NULL;
2037 davap = NULL;
2038
2039 dvp = nfs3_fhtovp(&args->where.dir, exi);
2040
2041 DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
2042 cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
2043
2044 if (dvp == NULL) {
2045 error = ESTALE;
2046 goto err;
2047 }
2048
2049 dbva.va_mask = AT_ALL;
2050 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2051 davap = dbvap;
2052
2053 if (args->where.name == nfs3nametoolong) {
2054 resp->status = NFS3ERR_NAMETOOLONG;
2055 goto err1;
2056 }
2057
2058 if (args->where.name == NULL || *(args->where.name) == '\0') {
2059 resp->status = NFS3ERR_ACCES;
2060 goto err1;
2061 }
2062
2063 if (rdonly(exi, req)) {
2064 resp->status = NFS3ERR_ROFS;
2065 goto err1;
2066 }
2067
2068 if (is_system_labeled()) {
2069 bslabel_t *clabel = req->rq_label;
2070
2071 ASSERT(clabel != NULL);
2072 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2073 "got client label from request(1)", struct svc_req *, req);
2074
2075 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2076 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2077 exi)) {
2078 resp->status = NFS3ERR_ACCES;
2079 goto err1;
2080 }
2081 }
2082 }
2083
2084 error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2085 if (error)
2086 goto err;
2087
2088 if (!(va.va_mask & AT_MODE)) {
2089 resp->status = NFS3ERR_INVAL;
2090 goto err1;
2091 }
2092
2093 if (args->symlink.symlink_data == nfs3nametoolong) {
2094 resp->status = NFS3ERR_NAMETOOLONG;
2095 goto err1;
2096 }
2097
2098 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2099 name = nfscmd_convname(ca, exi, args->where.name,
2100 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2101
2102 if (name == NULL) {
2103 /* This is really a Solaris EILSEQ */
2104 resp->status = NFS3ERR_INVAL;
2105 goto err1;
2106 }
2107
2108 symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2109 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2110 if (symdata == NULL) {
2111 /* This is really a Solaris EILSEQ */
2112 resp->status = NFS3ERR_INVAL;
2113 goto err1;
2114 }
2115
2116
2117 va.va_mask |= AT_TYPE;
2118 va.va_type = VLNK;
2119
2120 error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2121
2122 dava.va_mask = AT_ALL;
2123 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2124
2125 if (error)
2126 goto err;
2127
2128 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2129 NULL, NULL, NULL);
2130
2131 /*
2132 * Force modified data and metadata out to stable storage.
2133 */
2134 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2135
2136
2137 resp->status = NFS3_OK;
2138 if (error) {
2139 resp->resok.obj.handle_follows = FALSE;
2140 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2141 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2142 goto out;
2143 }
2144
2145 error = makefh3(&resp->resok.obj.handle, vp, exi);
2146 if (error)
2147 resp->resok.obj.handle_follows = FALSE;
2148 else
2149 resp->resok.obj.handle_follows = TRUE;
2150
2151 va.va_mask = AT_ALL;
2152 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2153
2154 /*
2155 * Force modified data and metadata out to stable storage.
2156 */
2157 (void) VOP_FSYNC(vp, 0, cr, NULL);
2158
2159 VN_RELE(vp);
2160
2161 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2162 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2163 goto out;
2164
2165 err:
2166 if (curthread->t_flag & T_WOULDBLOCK) {
2167 curthread->t_flag &= ~T_WOULDBLOCK;
2168 resp->status = NFS3ERR_JUKEBOX;
2169 } else
2170 resp->status = puterrno3(error);
2171 err1:
2172 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2173 out:
2174 if (name != NULL && name != args->where.name)
2175 kmem_free(name, MAXPATHLEN + 1);
2176 if (symdata != NULL && symdata != args->symlink.symlink_data)
2177 kmem_free(symdata, MAXPATHLEN + 1);
2178
2179 DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
2180 cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
2181
2182 if (dvp != NULL)
2183 VN_RELE(dvp);
2184 }
2185
2186 void *
2187 rfs3_symlink_getfh(SYMLINK3args *args)
2188 {
2189
2190 return (&args->where.dir);
2191 }
2192
2193 void
2194 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2195 struct svc_req *req, cred_t *cr)
2196 {
2197 int error;
2198 vnode_t *vp;
2199 vnode_t *realvp;
2200 vnode_t *dvp;
2201 struct vattr *vap;
2202 struct vattr va;
2203 struct vattr *dbvap;
2204 struct vattr dbva;
2205 struct vattr *davap;
2206 struct vattr dava;
2207 int mode;
2208 enum vcexcl excl;
2209 struct sockaddr *ca;
2210 char *name = NULL;
2211
2212 dbvap = NULL;
2213 davap = NULL;
2214
2215 dvp = nfs3_fhtovp(&args->where.dir, exi);
2216
2217 DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
2218 cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
2219
2220 if (dvp == NULL) {
2221 error = ESTALE;
2222 goto out;
2223 }
2224
2225 dbva.va_mask = AT_ALL;
2226 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2227 davap = dbvap;
2228
2229 if (args->where.name == nfs3nametoolong) {
2230 resp->status = NFS3ERR_NAMETOOLONG;
2231 goto out1;
2232 }
2233
2234 if (args->where.name == NULL || *(args->where.name) == '\0') {
2235 resp->status = NFS3ERR_ACCES;
2236 goto out1;
2237 }
2238
2239 if (rdonly(exi, req)) {
2240 resp->status = NFS3ERR_ROFS;
2241 goto out1;
2242 }
2243
2244 if (is_system_labeled()) {
2245 bslabel_t *clabel = req->rq_label;
2246
2247 ASSERT(clabel != NULL);
2248 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2249 "got client label from request(1)", struct svc_req *, req);
2250
2251 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2252 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2253 exi)) {
2254 resp->status = NFS3ERR_ACCES;
2255 goto out1;
2256 }
2257 }
2258 }
2259
2260 switch (args->what.type) {
2261 case NF3CHR:
2262 case NF3BLK:
2263 error = sattr3_to_vattr(
2264 &args->what.mknoddata3_u.device.dev_attributes, &va);
2265 if (error)
2266 goto out;
2267 if (secpolicy_sys_devices(cr) != 0) {
2268 resp->status = NFS3ERR_PERM;
2269 goto out1;
2270 }
2271 if (args->what.type == NF3CHR)
2272 va.va_type = VCHR;
2273 else
2274 va.va_type = VBLK;
2275 va.va_rdev = makedevice(
2276 args->what.mknoddata3_u.device.spec.specdata1,
2277 args->what.mknoddata3_u.device.spec.specdata2);
2278 va.va_mask |= AT_TYPE | AT_RDEV;
2279 break;
2280 case NF3SOCK:
2281 error = sattr3_to_vattr(
2282 &args->what.mknoddata3_u.pipe_attributes, &va);
2283 if (error)
2284 goto out;
2285 va.va_type = VSOCK;
2286 va.va_mask |= AT_TYPE;
2287 break;
2288 case NF3FIFO:
2289 error = sattr3_to_vattr(
2290 &args->what.mknoddata3_u.pipe_attributes, &va);
2291 if (error)
2292 goto out;
2293 va.va_type = VFIFO;
2294 va.va_mask |= AT_TYPE;
2295 break;
2296 default:
2297 resp->status = NFS3ERR_BADTYPE;
2298 goto out1;
2299 }
2300
2301 /*
2302 * Must specify the mode.
2303 */
2304 if (!(va.va_mask & AT_MODE)) {
2305 resp->status = NFS3ERR_INVAL;
2306 goto out1;
2307 }
2308
2309 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2310 name = nfscmd_convname(ca, exi, args->where.name,
2311 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2312
2313 if (name == NULL) {
2314 resp->status = NFS3ERR_INVAL;
2315 goto out1;
2316 }
2317
2318 excl = EXCL;
2319
2320 mode = 0;
2321
2322 error = VOP_CREATE(dvp, name, &va, excl, mode,
2323 &vp, cr, 0, NULL, NULL);
2324
2325 if (name != args->where.name)
2326 kmem_free(name, MAXPATHLEN + 1);
2327
2328 dava.va_mask = AT_ALL;
2329 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2330
2331 /*
2332 * Force modified data and metadata out to stable storage.
2333 */
2334 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2335
2336 if (error)
2337 goto out;
2338
2339 resp->status = NFS3_OK;
2340
2341 error = makefh3(&resp->resok.obj.handle, vp, exi);
2342 if (error)
2343 resp->resok.obj.handle_follows = FALSE;
2344 else
2345 resp->resok.obj.handle_follows = TRUE;
2346
2347 va.va_mask = AT_ALL;
2348 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2349
2350 /*
2351 * Force modified metadata out to stable storage.
2352 *
2353 * if a underlying vp exists, pass it to VOP_FSYNC
2354 */
2355 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2356 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2357 else
2358 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2359
2360 VN_RELE(vp);
2361
2362 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2363 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2364 DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2365 cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2366 VN_RELE(dvp);
2367 return;
2368
2369 out:
2370 if (curthread->t_flag & T_WOULDBLOCK) {
2371 curthread->t_flag &= ~T_WOULDBLOCK;
2372 resp->status = NFS3ERR_JUKEBOX;
2373 } else
2374 resp->status = puterrno3(error);
2375 out1:
2376 DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2377 cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2378 if (dvp != NULL)
2379 VN_RELE(dvp);
2380 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2381 }
2382
2383 void *
2384 rfs3_mknod_getfh(MKNOD3args *args)
2385 {
2386
2387 return (&args->where.dir);
2388 }
2389
2390 void
2391 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2392 struct svc_req *req, cred_t *cr)
2393 {
2394 int error = 0;
2395 vnode_t *vp;
2396 struct vattr *bvap;
2397 struct vattr bva;
2398 struct vattr *avap;
2399 struct vattr ava;
2400 vnode_t *targvp = NULL;
2401 struct sockaddr *ca;
2402 char *name = NULL;
2403
2404 bvap = NULL;
2405 avap = NULL;
2406
2407 vp = nfs3_fhtovp(&args->object.dir, exi);
2408
2409 DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
2410 cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
2411
2412 if (vp == NULL) {
2413 error = ESTALE;
2414 goto err;
2415 }
2416
2417 bva.va_mask = AT_ALL;
2418 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2419 avap = bvap;
2420
2421 if (vp->v_type != VDIR) {
2422 resp->status = NFS3ERR_NOTDIR;
2423 goto err1;
2424 }
2425
2426 if (args->object.name == nfs3nametoolong) {
2427 resp->status = NFS3ERR_NAMETOOLONG;
2428 goto err1;
2429 }
2430
2431 if (args->object.name == NULL || *(args->object.name) == '\0') {
2432 resp->status = NFS3ERR_ACCES;
2433 goto err1;
2434 }
2435
2436 if (rdonly(exi, req)) {
2437 resp->status = NFS3ERR_ROFS;
2438 goto err1;
2439 }
2440
2441 if (is_system_labeled()) {
2442 bslabel_t *clabel = req->rq_label;
2443
2444 ASSERT(clabel != NULL);
2445 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2446 "got client label from request(1)", struct svc_req *, req);
2447
2448 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2449 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2450 exi)) {
2451 resp->status = NFS3ERR_ACCES;
2452 goto err1;
2453 }
2454 }
2455 }
2456
2457 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2458 name = nfscmd_convname(ca, exi, args->object.name,
2459 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2460
2461 if (name == NULL) {
2462 resp->status = NFS3ERR_INVAL;
2463 goto err1;
2464 }
2465
2466 /*
2467 * Check for a conflict with a non-blocking mandatory share
2468 * reservation and V4 delegations
2469 */
2470 error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2471 NULL, cr, NULL, NULL, NULL);
2472 if (error != 0)
2473 goto err;
2474
2475 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2476 resp->status = NFS3ERR_JUKEBOX;
2477 goto err1;
2478 }
2479
2480 if (!nbl_need_check(targvp)) {
2481 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2482 } else {
2483 nbl_start_crit(targvp, RW_READER);
2484 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2485 error = EACCES;
2486 } else {
2487 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2488 }
2489 nbl_end_crit(targvp);
2490 }
2491 VN_RELE(targvp);
2492 targvp = NULL;
2493
2494 ava.va_mask = AT_ALL;
2495 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2496
2497 /*
2498 * Force modified data and metadata out to stable storage.
2499 */
2500 (void) VOP_FSYNC(vp, 0, cr, NULL);
2501
2502 if (error)
2503 goto err;
2504
2505 resp->status = NFS3_OK;
2506 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2507 goto out;
2508
2509 err:
2510 if (curthread->t_flag & T_WOULDBLOCK) {
2511 curthread->t_flag &= ~T_WOULDBLOCK;
2512 resp->status = NFS3ERR_JUKEBOX;
2513 } else
2514 resp->status = puterrno3(error);
2515 err1:
2516 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2517 out:
2518 DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
2519 cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
2520
2521 if (name != NULL && name != args->object.name)
2522 kmem_free(name, MAXPATHLEN + 1);
2523
2524 if (vp != NULL)
2525 VN_RELE(vp);
2526 }
2527
2528 void *
2529 rfs3_remove_getfh(REMOVE3args *args)
2530 {
2531
2532 return (&args->object.dir);
2533 }
2534
2535 void
2536 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2537 struct svc_req *req, cred_t *cr)
2538 {
2539 int error;
2540 vnode_t *vp;
2541 struct vattr *bvap;
2542 struct vattr bva;
2543 struct vattr *avap;
2544 struct vattr ava;
2545 struct sockaddr *ca;
2546 char *name = NULL;
2547
2548 bvap = NULL;
2549 avap = NULL;
2550
2551 vp = nfs3_fhtovp(&args->object.dir, exi);
2552
2553 DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
2554 cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
2555
2556 if (vp == NULL) {
2557 error = ESTALE;
2558 goto err;
2559 }
2560
2561 bva.va_mask = AT_ALL;
2562 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2563 avap = bvap;
2564
2565 if (vp->v_type != VDIR) {
2566 resp->status = NFS3ERR_NOTDIR;
2567 goto err1;
2568 }
2569
2570 if (args->object.name == nfs3nametoolong) {
2571 resp->status = NFS3ERR_NAMETOOLONG;
2572 goto err1;
2573 }
2574
2575 if (args->object.name == NULL || *(args->object.name) == '\0') {
2576 resp->status = NFS3ERR_ACCES;
2577 goto err1;
2578 }
2579
2580 if (rdonly(exi, req)) {
2581 resp->status = NFS3ERR_ROFS;
2582 goto err1;
2583 }
2584
2585 if (is_system_labeled()) {
2586 bslabel_t *clabel = req->rq_label;
2587
2588 ASSERT(clabel != NULL);
2589 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2590 "got client label from request(1)", struct svc_req *, req);
2591
2592 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2593 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2594 exi)) {
2595 resp->status = NFS3ERR_ACCES;
2596 goto err1;
2597 }
2598 }
2599 }
2600
2601 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2602 name = nfscmd_convname(ca, exi, args->object.name,
2603 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2604
2605 if (name == NULL) {
2606 resp->status = NFS3ERR_INVAL;
2607 goto err1;
2608 }
2609
2610 error = VOP_RMDIR(vp, name, rootdir, cr, NULL, 0);
2611
2612 if (name != args->object.name)
2613 kmem_free(name, MAXPATHLEN + 1);
2614
2615 ava.va_mask = AT_ALL;
2616 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2617
2618 /*
2619 * Force modified data and metadata out to stable storage.
2620 */
2621 (void) VOP_FSYNC(vp, 0, cr, NULL);
2622
2623 if (error) {
2624 /*
2625 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2626 * if the directory is not empty. A System V NFS server
2627 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2628 * over the wire.
2629 */
2630 if (error == EEXIST)
2631 error = ENOTEMPTY;
2632 goto err;
2633 }
2634
2635 resp->status = NFS3_OK;
2636 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2637 goto out;
2638
2639 err:
2640 if (curthread->t_flag & T_WOULDBLOCK) {
2641 curthread->t_flag &= ~T_WOULDBLOCK;
2642 resp->status = NFS3ERR_JUKEBOX;
2643 } else
2644 resp->status = puterrno3(error);
2645 err1:
2646 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2647 out:
2648 DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
2649 cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
2650 if (vp != NULL)
2651 VN_RELE(vp);
2652
2653 }
2654
2655 void *
2656 rfs3_rmdir_getfh(RMDIR3args *args)
2657 {
2658
2659 return (&args->object.dir);
2660 }
2661
2662 void
2663 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2664 struct svc_req *req, cred_t *cr)
2665 {
2666 int error = 0;
2667 vnode_t *fvp;
2668 vnode_t *tvp;
2669 vnode_t *targvp;
2670 struct vattr *fbvap;
2671 struct vattr fbva;
2672 struct vattr *favap;
2673 struct vattr fava;
2674 struct vattr *tbvap;
2675 struct vattr tbva;
2676 struct vattr *tavap;
2677 struct vattr tava;
2678 nfs_fh3 *fh3;
2679 struct exportinfo *to_exi;
2680 vnode_t *srcvp = NULL;
2681 bslabel_t *clabel;
2682 struct sockaddr *ca;
2683 char *name = NULL;
2684 char *toname = NULL;
2685
2686 fbvap = NULL;
2687 favap = NULL;
2688 tbvap = NULL;
2689 tavap = NULL;
2690 tvp = NULL;
2691
2692 fvp = nfs3_fhtovp(&args->from.dir, exi);
2693
2694 DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
2695 cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
2696
2697 if (fvp == NULL) {
2698 error = ESTALE;
2699 goto err;
2700 }
2701
2702 if (is_system_labeled()) {
2703 clabel = req->rq_label;
2704 ASSERT(clabel != NULL);
2705 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2706 "got client label from request(1)", struct svc_req *, req);
2707
2708 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2709 if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2710 exi)) {
2711 resp->status = NFS3ERR_ACCES;
2712 goto err1;
2713 }
2714 }
2715 }
2716
2717 fbva.va_mask = AT_ALL;
2718 fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2719 favap = fbvap;
2720
2721 fh3 = &args->to.dir;
2722 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2723 if (to_exi == NULL) {
2724 resp->status = NFS3ERR_ACCES;
2725 goto err1;
2726 }
2727 exi_rele(to_exi);
2728
2729 if (to_exi != exi) {
2730 resp->status = NFS3ERR_XDEV;
2731 goto err1;
2732 }
2733
2734 tvp = nfs3_fhtovp(&args->to.dir, exi);
2735 if (tvp == NULL) {
2736 error = ESTALE;
2737 goto err;
2738 }
2739
2740 tbva.va_mask = AT_ALL;
2741 tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2742 tavap = tbvap;
2743
2744 if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2745 resp->status = NFS3ERR_NOTDIR;
2746 goto err1;
2747 }
2748
2749 if (args->from.name == nfs3nametoolong ||
2750 args->to.name == nfs3nametoolong) {
2751 resp->status = NFS3ERR_NAMETOOLONG;
2752 goto err1;
2753 }
2754 if (args->from.name == NULL || *(args->from.name) == '\0' ||
2755 args->to.name == NULL || *(args->to.name) == '\0') {
2756 resp->status = NFS3ERR_ACCES;
2757 goto err1;
2758 }
2759
2760 if (rdonly(exi, req)) {
2761 resp->status = NFS3ERR_ROFS;
2762 goto err1;
2763 }
2764
2765 if (is_system_labeled()) {
2766 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2767 if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2768 exi)) {
2769 resp->status = NFS3ERR_ACCES;
2770 goto err1;
2771 }
2772 }
2773 }
2774
2775 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2776 name = nfscmd_convname(ca, exi, args->from.name,
2777 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2778
2779 if (name == NULL) {
2780 resp->status = NFS3ERR_INVAL;
2781 goto err1;
2782 }
2783
2784 toname = nfscmd_convname(ca, exi, args->to.name,
2785 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2786
2787 if (toname == NULL) {
2788 resp->status = NFS3ERR_INVAL;
2789 goto err1;
2790 }
2791
2792 /*
2793 * Check for a conflict with a non-blocking mandatory share
2794 * reservation or V4 delegations.
2795 */
2796 error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2797 NULL, cr, NULL, NULL, NULL);
2798 if (error != 0)
2799 goto err;
2800
2801 /*
2802 * If we rename a delegated file we should recall the
2803 * delegation, since future opens should fail or would
2804 * refer to a new file.
2805 */
2806 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2807 resp->status = NFS3ERR_JUKEBOX;
2808 goto err1;
2809 }
2810
2811 /*
2812 * Check for renaming over a delegated file. Check rfs4_deleg_policy
2813 * first to avoid VOP_LOOKUP if possible.
2814 */
2815 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2816 VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2817 NULL, NULL, NULL) == 0) {
2818
2819 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2820 VN_RELE(targvp);
2821 resp->status = NFS3ERR_JUKEBOX;
2822 goto err1;
2823 }
2824 VN_RELE(targvp);
2825 }
2826
2827 if (!nbl_need_check(srcvp)) {
2828 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2829 } else {
2830 nbl_start_crit(srcvp, RW_READER);
2831 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2832 error = EACCES;
2833 else
2834 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2835 nbl_end_crit(srcvp);
2836 }
2837 if (error == 0)
2838 vn_renamepath(tvp, srcvp, args->to.name,
2839 strlen(args->to.name));
2840 VN_RELE(srcvp);
2841 srcvp = NULL;
2842
2843 fava.va_mask = AT_ALL;
2844 favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2845 tava.va_mask = AT_ALL;
2846 tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2847
2848 /*
2849 * Force modified data and metadata out to stable storage.
2850 */
2851 (void) VOP_FSYNC(fvp, 0, cr, NULL);
2852 (void) VOP_FSYNC(tvp, 0, cr, NULL);
2853
2854 if (error)
2855 goto err;
2856
2857 resp->status = NFS3_OK;
2858 vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2859 vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2860 goto out;
2861
2862 err:
2863 if (curthread->t_flag & T_WOULDBLOCK) {
2864 curthread->t_flag &= ~T_WOULDBLOCK;
2865 resp->status = NFS3ERR_JUKEBOX;
2866 } else {
2867 resp->status = puterrno3(error);
2868 }
2869 err1:
2870 vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2871 vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2872
2873 out:
2874 if (name != NULL && name != args->from.name)
2875 kmem_free(name, MAXPATHLEN + 1);
2876 if (toname != NULL && toname != args->to.name)
2877 kmem_free(toname, MAXPATHLEN + 1);
2878
2879 DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
2880 cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
2881 if (fvp != NULL)
2882 VN_RELE(fvp);
2883 if (tvp != NULL)
2884 VN_RELE(tvp);
2885 }
2886
2887 void *
2888 rfs3_rename_getfh(RENAME3args *args)
2889 {
2890
2891 return (&args->from.dir);
2892 }
2893
2894 void
2895 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2896 struct svc_req *req, cred_t *cr)
2897 {
2898 int error;
2899 vnode_t *vp;
2900 vnode_t *dvp;
2901 struct vattr *vap;
2902 struct vattr va;
2903 struct vattr *bvap;
2904 struct vattr bva;
2905 struct vattr *avap;
2906 struct vattr ava;
2907 nfs_fh3 *fh3;
2908 struct exportinfo *to_exi;
2909 bslabel_t *clabel;
2910 struct sockaddr *ca;
2911 char *name = NULL;
2912
2913 vap = NULL;
2914 bvap = NULL;
2915 avap = NULL;
2916 dvp = NULL;
2917
2918 vp = nfs3_fhtovp(&args->file, exi);
2919
2920 DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
2921 cred_t *, cr, vnode_t *, vp, LINK3args *, args);
2922
2923 if (vp == NULL) {
2924 error = ESTALE;
2925 goto out;
2926 }
2927
2928 va.va_mask = AT_ALL;
2929 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2930
2931 fh3 = &args->link.dir;
2932 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2933 if (to_exi == NULL) {
2934 resp->status = NFS3ERR_ACCES;
2935 goto out1;
2936 }
2937 exi_rele(to_exi);
2938
2939 if (to_exi != exi) {
2940 resp->status = NFS3ERR_XDEV;
2941 goto out1;
2942 }
2943
2944 if (is_system_labeled()) {
2945 clabel = req->rq_label;
2946
2947 ASSERT(clabel != NULL);
2948 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
2949 "got client label from request(1)", struct svc_req *, req);
2950
2951 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2952 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2953 exi)) {
2954 resp->status = NFS3ERR_ACCES;
2955 goto out1;
2956 }
2957 }
2958 }
2959
2960 dvp = nfs3_fhtovp(&args->link.dir, exi);
2961 if (dvp == NULL) {
2962 error = ESTALE;
2963 goto out;
2964 }
2965
2966 bva.va_mask = AT_ALL;
2967 bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
2968
2969 if (dvp->v_type != VDIR) {
2970 resp->status = NFS3ERR_NOTDIR;
2971 goto out1;
2972 }
2973
2974 if (args->link.name == nfs3nametoolong) {
2975 resp->status = NFS3ERR_NAMETOOLONG;
2976 goto out1;
2977 }
2978
2979 if (args->link.name == NULL || *(args->link.name) == '\0') {
2980 resp->status = NFS3ERR_ACCES;
2981 goto out1;
2982 }
2983
2984 if (rdonly(exi, req)) {
2985 resp->status = NFS3ERR_ROFS;
2986 goto out1;
2987 }
2988
2989 if (is_system_labeled()) {
2990 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
2991 "got client label from request(1)", struct svc_req *, req);
2992
2993 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2994 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2995 exi)) {
2996 resp->status = NFS3ERR_ACCES;
2997 goto out1;
2998 }
2999 }
3000 }
3001
3002 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3003 name = nfscmd_convname(ca, exi, args->link.name,
3004 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3005
3006 if (name == NULL) {
3007 resp->status = NFS3ERR_SERVERFAULT;
3008 goto out1;
3009 }
3010
3011 error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3012
3013 va.va_mask = AT_ALL;
3014 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3015 ava.va_mask = AT_ALL;
3016 avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3017
3018 /*
3019 * Force modified data and metadata out to stable storage.
3020 */
3021 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3022 (void) VOP_FSYNC(dvp, 0, cr, NULL);
3023
3024 if (error)
3025 goto out;
3026
3027 VN_RELE(dvp);
3028
3029 resp->status = NFS3_OK;
3030 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3031 vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3032
3033 DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3034 cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3035
3036 VN_RELE(vp);
3037
3038 return;
3039
3040 out:
3041 if (curthread->t_flag & T_WOULDBLOCK) {
3042 curthread->t_flag &= ~T_WOULDBLOCK;
3043 resp->status = NFS3ERR_JUKEBOX;
3044 } else
3045 resp->status = puterrno3(error);
3046 out1:
3047 if (name != NULL && name != args->link.name)
3048 kmem_free(name, MAXPATHLEN + 1);
3049
3050 DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3051 cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3052
3053 if (vp != NULL)
3054 VN_RELE(vp);
3055 if (dvp != NULL)
3056 VN_RELE(dvp);
3057 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3058 vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3059 }
3060
3061 void *
3062 rfs3_link_getfh(LINK3args *args)
3063 {
3064
3065 return (&args->file);
3066 }
3067
/*
 * NFS3_READDIR_MIN_COUNT(length) is the size, in bytes, of a READDIR
 * response that carries the directory attributes plus exactly one entry
 * whose name is "length" bytes long.  A request whose count is larger
 * than this is guaranteed to have room for at least one directory entry
 * if one exists, so NFS3ERR_TOOSMALL need not be considered.  For a
 * smaller count the caller has to check whether that error must be
 * returned.
 *
 * The XDR units that make up the fixed part of the response are:
 *
 *	status			- 1 * BYTES_PER_XDR_UNIT
 *	attr. flag		- 1 * BYTES_PER_XDR_UNIT
 *	cookie verifier		- 2 * BYTES_PER_XDR_UNIT
 *	attributes		- NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 *	boolean			- 1 * BYTES_PER_XDR_UNIT
 *	file id			- 2 * BYTES_PER_XDR_UNIT
 *	directory name length	- 1 * BYTES_PER_XDR_UNIT
 *	cookie			- 2 * BYTES_PER_XDR_UNIT
 *	end of list		- 1 * BYTES_PER_XDR_UNIT
 *	end of file		- 1 * BYTES_PER_XDR_UNIT
 *
 * to which the name itself, rounded up to a whole XDR unit, is added.
 */

#define	NFS3_READDIR_MIN_COUNT(length)				\
	(roundup((length), BYTES_PER_XDR_UNIT) +		\
	    BYTES_PER_XDR_UNIT *				\
	    (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1))
3096
3097 /* ARGSUSED */
3098 void
3099 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3100 struct svc_req *req, cred_t *cr)
3101 {
3102 int error;
3103 vnode_t *vp;
3104 struct vattr *vap;
3105 struct vattr va;
3106 struct iovec iov;
3107 struct uio uio;
3108 char *data;
3109 int iseof;
3110 int bufsize;
3111 int namlen;
3112 uint_t count;
3113 struct sockaddr *ca;
3114
3115 vap = NULL;
3116
3117 vp = nfs3_fhtovp(&args->dir, exi);
3118
3119 DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
3120 cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
3121
3122 if (vp == NULL) {
3123 error = ESTALE;
3124 goto out;
3125 }
3126
3127 if (is_system_labeled()) {
3128 bslabel_t *clabel = req->rq_label;
3129
3130 ASSERT(clabel != NULL);
3131 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3132 "got client label from request(1)", struct svc_req *, req);
3133
3134 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3135 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3136 exi)) {
3137 resp->status = NFS3ERR_ACCES;
3138 goto out1;
3139 }
3140 }
3141 }
3142
3143 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3144
3145 va.va_mask = AT_ALL;
3146 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3147
3148 if (vp->v_type != VDIR) {
3149 resp->status = NFS3ERR_NOTDIR;
3150 goto out1;
3151 }
3152
3153 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3154 if (error)
3155 goto out;
3156
3157 /*
3158 * Now don't allow arbitrary count to alloc;
3159 * allow the maximum not to exceed rfs3_tsize()
3160 */
3161 if (args->count > rfs3_tsize(req))
3162 args->count = rfs3_tsize(req);
3163
3164 /*
3165 * Make sure that there is room to read at least one entry
3166 * if any are available.
3167 */
3168 if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3169 count = DIRENT64_RECLEN(MAXNAMELEN);
3170 else
3171 count = args->count;
3172
3173 data = kmem_alloc(count, KM_SLEEP);
3174
3175 iov.iov_base = data;
3176 iov.iov_len = count;
3177 uio.uio_iov = &iov;
3178 uio.uio_iovcnt = 1;
3179 uio.uio_segflg = UIO_SYSSPACE;
3180 uio.uio_extflg = UIO_COPY_CACHED;
3181 uio.uio_loffset = (offset_t)args->cookie;
3182 uio.uio_resid = count;
3183
3184 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3185
3186 va.va_mask = AT_ALL;
3187 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3188
3189 if (error) {
3190 kmem_free(data, count);
3191 goto out;
3192 }
3193
3194 /*
3195 * If the count was not large enough to be able to guarantee
3196 * to be able to return at least one entry, then need to
3197 * check to see if NFS3ERR_TOOSMALL should be returned.
3198 */
3199 if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3200 /*
3201 * bufsize is used to keep track of the size of the response.
3202 * It is primed with:
3203 * 1 for the status +
3204 * 1 for the dir_attributes.attributes boolean +
3205 * 2 for the cookie verifier
3206 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3207 * to bytes. If there are directory attributes to be
3208 * returned, then:
3209 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3210 * time BYTES_PER_XDR_UNIT is added to account for them.
3211 */
3212 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3213 if (vap != NULL)
3214 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3215 /*
3216 * An entry is composed of:
3217 * 1 for the true/false list indicator +
3218 * 2 for the fileid +
3219 * 1 for the length of the name +
3220 * 2 for the cookie +
3221 * all times BYTES_PER_XDR_UNIT to convert from
3222 * XDR units to bytes, plus the length of the name
3223 * rounded up to the nearest BYTES_PER_XDR_UNIT.
3224 */
3225 if (count != uio.uio_resid) {
3226 namlen = strlen(((struct dirent64 *)data)->d_name);
3227 bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3228 roundup(namlen, BYTES_PER_XDR_UNIT);
3229 }
3230 /*
3231 * We need to check to see if the number of bytes left
3232 * to go into the buffer will actually fit into the
3233 * buffer. This is calculated as the size of this
3234 * entry plus:
3235 * 1 for the true/false list indicator +
3236 * 1 for the eof indicator
3237 * times BYTES_PER_XDR_UNIT to convert from from
3238 * XDR units to bytes.
3239 */
3240 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3241 if (bufsize > args->count) {
3242 kmem_free(data, count);
3243 resp->status = NFS3ERR_TOOSMALL;
3244 goto out1;
3245 }
3246 }
3247
3248 /*
3249 * Have a valid readir buffer for the native character
3250 * set. Need to check if a conversion is necessary and
3251 * potentially rewrite the whole buffer. Note that if the
3252 * conversion expands names enough, the structure may not
3253 * fit. In this case, we need to drop entries until if fits
3254 * and patch the counts in order that the next readdir will
3255 * get the correct entries.
3256 */
3257 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3258 data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3259
3260
3261 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3262
3263 #if 0 /* notyet */
3264 /*
3265 * Don't do this. It causes local disk writes when just
3266 * reading the file and the overhead is deemed larger
3267 * than the benefit.
3268 */
3269 /*
3270 * Force modified metadata out to stable storage.
3271 */
3272 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3273 #endif
3274
3275 resp->status = NFS3_OK;
3276 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3277 resp->resok.cookieverf = 0;
3278 resp->resok.reply.entries = (entry3 *)data;
3279 resp->resok.reply.eof = iseof;
3280 resp->resok.size = count - uio.uio_resid;
3281 resp->resok.count = args->count;
3282 resp->resok.freecount = count;
3283
3284 DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3285 cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3286
3287 VN_RELE(vp);
3288
3289 return;
3290
3291 out:
3292 if (curthread->t_flag & T_WOULDBLOCK) {
3293 curthread->t_flag &= ~T_WOULDBLOCK;
3294 resp->status = NFS3ERR_JUKEBOX;
3295 } else
3296 resp->status = puterrno3(error);
3297 out1:
3298 DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3299 cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3300
3301 if (vp != NULL) {
3302 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3303 VN_RELE(vp);
3304 }
3305 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3306 }
3307
3308 void *
3309 rfs3_readdir_getfh(READDIR3args *args)
3310 {
3311
3312 return (&args->dir);
3313 }
3314
3315 void
3316 rfs3_readdir_free(READDIR3res *resp)
3317 {
3318
3319 if (resp->status == NFS3_OK)
3320 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3321 }
3322
/*
 * Step from one dirent64 record to the record that follows it in a packed
 * buffer by advancing d_reclen bytes.  Any earlier definition of nextdp is
 * discarded first (#undef of an undefined name is a no-op).
 */
#undef	nextdp
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3327
/*
 * NFS3_READDIRPLUS_ENTRY(namelen) is the size, in bytes, of one
 * READDIRPLUS directory entry including its attributes and a file handle
 * of maximum length.  If the space remaining in a reply is at least this
 * large, one more directory entry is guaranteed to fit if one exists.
 *
 * The XDR units that make up the fixed part of an entry are:
 *
 *	boolean				- 1 * BYTES_PER_XDR_UNIT
 *	file id				- 2 * BYTES_PER_XDR_UNIT
 *	directory name length		- 1 * BYTES_PER_XDR_UNIT
 *	cookie				- 2 * BYTES_PER_XDR_UNIT
 *	attribute flag			- 1 * BYTES_PER_XDR_UNIT
 *	attributes			- NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 *	status byte for file handle	- 1 * BYTES_PER_XDR_UNIT
 *	length of a file handle		- 1 * BYTES_PER_XDR_UNIT
 *
 * to which the maximum length of a file handle (NFS3_MAXFHSIZE) and the
 * entry's name, rounded up to a whole XDR unit, are added.
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)				\
	(roundup((namelen), BYTES_PER_XDR_UNIT) +		\
	    NFS3_MAXFHSIZE +					\
	    BYTES_PER_XDR_UNIT *				\
	    (1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1))
3351
3352 static int rfs3_readdir_unit = MAXBSIZE;
3353
3354 /* ARGSUSED */
3355 void
3356 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3357 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
3358 {
3359 int error;
3360 vnode_t *vp;
3361 struct vattr *vap;
3362 struct vattr va;
3363 struct iovec iov;
3364 struct uio uio;
3365 char *data;
3366 int iseof;
3367 struct dirent64 *dp;
3368 vnode_t *nvp;
3369 struct vattr *nvap;
3370 struct vattr nva;
3371 entryplus3_info *infop = NULL;
3372 int size = 0;
3373 int nents = 0;
3374 int bufsize = 0;
3375 int entrysize = 0;
3376 int tofit = 0;
3377 int rd_unit = rfs3_readdir_unit;
3378 int prev_len;
3379 int space_left;
3380 int i;
3381 uint_t *namlen = NULL;
3382 char *ndata = NULL;
3383 struct sockaddr *ca;
3384 size_t ret;
3385
3386 vap = NULL;
3387
3388 vp = nfs3_fhtovp(&args->dir, exi);
3389
3390 DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
3391 cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
3392
3393 if (vp == NULL) {
3394 error = ESTALE;
3395 goto out;
3396 }
3397
3398 if (is_system_labeled()) {
3399 bslabel_t *clabel = req->rq_label;
3400
3401 ASSERT(clabel != NULL);
3402 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3403 char *, "got client label from request(1)",
3404 struct svc_req *, req);
3405
3406 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3407 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3408 exi)) {
3409 resp->status = NFS3ERR_ACCES;
3410 goto out1;
3411 }
3412 }
3413 }
3414
3415 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3416
3417 va.va_mask = AT_ALL;
3418 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3419
3420 if (vp->v_type != VDIR) {
3421 error = ENOTDIR;
3422 goto out;
3423 }
3424
3425 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3426 if (error)
3427 goto out;
3428
3429 /*
3430 * Don't allow arbitrary counts for allocation
3431 */
3432 if (args->maxcount > rfs3_tsize(req))
3433 args->maxcount = rfs3_tsize(req);
3434
3435 /*
3436 * Make sure that there is room to read at least one entry
3437 * if any are available
3438 */
3439 args->dircount = MIN(args->dircount, args->maxcount);
3440
3441 if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3442 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3443
3444 /*
3445 * This allocation relies on a minimum directory entry
3446 * being roughly 24 bytes. Therefore, the namlen array
3447 * will have enough space based on the maximum number of
3448 * entries to read.
3449 */
3450 namlen = kmem_alloc(args->dircount, KM_SLEEP);
3451
3452 space_left = args->dircount;
3453 data = kmem_alloc(args->dircount, KM_SLEEP);
3454 dp = (struct dirent64 *)data;
3455 uio.uio_iov = &iov;
3456 uio.uio_iovcnt = 1;
3457 uio.uio_segflg = UIO_SYSSPACE;
3458 uio.uio_extflg = UIO_COPY_CACHED;
3459 uio.uio_loffset = (offset_t)args->cookie;
3460
3461 /*
3462 * bufsize is used to keep track of the size of the response as we
3463 * get post op attributes and filehandles for each entry. This is
3464 * an optimization as the server may have read more entries than will
3465 * fit in the buffer specified by maxcount. We stop calculating
3466 * post op attributes and filehandles once we have exceeded maxcount.
3467 * This will minimize the effect of truncation.
3468 *
3469 * It is primed with:
3470 * 1 for the status +
3471 * 1 for the dir_attributes.attributes boolean +
3472 * 2 for the cookie verifier
3473 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3474 * to bytes. If there are directory attributes to be
3475 * returned, then:
3476 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3477 * time BYTES_PER_XDR_UNIT is added to account for them.
3478 */
3479 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3480 if (vap != NULL)
3481 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3482
3483 getmoredents:
3484 /*
3485 * Here we make a check so that our read unit is not larger than
3486 * the space left in the buffer.
3487 */
3488 rd_unit = MIN(rd_unit, space_left);
3489 iov.iov_base = (char *)dp;
3490 iov.iov_len = rd_unit;
3491 uio.uio_resid = rd_unit;
3492 prev_len = rd_unit;
3493
3494 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3495
3496 if (error) {
3497 kmem_free(data, args->dircount);
3498 goto out;
3499 }
3500
3501 if (uio.uio_resid == prev_len && !iseof) {
3502 if (nents == 0) {
3503 kmem_free(data, args->dircount);
3504 resp->status = NFS3ERR_TOOSMALL;
3505 goto out1;
3506 }
3507
3508 /*
3509 * We could not get any more entries, so get the attributes
3510 * and filehandle for the entries already obtained.
3511 */
3512 goto good;
3513 }
3514
3515 /*
3516 * We estimate the size of the response by assuming the
3517 * entry exists and attributes and filehandle are also valid
3518 */
3519 for (size = prev_len - uio.uio_resid;
3520 size > 0;
3521 size -= dp->d_reclen, dp = nextdp(dp)) {
3522
3523 if (dp->d_ino == 0) {
3524 nents++;
3525 continue;
3526 }
3527
3528 namlen[nents] = strlen(dp->d_name);
3529 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3530
3531 /*
3532 * We need to check to see if the number of bytes left
3533 * to go into the buffer will actually fit into the
3534 * buffer. This is calculated as the size of this
3535 * entry plus:
3536 * 1 for the true/false list indicator +
3537 * 1 for the eof indicator
3538 * times BYTES_PER_XDR_UNIT to convert from XDR units
3539 * to bytes.
3540 *
3541 * Also check the dircount limit against the first entry read
3542 *
3543 */
3544 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3545 if (bufsize + tofit > args->maxcount) {
3546 /*
3547 * We make a check here to see if this was the
3548 * first entry being measured. If so, then maxcount
3549 * was too small to begin with and so we need to
3550 * return with NFS3ERR_TOOSMALL.
3551 */
3552 if (nents == 0) {
3553 kmem_free(data, args->dircount);
3554 resp->status = NFS3ERR_TOOSMALL;
3555 goto out1;
3556 }
3557 iseof = FALSE;
3558 goto good;
3559 }
3560 bufsize += entrysize;
3561 nents++;
3562 }
3563
3564 /*
3565 * If there is enough room to fit at least 1 more entry including
3566 * post op attributes and filehandle in the buffer AND that we haven't
3567 * exceeded dircount then go back and get some more.
3568 */
3569 if (!iseof &&
3570 (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3571 space_left -= (prev_len - uio.uio_resid);
3572 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3573 goto getmoredents;
3574
3575 /* else, fall through */
3576 }
3577 good:
3578 va.va_mask = AT_ALL;
3579 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3580
3581 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3582
3583 infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3584 resp->resok.infop = infop;
3585
3586 dp = (struct dirent64 *)data;
3587 for (i = 0; i < nents; i++) {
3588
3589 if (dp->d_ino == 0) {
3590 infop[i].attr.attributes = FALSE;
3591 infop[i].fh.handle_follows = FALSE;
3592 dp = nextdp(dp);
3593 continue;
3594 }
3595
3596 infop[i].namelen = namlen[i];
3597
3598 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3599 NULL, NULL, NULL);
3600 if (error) {
3601 infop[i].attr.attributes = FALSE;
3602 infop[i].fh.handle_follows = FALSE;
3603 dp = nextdp(dp);
3604 continue;
3605 }
3606
3607 nva.va_mask = AT_ALL;
3608 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3609
3610 /* Lie about the object type for a referral */
3611 if (vn_is_nfs_reparse(nvp, cr))
3612 nvap->va_type = VLNK;
3613
3614 vattr_to_post_op_attr(nvap, &infop[i].attr);
3615
3616 error = makefh3(&infop[i].fh.handle, nvp, exi);
3617 if (!error)
3618 infop[i].fh.handle_follows = TRUE;
3619 else
3620 infop[i].fh.handle_follows = FALSE;
3621
3622 VN_RELE(nvp);
3623 dp = nextdp(dp);
3624 }
3625
3626 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3627 ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3628 if (ndata == NULL)
3629 ndata = data;
3630
3631 if (ret > 0) {
3632 /*
3633 * We had to drop one or more entries in order to fit
3634 * during the character conversion. We need to patch
3635 * up the size and eof info.
3636 */
3637 if (iseof)
3638 iseof = FALSE;
3639
3640 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3641 nents, ret);
3642 }
3643
3644
3645 #if 0 /* notyet */
3646 /*
3647 * Don't do this. It causes local disk writes when just
3648 * reading the file and the overhead is deemed larger
3649 * than the benefit.
3650 */
3651 /*
3652 * Force modified metadata out to stable storage.
3653 */
3654 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3655 #endif
3656
3657 kmem_free(namlen, args->dircount);
3658
3659 resp->status = NFS3_OK;
3660 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3661 resp->resok.cookieverf = 0;
3662 resp->resok.reply.entries = (entryplus3 *)ndata;
3663 resp->resok.reply.eof = iseof;
3664 resp->resok.size = nents;
3665 resp->resok.count = args->dircount - ret;
3666 resp->resok.maxcount = args->maxcount;
3667
3668 DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3669 cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3670 if (ndata != data)
3671 kmem_free(data, args->dircount);
3672
3673
3674 VN_RELE(vp);
3675
3676 return;
3677
3678 out:
3679 if (curthread->t_flag & T_WOULDBLOCK) {
3680 curthread->t_flag &= ~T_WOULDBLOCK;
3681 resp->status = NFS3ERR_JUKEBOX;
3682 } else {
3683 resp->status = puterrno3(error);
3684 }
3685 out1:
3686 DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3687 cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3688
3689 if (vp != NULL) {
3690 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3691 VN_RELE(vp);
3692 }
3693
3694 if (namlen != NULL)
3695 kmem_free(namlen, args->dircount);
3696
3697 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3698 }
3699
3700 void *
3701 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3702 {
3703
3704 return (&args->dir);
3705 }
3706
3707 void
3708 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3709 {
3710
3711 if (resp->status == NFS3_OK) {
3712 kmem_free(resp->resok.reply.entries, resp->resok.count);
3713 kmem_free(resp->resok.infop,
3714 resp->resok.size * sizeof (struct entryplus3_info));
3715 }
3716 }
3717
3718 /* ARGSUSED */
3719 void
3720 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3721 struct svc_req *req, cred_t *cr)
3722 {
3723 int error;
3724 vnode_t *vp;
3725 struct vattr *vap;
3726 struct vattr va;
3727 struct statvfs64 sb;
3728
3729 vap = NULL;
3730
3731 vp = nfs3_fhtovp(&args->fsroot, exi);
3732
3733 DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
3734 cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
3735
3736 if (vp == NULL) {
3737 error = ESTALE;
3738 goto out;
3739 }
3740
3741 if (is_system_labeled()) {
3742 bslabel_t *clabel = req->rq_label;
3743
3744 ASSERT(clabel != NULL);
3745 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3746 "got client label from request(1)", struct svc_req *, req);
3747
3748 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3749 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3750 exi)) {
3751 resp->status = NFS3ERR_ACCES;
3752 goto out1;
3753 }
3754 }
3755 }
3756
3757 error = VFS_STATVFS(vp->v_vfsp, &sb);
3758
3759 va.va_mask = AT_ALL;
3760 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3761
3762 if (error)
3763 goto out;
3764
3765 resp->status = NFS3_OK;
3766 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3767 if (sb.f_blocks != (fsblkcnt64_t)-1)
3768 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3769 else
3770 resp->resok.tbytes = (size3)sb.f_blocks;
3771 if (sb.f_bfree != (fsblkcnt64_t)-1)
3772 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3773 else
3774 resp->resok.fbytes = (size3)sb.f_bfree;
3775 if (sb.f_bavail != (fsblkcnt64_t)-1)
3776 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3777 else
3778 resp->resok.abytes = (size3)sb.f_bavail;
3779 resp->resok.tfiles = (size3)sb.f_files;
3780 resp->resok.ffiles = (size3)sb.f_ffree;
3781 resp->resok.afiles = (size3)sb.f_favail;
3782 resp->resok.invarsec = 0;
3783
3784 DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3785 cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3786 VN_RELE(vp);
3787
3788 return;
3789
3790 out:
3791 if (curthread->t_flag & T_WOULDBLOCK) {
3792 curthread->t_flag &= ~T_WOULDBLOCK;
3793 resp->status = NFS3ERR_JUKEBOX;
3794 } else
3795 resp->status = puterrno3(error);
3796 out1:
3797 DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3798 cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3799
3800 if (vp != NULL)
3801 VN_RELE(vp);
3802 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3803 }
3804
3805 void *
3806 rfs3_fsstat_getfh(FSSTAT3args *args)
3807 {
3808
3809 return (&args->fsroot);
3810 }
3811
3812 void
3813 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3814 struct svc_req *req, cred_t *cr)
3815 {
3816 vnode_t *vp;
3817 struct vattr *vap;
3818 struct vattr va;
3819 uint32_t xfer_size;
3820 ulong_t l = 0;
3821 int error;
3822
3823 vp = nfs3_fhtovp(&args->fsroot, exi);
3824
3825 DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
3826 cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
3827
3828 if (vp == NULL) {
3829 if (curthread->t_flag & T_WOULDBLOCK) {
3830 curthread->t_flag &= ~T_WOULDBLOCK;
3831 resp->status = NFS3ERR_JUKEBOX;
3832 } else
3833 resp->status = NFS3ERR_STALE;
3834 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3835 goto out;
3836 }
3837
3838 if (is_system_labeled()) {
3839 bslabel_t *clabel = req->rq_label;
3840
3841 ASSERT(clabel != NULL);
3842 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3843 "got client label from request(1)", struct svc_req *, req);
3844
3845 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3846 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3847 exi)) {
3848 resp->status = NFS3ERR_STALE;
3849 vattr_to_post_op_attr(NULL,
3850 &resp->resfail.obj_attributes);
3851 goto out;
3852 }
3853 }
3854 }
3855
3856 va.va_mask = AT_ALL;
3857 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3858
3859 resp->status = NFS3_OK;
3860 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3861 xfer_size = rfs3_tsize(req);
3862 resp->resok.rtmax = xfer_size;
3863 resp->resok.rtpref = xfer_size;
3864 resp->resok.rtmult = DEV_BSIZE;
3865 resp->resok.wtmax = xfer_size;
3866 resp->resok.wtpref = xfer_size;
3867 resp->resok.wtmult = DEV_BSIZE;
3868 resp->resok.dtpref = MAXBSIZE;
3869
3870 /*
3871 * Large file spec: want maxfilesize based on limit of
3872 * underlying filesystem. We can guess 2^31-1 if need be.
3873 */
3874 error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3875 if (error) {
3876 resp->status = puterrno3(error);
3877 goto out;
3878 }
3879
3880 /*
3881 * If the underlying file system does not support _PC_FILESIZEBITS,
3882 * return a reasonable default. Note that error code on VOP_PATHCONF
3883 * will be 0, even if the underlying file system does not support
3884 * _PC_FILESIZEBITS.
3885 */
3886 if (l == (ulong_t)-1) {
3887 resp->resok.maxfilesize = MAXOFF32_T;
3888 } else {
3889 if (l >= (sizeof (uint64_t) * 8))
3890 resp->resok.maxfilesize = INT64_MAX;
3891 else
3892 resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3893 }
3894
3895 resp->resok.time_delta.seconds = 0;
3896 resp->resok.time_delta.nseconds = 1000;
3897 resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3898 FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3899
3900 DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3901 cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
3902
3903 VN_RELE(vp);
3904
3905 return;
3906
3907 out:
3908 DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3909 cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
3910 if (vp != NULL)
3911 VN_RELE(vp);
3912 }
3913
3914 void *
3915 rfs3_fsinfo_getfh(FSINFO3args *args)
3916 {
3917
3918 return (&args->fsroot);
3919 }
3920
3921 /* ARGSUSED */
3922 void
3923 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3924 struct svc_req *req, cred_t *cr)
3925 {
3926 int error;
3927 vnode_t *vp;
3928 struct vattr *vap;
3929 struct vattr va;
3930 ulong_t val;
3931
3932 vap = NULL;
3933
3934 vp = nfs3_fhtovp(&args->object, exi);
3935
3936 DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
3937 cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
3938
3939 if (vp == NULL) {
3940 error = ESTALE;
3941 goto out;
3942 }
3943
3944 if (is_system_labeled()) {
3945 bslabel_t *clabel = req->rq_label;
3946
3947 ASSERT(clabel != NULL);
3948 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
3949 "got client label from request(1)", struct svc_req *, req);
3950
3951 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3952 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3953 exi)) {
3954 resp->status = NFS3ERR_ACCES;
3955 goto out1;
3956 }
3957 }
3958 }
3959
3960 va.va_mask = AT_ALL;
3961 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3962
3963 error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
3964 if (error)
3965 goto out;
3966 resp->resok.info.link_max = (uint32)val;
3967
3968 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
3969 if (error)
3970 goto out;
3971 resp->resok.info.name_max = (uint32)val;
3972
3973 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
3974 if (error)
3975 goto out;
3976 if (val == 1)
3977 resp->resok.info.no_trunc = TRUE;
3978 else
3979 resp->resok.info.no_trunc = FALSE;
3980
3981 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
3982 if (error)
3983 goto out;
3984 if (val == 1)
3985 resp->resok.info.chown_restricted = TRUE;
3986 else
3987 resp->resok.info.chown_restricted = FALSE;
3988
3989 resp->status = NFS3_OK;
3990 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3991 resp->resok.info.case_insensitive = FALSE;
3992 resp->resok.info.case_preserving = TRUE;
3993 DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
3994 cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
3995 VN_RELE(vp);
3996 return;
3997
3998 out:
3999 if (curthread->t_flag & T_WOULDBLOCK) {
4000 curthread->t_flag &= ~T_WOULDBLOCK;
4001 resp->status = NFS3ERR_JUKEBOX;
4002 } else
4003 resp->status = puterrno3(error);
4004 out1:
4005 DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4006 cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4007 if (vp != NULL)
4008 VN_RELE(vp);
4009 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4010 }
4011
4012 void *
4013 rfs3_pathconf_getfh(PATHCONF3args *args)
4014 {
4015
4016 return (&args->object);
4017 }
4018
4019 void
4020 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4021 struct svc_req *req, cred_t *cr)
4022 {
4023 int error;
4024 vnode_t *vp;
4025 struct vattr *bvap;
4026 struct vattr bva;
4027 struct vattr *avap;
4028 struct vattr ava;
4029
4030 bvap = NULL;
4031 avap = NULL;
4032
4033 vp = nfs3_fhtovp(&args->file, exi);
4034
4035 DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
4036 cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
4037
4038 if (vp == NULL) {
4039 error = ESTALE;
4040 goto out;
4041 }
4042
4043 bva.va_mask = AT_ALL;
4044 error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4045
4046 /*
4047 * If we can't get the attributes, then we can't do the
4048 * right access checking. So, we'll fail the request.
4049 */
4050 if (error)
4051 goto out;
4052
4053 bvap = &bva;
4054
4055 if (rdonly(exi, req)) {
4056 resp->status = NFS3ERR_ROFS;
4057 goto out1;
4058 }
4059
4060 if (vp->v_type != VREG) {
4061 resp->status = NFS3ERR_INVAL;
4062 goto out1;
4063 }
4064
4065 if (is_system_labeled()) {
4066 bslabel_t *clabel = req->rq_label;
4067
4068 ASSERT(clabel != NULL);
4069 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4070 "got client label from request(1)", struct svc_req *, req);
4071
4072 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4073 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4074 exi)) {
4075 resp->status = NFS3ERR_ACCES;
4076 goto out1;
4077 }
4078 }
4079 }
4080
4081 if (crgetuid(cr) != bva.va_uid &&
4082 (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4083 goto out;
4084
4085 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4086
4087 ava.va_mask = AT_ALL;
4088 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4089
4090 if (error)
4091 goto out;
4092
4093 resp->status = NFS3_OK;
4094 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4095 resp->resok.verf = write3verf;
4096
4097 DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4098 cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4099
4100 VN_RELE(vp);
4101
4102 return;
4103
4104 out:
4105 if (curthread->t_flag & T_WOULDBLOCK) {
4106 curthread->t_flag &= ~T_WOULDBLOCK;
4107 resp->status = NFS3ERR_JUKEBOX;
4108 } else
4109 resp->status = puterrno3(error);
4110 out1:
4111 DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4112 cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4113
4114 if (vp != NULL)
4115 VN_RELE(vp);
4116 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4117 }
4118
4119 void *
4120 rfs3_commit_getfh(COMMIT3args *args)
4121 {
4122
4123 return (&args->file);
4124 }
4125
4126 static int
4127 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4128 {
4129
4130 vap->va_mask = 0;
4131
4132 if (sap->mode.set_it) {
4133 vap->va_mode = (mode_t)sap->mode.mode;
4134 vap->va_mask |= AT_MODE;
4135 }
4136 if (sap->uid.set_it) {
4137 vap->va_uid = (uid_t)sap->uid.uid;
4138 vap->va_mask |= AT_UID;
4139 }
4140 if (sap->gid.set_it) {
4141 vap->va_gid = (gid_t)sap->gid.gid;
4142 vap->va_mask |= AT_GID;
4143 }
4144 if (sap->size.set_it) {
4145 if (sap->size.size > (size3)((u_longlong_t)-1))
4146 return (EINVAL);
4147 vap->va_size = sap->size.size;
4148 vap->va_mask |= AT_SIZE;
4149 }
4150 if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4151 #ifndef _LP64
4152 /* check time validity */
4153 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4154 return (EOVERFLOW);
4155 #endif
4156 /*
4157 * nfs protocol defines times as unsigned so don't extend sign,
4158 * unless sysadmin set nfs_allow_preepoch_time.
4159 */
4160 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4161 sap->atime.atime.seconds);
4162 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4163 vap->va_mask |= AT_ATIME;
4164 } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4165 gethrestime(&vap->va_atime);
4166 vap->va_mask |= AT_ATIME;
4167 }
4168 if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4169 #ifndef _LP64
4170 /* check time validity */
4171 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4172 return (EOVERFLOW);
4173 #endif
4174 /*
4175 * nfs protocol defines times as unsigned so don't extend sign,
4176 * unless sysadmin set nfs_allow_preepoch_time.
4177 */
4178 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4179 sap->mtime.mtime.seconds);
4180 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4181 vap->va_mask |= AT_MTIME;
4182 } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4183 gethrestime(&vap->va_mtime);
4184 vap->va_mask |= AT_MTIME;
4185 }
4186
4187 return (0);
4188 }
4189
4190 static ftype3 vt_to_nf3[] = {
4191 0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4192 };
4193
4194 static int
4195 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4196 {
4197
4198 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4199 /* Return error if time or size overflow */
4200 if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4201 return (EOVERFLOW);
4202 }
4203 fap->type = vt_to_nf3[vap->va_type];
4204 fap->mode = (mode3)(vap->va_mode & MODEMASK);
4205 fap->nlink = (uint32)vap->va_nlink;
4206 if (vap->va_uid == UID_NOBODY)
4207 fap->uid = (uid3)NFS_UID_NOBODY;
4208 else
4209 fap->uid = (uid3)vap->va_uid;
4210 if (vap->va_gid == GID_NOBODY)
4211 fap->gid = (gid3)NFS_GID_NOBODY;
4212 else
4213 fap->gid = (gid3)vap->va_gid;
4214 fap->size = (size3)vap->va_size;
4215 fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4216 fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4217 fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4218 fap->fsid = (uint64)vap->va_fsid;
4219 fap->fileid = (fileid3)vap->va_nodeid;
4220 fap->atime.seconds = vap->va_atime.tv_sec;
4221 fap->atime.nseconds = vap->va_atime.tv_nsec;
4222 fap->mtime.seconds = vap->va_mtime.tv_sec;
4223 fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4224 fap->ctime.seconds = vap->va_ctime.tv_sec;
4225 fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4226 return (0);
4227 }
4228
4229 static int
4230 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4231 {
4232
4233 /* Return error if time or size overflow */
4234 if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4235 NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4236 NFS3_SIZE_OK(vap->va_size))) {
4237 return (EOVERFLOW);
4238 }
4239 wccap->size = (size3)vap->va_size;
4240 wccap->mtime.seconds = vap->va_mtime.tv_sec;
4241 wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4242 wccap->ctime.seconds = vap->va_ctime.tv_sec;
4243 wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4244 return (0);
4245 }
4246
4247 static void
4248 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4249 {
4250
4251 /* don't return attrs if time overflow */
4252 if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4253 poap->attributes = TRUE;
4254 } else
4255 poap->attributes = FALSE;
4256 }
4257
4258 void
4259 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4260 {
4261
4262 /* don't return attrs if time overflow */
4263 if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4264 poap->attributes = TRUE;
4265 } else
4266 poap->attributes = FALSE;
4267 }
4268
4269 static void
4270 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4271 {
4272
4273 vattr_to_pre_op_attr(bvap, &wccp->before);
4274 vattr_to_post_op_attr(avap, &wccp->after);
4275 }
4276
4277 void
4278 rfs3_srvrinit(void)
4279 {
4280 struct rfs3_verf_overlay {
4281 uint_t id; /* a "unique" identifier */
4282 int ts; /* a unique timestamp */
4283 } *verfp;
4284 timestruc_t now;
4285
4286 /*
4287 * The following algorithm attempts to find a unique verifier
4288 * to be used as the write verifier returned from the server
4289 * to the client. It is important that this verifier change
4290 * whenever the server reboots. Of secondary importance, it
4291 * is important for the verifier to be unique between two
4292 * different servers.
4293 *
4294 * Thus, an attempt is made to use the system hostid and the
4295 * current time in seconds when the nfssrv kernel module is
4296 * loaded. It is assumed that an NFS server will not be able
4297 * to boot and then to reboot in less than a second. If the
4298 * hostid has not been set, then the current high resolution
4299 * time is used. This will ensure different verifiers each
4300 * time the server reboots and minimize the chances that two
4301 * different servers will have the same verifier.
4302 */
4303
4304 #ifndef lint
4305 /*
4306 * We ASSERT that this constant logic expression is
4307 * always true because in the past, it wasn't.
4308 */
4309 ASSERT(sizeof (*verfp) <= sizeof (write3verf));
4310 #endif
4311
4312 gethrestime(&now);
4313 verfp = (struct rfs3_verf_overlay *)&write3verf;
4314 verfp->ts = (int)now.tv_sec;
4315 verfp->id = zone_get_hostid(NULL);
4316
4317 if (verfp->id == 0)
4318 verfp->id = (uint_t)now.tv_nsec;
4319
4320 nfs3_srv_caller_id = fs_new_caller_id();
4321
4322 }
4323
4324 static int
4325 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4326 {
4327 struct clist *wcl;
4328 int wlist_len;
4329 count3 count = rok->count;
4330
4331 wcl = args->wlist;
4332 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
4333 return (FALSE);
4334 }
4335
4336 wcl = args->wlist;
4337 rok->wlist_len = wlist_len;
4338 rok->wlist = wcl;
4339 return (TRUE);
4340 }
4341
/*
 * NFSv3 server teardown hook.  rfs3_srvrinit() acquires nothing that
 * needs releasing, so this is intentionally empty.
 */
void
rfs3_srvrfini(void)
{
	/* Nothing to do */
}