1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25 /*
26 * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
27 */
28
29 #include <sys/systm.h>
30 #include <rpc/auth.h>
31 #include <rpc/clnt.h>
32 #include <nfs/nfs4_kprot.h>
33 #include <nfs/nfs4.h>
34 #include <nfs/lm.h>
35 #include <sys/cmn_err.h>
36 #include <sys/disp.h>
37 #include <sys/sdt.h>
38
39 #include <sys/pathname.h>
40
41 #include <sys/strsubr.h>
42 #include <sys/ddi.h>
43
44 #include <sys/vnode.h>
45 #include <sys/sdt.h>
46 #include <inet/common.h>
47 #include <inet/ip.h>
48 #include <inet/ip6.h>
49
50 #define MAX_READ_DELEGATIONS 5
51
/* Protects reads/updates of rfs4_deleg_policy (see rfs4_set_deleg_policy()) */
krwlock_t rfs4_deleg_policy_lock;
/* Server-wide delegation policy; default is to never hand out delegations */
srv_deleg_policy_t rfs4_deleg_policy = SRV_NEVER_DELEGATE;
/* Tunable; not referenced in this chunk -- TODO confirm use elsewhere */
static int rfs4_deleg_wlp = 5;
/* Delegation lock; not referenced in this chunk -- used by other parts */
kmutex_t rfs4_deleg_lock;
/* Tunable; not referenced in this chunk -- TODO confirm use elsewhere */
static int rfs4_deleg_disabled;
/* Attempts made by rfs4_cbinfo_hold() to set up a new callback path */
static int rfs4_max_setup_cb_tries = 5;

#ifdef DEBUG

/* Debug counters/switches, observable with mdb */
static int rfs4_test_cbgetattr_fail = 0;
int rfs4_cb_null;	/* count of successful CB_NULL probes */
int rfs4_cb_debug;
int rfs4_deleg_debug;

#endif

/* Forward declarations for the static helpers defined below */
static void rfs4_recall_file(rfs4_file_t *,
    void (*recall)(rfs4_deleg_state_t *, bool_t),
    bool_t, rfs4_client_t *);
static void rfs4_revoke_file(rfs4_file_t *);
static void rfs4_cb_chflush(rfs4_cbinfo_t *);
static CLIENT *rfs4_cb_getch(rfs4_cbinfo_t *);
static void rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t);
static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *,
    open_delegation_type4, int *);
77
78 /*
79 * Convert a universal address to an transport specific
80 * address using inet_pton.
81 */
static int
uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp)
{
	int dots = 0, i, j, len, k;
	unsigned int octet;
	in_port_t port = 0;

	len = strlen(ua);

	/*
	 * Scan backwards for the second-to-last '.', which separates the
	 * host portion of the universal address from the two trailing
	 * port octets ("h....h.p1.p2").
	 */
	for (i = len-1; i >= 0; i--) {

		if (ua[i] == '.')
			dots++;

		if (dots == 2) {

			ua[i] = '\0';
			/*
			 * We use k to remember where to stick '.' back, since
			 * ua was kmem_alloc'ed from the pool with len+1.
			 */
			k = i;
			if (inet_pton(af, ua, ap) == 1) {

				/*
				 * Parse "p1.p2" as the 16-bit port.  Each
				 * component must be a decimal value in
				 * [0, 255]; anything else is rejected
				 * (the old code let an unsigned char
				 * accumulator silently wrap for values
				 * greater than 255).
				 */
				octet = 0;

				for (j = i+1; j < len; j++) {
					if (ua[j] == '.') {
						if (octet > 255) {
							ua[k] = '.';
							return (EINVAL);
						}
						port = octet << 8;
						octet = 0;
					} else if (ua[j] >= '0' &&
					    ua[j] <= '9') {
						octet *= 10;
						octet += ua[j] - '0';
					} else {
						ua[k] = '.';
						return (EINVAL);
					}
				}
				if (octet > 255) {
					ua[k] = '.';
					return (EINVAL);
				}
				port += octet;

				*pp = htons(port);

				/* restore the string before returning */
				ua[k] = '.';
				return (0);
			} else {
				ua[k] = '.';
				return (EINVAL);
			}
		}
	}

	return (EINVAL);
}
136
137 /*
138 * Update the delegation policy with the
139 * value of "new_policy"
140 */
void
rfs4_set_deleg_policy(srv_deleg_policy_t new_policy)
{
	/* Writer lock excludes concurrent readers via rfs4_hold_deleg_policy() */
	rw_enter(&rfs4_deleg_policy_lock, RW_WRITER);
	rfs4_deleg_policy = new_policy;
	rw_exit(&rfs4_deleg_policy_lock);
}
148
/*
 * Take a reader hold on the delegation policy so it cannot change
 * underneath the caller; paired with rfs4_rele_deleg_policy().
 */
void
rfs4_hold_deleg_policy(void)
{
	rw_enter(&rfs4_deleg_policy_lock, RW_READER);
}
154
/*
 * Release the reader hold taken with rfs4_hold_deleg_policy().
 */
void
rfs4_rele_deleg_policy(void)
{
	rw_exit(&rfs4_deleg_policy_lock);
}
160
161
162 /*
163 * This free function is to be used when the client struct is being
164 * released and nothing at all is needed of the callback info any
165 * longer.
166 */
void
rfs4_cbinfo_free(rfs4_cbinfo_t *cbp)
{
	char *addr = cbp->cb_callback.cb_location.r_addr;
	char *netid = cbp->cb_callback.cb_location.r_netid;

	/* Free old address if any */

	if (addr)
		kmem_free(addr, strlen(addr) + 1);
	if (netid)
		kmem_free(netid, strlen(netid) + 1);

	/* Also free any pending ("newer") callback location strings */
	addr = cbp->cb_newer.cb_callback.cb_location.r_addr;
	netid = cbp->cb_newer.cb_callback.cb_location.r_netid;

	if (addr)
		kmem_free(addr, strlen(addr) + 1);
	if (netid)
		kmem_free(netid, strlen(netid) + 1);

	/* Destroy any cached client handles as well */
	if (cbp->cb_chc_free) {
		rfs4_cb_chflush(cbp);
	}
}
192
193 /*
194 * The server uses this to check the callback path supplied by the
195 * client. The callback connection is marked "in progress" while this
196 * work is going on and then eventually marked either OK or FAILED.
197 * This work can be done as part of a separate thread and at the end
198 * of this the thread will exit or it may be done such that the caller
199 * will continue with other work.
200 */
static void
rfs4_do_cb_null(rfs4_client_t *cp)
{
	struct timeval tv;
	CLIENT *ch;
	rfs4_cbstate_t newstate;
	rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;

	mutex_enter(cbp->cb_lock);
	/* If another thread is doing CB_NULL RPC then return */
	if (cbp->cb_nullcaller == TRUE) {
		mutex_exit(cbp->cb_lock);
		rfs4_client_rele(cp);
		return;
	}

	/* Mark the cbinfo as having a thread in the NULL callback */
	cbp->cb_nullcaller = TRUE;

	/*
	 * Are there other threads still using the cbinfo client
	 * handles? If so, this thread must wait before going and
	 * mucking around with the callback information
	 */
	while (cbp->cb_refcnt != 0)
		cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock);

	/*
	 * This thread itself may find that new callback info has
	 * arrived and is set up to handle this case and redrive the
	 * call to the client's callback server.
	 *
	 * cb_lock is held at the retry label, both on first entry and
	 * on each goto back here.
	 */
retry:
	if (cbp->cb_newer.cb_new == TRUE &&
	    cbp->cb_newer.cb_confirmed == TRUE) {
		char *addr = cbp->cb_callback.cb_location.r_addr;
		char *netid = cbp->cb_callback.cb_location.r_netid;

		/*
		 * Free the old stuff if it exists; may be the first
		 * time through this path
		 */
		if (addr)
			kmem_free(addr, strlen(addr) + 1);
		if (netid)
			kmem_free(netid, strlen(netid) + 1);

		/* Move over the addr/netid */
		cbp->cb_callback.cb_location.r_addr =
		    cbp->cb_newer.cb_callback.cb_location.r_addr;
		cbp->cb_newer.cb_callback.cb_location.r_addr = NULL;
		cbp->cb_callback.cb_location.r_netid =
		    cbp->cb_newer.cb_callback.cb_location.r_netid;
		cbp->cb_newer.cb_callback.cb_location.r_netid = NULL;

		/* Get the program number */
		cbp->cb_callback.cb_program =
		    cbp->cb_newer.cb_callback.cb_program;
		cbp->cb_newer.cb_callback.cb_program = 0;

		/* Don't forget the protocol's "cb_ident" field */
		cbp->cb_ident = cbp->cb_newer.cb_ident;
		cbp->cb_newer.cb_ident = 0;

		/* no longer new */
		cbp->cb_newer.cb_new = FALSE;
		cbp->cb_newer.cb_confirmed = FALSE;

		/* get rid of the old client handles that may exist */
		rfs4_cb_chflush(cbp);

		cbp->cb_state = CB_NONE;
		cbp->cb_timefailed = 0; /* reset the clock */
		cbp->cb_notified_of_cb_path_down = TRUE;
	}

	/* Path already probed (or failed) by someone else; nothing to do */
	if (cbp->cb_state != CB_NONE) {
		cv_broadcast(cbp->cb_cv);	/* let the others know */
		cbp->cb_nullcaller = FALSE;
		mutex_exit(cbp->cb_lock);
		rfs4_client_rele(cp);
		return;
	}

	/* mark rfs4_client_t as CALLBACK NULL in progress */
	cbp->cb_state = CB_INPROG;
	mutex_exit(cbp->cb_lock);

	/* get/generate a client handle */
	if ((ch = rfs4_cb_getch(cbp)) == NULL) {
		mutex_enter(cbp->cb_lock);
		cbp->cb_state = CB_BAD;
		cbp->cb_timefailed = gethrestime_sec(); /* observability */
		/* retry in case new callback info arrived meanwhile */
		goto retry;
	}


	tv.tv_sec = 30;
	tv.tv_usec = 0;
	if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) {
		newstate = CB_BAD;
	} else {
		newstate = CB_OK;
#ifdef DEBUG
		rfs4_cb_null++;
#endif
	}

	/* Check to see if the client has specified new callback info */
	mutex_enter(cbp->cb_lock);
	rfs4_cb_freech(cbp, ch, TRUE);
	if (cbp->cb_newer.cb_new == TRUE &&
	    cbp->cb_newer.cb_confirmed == TRUE) {
		goto retry;	/* give the CB_NULL another chance */
	}

	cbp->cb_state = newstate;
	if (cbp->cb_state == CB_BAD)
		cbp->cb_timefailed = gethrestime_sec(); /* observability */

	cv_broadcast(cbp->cb_cv);	/* start up the other threads */
	cbp->cb_nullcaller = FALSE;
	mutex_exit(cbp->cb_lock);

	rfs4_client_rele(cp);
}
327
328 /*
329 * Given a client struct, inspect the callback info to see if the
330 * callback path is up and available.
331 *
332 * If new callback path is available and no one has set it up then
333 * try to set it up. If setup is not successful after 5 tries (5 secs)
334 * then gives up and returns NULL.
335 *
336 * If callback path is being initialized, then wait for the CB_NULL RPC
337 * call to occur.
338 */
static rfs4_cbinfo_t *
rfs4_cbinfo_hold(rfs4_client_t *cp)
{
	rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
	int retries = 0;

	mutex_enter(cbp->cb_lock);

	while (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) {
		/*
		 * Looks like a new callback path may be available and
		 * no one has set it up.
		 */
		mutex_exit(cbp->cb_lock);
		rfs4_dbe_hold(cp->rc_dbe);
		rfs4_do_cb_null(cp); /* caller will release client hold */

		mutex_enter(cbp->cb_lock);
		/*
		 * If callback path is no longer new, or it's being setup
		 * then stop and wait for it to be done.
		 */
		if (cbp->cb_newer.cb_new == FALSE || cbp->cb_nullcaller == TRUE)
			break;
		mutex_exit(cbp->cb_lock);

		/* Bounded retry: give up after rfs4_max_setup_cb_tries */
		if (++retries >= rfs4_max_setup_cb_tries)
			return (NULL);
		delay(hz);	/* back off ~1 second between attempts */
		mutex_enter(cbp->cb_lock);
	}

	/* Is there a thread working on doing the CB_NULL RPC? */
	if (cbp->cb_nullcaller == TRUE)
		cv_wait(cbp->cb_cv, cbp->cb_lock); /* if so, wait on it */

	/* If the callback path is not okay (up and running), just quit */
	if (cbp->cb_state != CB_OK) {
		mutex_exit(cbp->cb_lock);
		return (NULL);
	}

	/* Let someone know we are using the current callback info */
	cbp->cb_refcnt++;
	mutex_exit(cbp->cb_lock);
	return (cbp);
}
386
387 /*
388 * The caller is done with the callback info. It may be that the
389 * caller's RPC failed and the NFSv4 client has actually provided new
390 * callback information. If so, let the caller know so they can
391 * advantage of this and maybe retry the RPC that originally failed.
392 */
static int
rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate)
{
	int cb_new = FALSE;

	mutex_enter(cbp->cb_lock);

	/* The caller gets a chance to mark the callback info as bad */
	if (newstate != CB_NOCHANGE)
		cbp->cb_state = newstate;
	if (newstate == CB_FAILED) {
		cbp->cb_timefailed = gethrestime_sec(); /* observability */
		cbp->cb_notified_of_cb_path_down = FALSE;
	}

	cbp->cb_refcnt--;	/* no longer using the information */

	/*
	 * A thread may be waiting on this one to finish and if so,
	 * let it know that it is okay to do the CB_NULL to the
	 * client's callback server.
	 */
	if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller)
		cv_broadcast(cbp->cb_cv_nullcaller);

	/*
	 * If this is the last thread to use the callback info and
	 * there is new callback information to try and no thread is
	 * there ready to do the CB_NULL, then return true to the
	 * caller so they can do the CB_NULL
	 */
	if (cbp->cb_refcnt == 0 &&
	    cbp->cb_nullcaller == FALSE &&
	    cbp->cb_newer.cb_new == TRUE &&
	    cbp->cb_newer.cb_confirmed == TRUE)
		cb_new = TRUE;

	mutex_exit(cbp->cb_lock);

	return (cb_new);
}
434
435 /*
436 * Given the information in the callback info struct, create a client
437 * handle that can be used by the server for its callback path.
438 */
439 static CLIENT *
440 rfs4_cbch_init(rfs4_cbinfo_t *cbp)
441 {
442 struct knetconfig knc;
443 vnode_t *vp;
444 struct sockaddr_in addr4;
445 struct sockaddr_in6 addr6;
446 void *addr, *taddr;
447 in_port_t *pp;
448 int af;
449 char *devnam;
450 struct netbuf nb;
451 int size;
452 CLIENT *ch = NULL;
453 int useresvport = 0;
454
455 mutex_enter(cbp->cb_lock);
456
457 if (cbp->cb_callback.cb_location.r_netid == NULL ||
458 cbp->cb_callback.cb_location.r_addr == NULL) {
459 goto cb_init_out;
460 }
461
462 if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp") == 0) {
463 knc.knc_semantics = NC_TPI_COTS;
464 knc.knc_protofmly = "inet";
465 knc.knc_proto = "tcp";
466 devnam = "/dev/tcp";
467 af = AF_INET;
468 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp")
469 == 0) {
470 knc.knc_semantics = NC_TPI_CLTS;
471 knc.knc_protofmly = "inet";
472 knc.knc_proto = "udp";
473 devnam = "/dev/udp";
474 af = AF_INET;
475 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp6")
476 == 0) {
477 knc.knc_semantics = NC_TPI_COTS;
478 knc.knc_protofmly = "inet6";
479 knc.knc_proto = "tcp";
480 devnam = "/dev/tcp6";
481 af = AF_INET6;
482 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp6")
483 == 0) {
484 knc.knc_semantics = NC_TPI_CLTS;
485 knc.knc_protofmly = "inet6";
486 knc.knc_proto = "udp";
487 devnam = "/dev/udp6";
488 af = AF_INET6;
489 } else {
490 goto cb_init_out;
491 }
492
493 if (lookupname(devnam, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp) != 0) {
494
495 goto cb_init_out;
496 }
497
498 if (vp->v_type != VCHR) {
499 VN_RELE(vp);
500 goto cb_init_out;
501 }
502
503 knc.knc_rdev = vp->v_rdev;
504
505 VN_RELE(vp);
506
507 if (af == AF_INET) {
508 size = sizeof (addr4);
509 bzero(&addr4, size);
510 addr4.sin_family = (sa_family_t)af;
511 addr = &addr4.sin_addr;
512 pp = &addr4.sin_port;
513 taddr = &addr4;
514 } else /* AF_INET6 */ {
515 size = sizeof (addr6);
516 bzero(&addr6, size);
517 addr6.sin6_family = (sa_family_t)af;
518 addr = &addr6.sin6_addr;
519 pp = &addr6.sin6_port;
520 taddr = &addr6;
521 }
522
523 if (uaddr2sockaddr(af,
524 cbp->cb_callback.cb_location.r_addr, addr, pp)) {
525
526 goto cb_init_out;
527 }
528
529
530 nb.maxlen = nb.len = size;
531 nb.buf = (char *)taddr;
532
533 if (clnt_tli_kcreate(&knc, &nb, cbp->cb_callback.cb_program,
534 NFS_CB, 0, 0, curthread->t_cred, &ch)) {
535
536 ch = NULL;
537 }
538
539 /* turn off reserved port usage */
540 (void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, (char *)&useresvport);
541
542 cb_init_out:
543 mutex_exit(cbp->cb_lock);
544 return (ch);
545 }
546
547 /*
548 * Iterate over the client handle cache and
549 * destroy it.
550 */
551 static void
552 rfs4_cb_chflush(rfs4_cbinfo_t *cbp)
553 {
554 CLIENT *ch;
555
556 while (cbp->cb_chc_free) {
557 cbp->cb_chc_free--;
558 ch = cbp->cb_chc[cbp->cb_chc_free];
559 cbp->cb_chc[cbp->cb_chc_free] = NULL;
560 if (ch) {
561 if (ch->cl_auth)
562 auth_destroy(ch->cl_auth);
563 clnt_destroy(ch);
564 }
565 }
566 }
567
568 /*
569 * Return a client handle, either from a the small
570 * rfs4_client_t cache or one that we just created.
571 */
572 static CLIENT *
573 rfs4_cb_getch(rfs4_cbinfo_t *cbp)
574 {
575 CLIENT *cbch = NULL;
576 uint32_t zilch = 0;
577
578 mutex_enter(cbp->cb_lock);
579
580 if (cbp->cb_chc_free) {
581 cbp->cb_chc_free--;
582 cbch = cbp->cb_chc[ cbp->cb_chc_free ];
583 mutex_exit(cbp->cb_lock);
584 (void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch);
585 return (cbch);
586 }
587
588 mutex_exit(cbp->cb_lock);
589
590 /* none free so make it now */
591 cbch = rfs4_cbch_init(cbp);
592
593 return (cbch);
594 }
595
596 /*
597 * Return the client handle to the small cache or
598 * destroy it.
599 */
600 static void
601 rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld)
602 {
603 if (lockheld == FALSE)
604 mutex_enter(cbp->cb_lock);
605
606 if (cbp->cb_chc_free < RFS4_CBCH_MAX) {
607 cbp->cb_chc[ cbp->cb_chc_free++ ] = ch;
608 if (lockheld == FALSE)
609 mutex_exit(cbp->cb_lock);
610 return;
611 }
612 if (lockheld == FALSE)
613 mutex_exit(cbp->cb_lock);
614
615 /*
616 * cache maxed out of free entries, obliterate
617 * this client handle, destroy it, throw it away.
618 */
619 if (ch->cl_auth)
620 auth_destroy(ch->cl_auth);
621 clnt_destroy(ch);
622 }
623
624 /*
625 * With the supplied callback information - initialize the client
626 * callback data. If there is a callback in progress, save the
627 * callback info so that a thread can pick it up in the future.
628 */
void
rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident)
{
	char *addr = NULL;
	char *netid = NULL;
	rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
	size_t len;

	/* Set the call back for the client */
	if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' &&
	    cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') {
		/* Copy addr/netid before taking cb_lock (allocs may sleep) */
		len = strlen(cb->cb_location.r_addr) + 1;
		addr = kmem_alloc(len, KM_SLEEP);
		bcopy(cb->cb_location.r_addr, addr, len);
		len = strlen(cb->cb_location.r_netid) + 1;
		netid = kmem_alloc(len, KM_SLEEP);
		bcopy(cb->cb_location.r_netid, netid, len);
	}
	/* ready to save the new information but first free old, if exists */
	mutex_enter(cbp->cb_lock);

	cbp->cb_newer.cb_callback.cb_program = cb->cb_program;

	if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL)
		kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr,
		    strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1);
	cbp->cb_newer.cb_callback.cb_location.r_addr = addr;

	if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL)
		kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid,
		    strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1);
	cbp->cb_newer.cb_callback.cb_location.r_netid = netid;

	cbp->cb_newer.cb_ident = cb_ident;

	/*
	 * Only mark the info "new" when both strings are non-empty;
	 * confirmation happens later via rfs4_deleg_cb_check().
	 */
	if (addr && *addr && netid && *netid) {
		cbp->cb_newer.cb_new = TRUE;
		cbp->cb_newer.cb_confirmed = FALSE;
	} else {
		cbp->cb_newer.cb_new = FALSE;
		cbp->cb_newer.cb_confirmed = FALSE;
	}

	mutex_exit(cbp->cb_lock);
}
674
675 /*
676 * The server uses this when processing SETCLIENTID_CONFIRM. Callback
677 * information may have been provided on SETCLIENTID and this call
678 * marks that information as confirmed and then starts a thread to
679 * test the callback path.
680 */
void
rfs4_deleg_cb_check(rfs4_client_t *cp)
{
	/* No unconfirmed callback info pending; nothing to test */
	if (cp->rc_cbinfo.cb_newer.cb_new == FALSE)
		return;

	cp->rc_cbinfo.cb_newer.cb_confirmed = TRUE;

	rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */

	/* rfs4_do_cb_null() releases the client hold when it finishes */
	(void) thread_create(NULL, 0, rfs4_do_cb_null, cp, 0, &p0, TS_RUN,
	    minclsyspri);
}
694
695 static void
696 rfs4args_cb_recall_free(nfs_cb_argop4 *argop)
697 {
698 CB_RECALL4args *rec_argp;
699
700 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
701 if (rec_argp->fh.nfs_fh4_val)
702 kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len);
703 }
704
705 /* ARGSUSED */
706 static void
707 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop)
708 {
709 CB_GETATTR4args *argp;
710
711 argp = &argop->nfs_cb_argop4_u.opcbgetattr;
712 if (argp->fh.nfs_fh4_val)
713 kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len);
714 }
715
/*
 * Free the argument array (and any per-op allocations), the compound
 * tag, and optionally the XDR-decoded results of a CB_COMPOUND call.
 */
static void
rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp)
{
	int i, arglen;
	nfs_cb_argop4 *argop;

	/*
	 * First free any special args alloc'd for specific ops.
	 */
	arglen = args->array_len;
	argop = args->array;
	for (i = 0; i < arglen; i++, argop++) {

		switch (argop->argop) {
		case OP_CB_RECALL:
			rfs4args_cb_recall_free(argop);
			break;

		case OP_CB_GETATTR:
			rfs4args_cb_getattr_free(argop);
			break;

		default:
			/*
			 * NOTE(review): an unexpected op returns here
			 * without freeing the tag, array, or resp below,
			 * which leaks.  The server only builds known ops,
			 * so this is presumably unreachable -- confirm
			 * before changing.
			 */
			return;
		}
	}

	if (args->tag.utf8string_len > 0)
		UTF8STRING_FREE(args->tag)

	kmem_free(args->array, arglen * sizeof (nfs_cb_argop4));
	if (resp)
		(void) xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp);
}
750
751 /*
752 * General callback routine for the server to the client.
753 */
static enum clnt_stat
rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args,
    CB_COMPOUND4res *res, struct timeval timeout)
{
	rfs4_cbinfo_t *cbp;
	CLIENT *ch;
	/* start with this in case cb_getch() fails */
	enum clnt_stat stat = RPC_FAILED;

	res->tag.utf8string_val = NULL;
	res->array = NULL;

retry:
	/* Take a reference on the (possibly refreshed) callback info */
	cbp = rfs4_cbinfo_hold(cp);
	if (cbp == NULL)
		return (stat);

	/* get a client handle */
	if ((ch = rfs4_cb_getch(cbp)) != NULL) {
		/*
		 * reset the cb_ident since it may have changed in
		 * rfs4_cbinfo_hold()
		 */
		args->callback_ident = cbp->cb_ident;

		stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv,
		    (caddr_t)args, xdr_CB_COMPOUND4res,
		    (caddr_t)res, timeout);

		/* free client handle */
		rfs4_cb_freech(cbp, ch, FALSE);
	}

	/*
	 * If the rele says that there may be new callback info then
	 * retry this sequence and it may succeed as a result of the
	 * new callback path
	 */
	if (rfs4_cbinfo_rele(cbp,
	    (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE)
		goto retry;

	return (stat);
}
798
799 /*
800 * Used by the NFSv4 server to get attributes for a file while
801 * handling the case where a file has been write delegated. For the
802 * time being, VOP_GETATTR() is called and CB_GETATTR processing is
803 * not undertaken. This call site is maintained in case the server is
804 * updated in the future to handle write delegation space guarantees.
805 */
806 nfsstat4
807 rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
808 {
809
810 int error;
811
812 error = VOP_GETATTR(vp, vap, flag, cr, NULL);
813 return (puterrno4(error));
814 }
815
816 /*
817 * This is used everywhere in the v2/v3 server to allow the
818 * integration of all NFS versions and the support of delegation. For
819 * now, just call the VOP_GETATTR(). If the NFSv4 server is enhanced
820 * in the future to provide space guarantees for write delegations
821 * then this call site should be expanded to interact with the client.
822 */
823 int
824 rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
825 {
826 return (VOP_GETATTR(vp, vap, flag, cr, NULL));
827 }
828
829 /*
830 * Place the actual cb_recall otw call to client.
831 */
static void
rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc)
{
	CB_COMPOUND4args cb4_args;
	CB_COMPOUND4res cb4_res;
	CB_RECALL4args *rec_argp;
	CB_RECALL4res *rec_resp;
	nfs_cb_argop4 *argop;
	int numops;
	int argoplist_size;
	struct timeval timeout;
	nfs_fh4 *fhp;
	enum clnt_stat call_stat;

	/*
	 * set up the compound args
	 */
	numops = 1;	/* CB_RECALL only */

	argoplist_size = numops * sizeof (nfs_cb_argop4);
	argop = kmem_zalloc(argoplist_size, KM_SLEEP);
	argop->argop = OP_CB_RECALL;
	rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;

	(void) str_to_utf8("cb_recall", &cb4_args.tag);
	cb4_args.minorversion = CB4_MINORVERSION;
	/* cb4_args.callback_ident is set in rfs4_do_callback() */
	cb4_args.array_len = numops;
	cb4_args.array = argop;

	/*
	 * fill in the args struct: delegation stateid, truncate hint,
	 * and a copy of the file's filehandle
	 */
	bcopy(&dsp->rds_delegid.stateid, &rec_argp->stateid, sizeof (stateid4));
	rec_argp->truncate = trunc;

	fhp = &dsp->rds_finfo->rf_filehandle;
	rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) *
	    fhp->nfs_fh4_len, KM_SLEEP);
	nfs_fh4_copy(fhp, &rec_argp->fh);

	/* Keep track of when we did this for observability */
	dsp->rds_time_recalled = gethrestime_sec();

	/*
	 * Set up the timeout for the callback and make the actual call.
	 * Timeout will be 80% of the lease period for this server.
	 */
	timeout.tv_sec = (rfs4_lease_time * 80) / 100;
	timeout.tv_usec = 0;

	DTRACE_NFSV4_3(cb__recall__start, rfs4_client_t *, dsp->rds_client,
	    rfs4_deleg_state_t *, dsp, CB_RECALL4args *, rec_argp);

	call_stat = rfs4_do_callback(dsp->rds_client, &cb4_args, &cb4_res,
	    timeout);

	rec_resp = (cb4_res.array_len == 0) ? NULL :
	    &cb4_res.array[0].nfs_cb_resop4_u.opcbrecall;

	DTRACE_NFSV4_3(cb__recall__done, rfs4_client_t *, dsp->rds_client,
	    rfs4_deleg_state_t *, dsp, CB_RECALL4res *, rec_resp);

	/* On any RPC or NFS-level failure, force the delegation back */
	if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) {
		rfs4_return_deleg(dsp, TRUE);
	}

	rfs4freeargres(&cb4_args, &cb4_res);
}
901
/* Argument bundle handed to each do_recall() worker thread */
struct recall_arg {
	rfs4_deleg_state_t *dsp;	/* delegation being recalled (held) */
	void (*recall)(rfs4_deleg_state_t *, bool_t trunc); /* otw recall op */
	bool_t trunc;	/* forwarded as the CB_RECALL truncate flag */
};
907
/*
 * Worker thread: issue the recall callback for one delegation, then
 * decrement the file's outstanding-recall count and wake the master
 * thread (do_recall_file()) when the last recall completes.
 */
static void
do_recall(struct recall_arg *arg)
{
	rfs4_deleg_state_t *dsp = arg->dsp;
	rfs4_file_t *fp = dsp->rds_finfo;
	callb_cpr_t cpr_info;
	kmutex_t cpr_lock;

	/* Register with CPR (suspend/resume) for the life of this thread */
	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall");

	/*
	 * It is possible that before this thread starts
	 * the client has send us a return_delegation, and
	 * if that is the case we do not need to send the
	 * recall callback.
	 */
	if (dsp->rds_dtype != OPEN_DELEGATE_NONE) {
		DTRACE_PROBE3(nfss__i__recall,
		    struct recall_arg *, arg,
		    struct rfs4_deleg_state_t *, dsp,
		    struct rfs4_file_t *, fp);

		if (arg->recall)
			(void) (*arg->recall)(dsp, arg->trunc);
	}

	mutex_enter(fp->rf_dinfo.rd_recall_lock);
	/*
	 * Recall count may go negative if the parent thread that is
	 * creating the individual callback threads does not modify
	 * the recall_count field before the callback thread actually
	 * gets a response from the CB_RECALL
	 */
	fp->rf_dinfo.rd_recall_count--;
	if (fp->rf_dinfo.rd_recall_count == 0)
		cv_signal(fp->rf_dinfo.rd_recall_cv);
	mutex_exit(fp->rf_dinfo.rd_recall_lock);

	mutex_enter(&cpr_lock);
	CALLB_CPR_EXIT(&cpr_info);
	mutex_destroy(&cpr_lock);

	rfs4_deleg_state_rele(dsp); /* release the hold for this thread */

	kmem_free(arg, sizeof (struct recall_arg));
}
955
/* Argument bundle for the do_recall_file() master recall thread */
struct master_recall_args {
	rfs4_file_t *fp;	/* file whose delegations are being recalled */
	void (*recall)(rfs4_deleg_state_t *, bool_t); /* per-deleg recall op */
	bool_t trunc;	/* forwarded to each per-delegation recall */
};
961
/*
 * Master recall thread: spawn one do_recall() worker per delegation
 * on the file, then wait for all of them to complete before dropping
 * the file hold.  Only one master recall runs per file at a time.
 */
static void
do_recall_file(struct master_recall_args *map)
{
	rfs4_file_t *fp = map->fp;
	rfs4_deleg_state_t *dsp;
	struct recall_arg *arg;
	callb_cpr_t cpr_info;
	kmutex_t cpr_lock;
	int32_t recall_count;

	rfs4_dbe_lock(fp->rf_dbe);

	/* Recall already in progress ? */
	mutex_enter(fp->rf_dinfo.rd_recall_lock);
	if (fp->rf_dinfo.rd_recall_count != 0) {
		mutex_exit(fp->rf_dinfo.rd_recall_lock);
		/* drop the hold rfs4_recall_file() took for this thread */
		rfs4_dbe_rele_nolock(fp->rf_dbe);
		rfs4_dbe_unlock(fp->rf_dbe);
		kmem_free(map, sizeof (struct master_recall_args));
		return;
	}

	mutex_exit(fp->rf_dinfo.rd_recall_lock);

	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "v4RecallFile");

	recall_count = 0;
	for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
	    dsp = list_next(&fp->rf_delegstatelist, dsp)) {

		rfs4_dbe_lock(dsp->rds_dbe);
		/*
		 * if this delegation state
		 * is being reaped skip it
		 */
		if (rfs4_dbe_is_invalid(dsp->rds_dbe)) {
			rfs4_dbe_unlock(dsp->rds_dbe);
			continue;
		}

		/* hold for receiving thread */
		rfs4_dbe_hold(dsp->rds_dbe);
		rfs4_dbe_unlock(dsp->rds_dbe);

		arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP);
		arg->recall = map->recall;
		arg->trunc = map->trunc;
		arg->dsp = dsp;

		recall_count++;

		(void) thread_create(NULL, 0, do_recall, arg, 0, &p0, TS_RUN,
		    minclsyspri);
	}

	rfs4_dbe_unlock(fp->rf_dbe);

	mutex_enter(fp->rf_dinfo.rd_recall_lock);
	/*
	 * Recall count may go negative if the parent thread that is
	 * creating the individual callback threads does not modify
	 * the recall_count field before the callback thread actually
	 * gets a response from the CB_RECALL
	 */
	fp->rf_dinfo.rd_recall_count += recall_count;
	while (fp->rf_dinfo.rd_recall_count)
		cv_wait(fp->rf_dinfo.rd_recall_cv, fp->rf_dinfo.rd_recall_lock);

	mutex_exit(fp->rf_dinfo.rd_recall_lock);

	DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp);
	rfs4_file_rele(fp);
	kmem_free(map, sizeof (struct master_recall_args));
	mutex_enter(&cpr_lock);
	CALLB_CPR_EXIT(&cpr_info);
	mutex_destroy(&cpr_lock);
}
1040
/*
 * Kick off an asynchronous recall of all delegations on a file by
 * spawning a do_recall_file() master thread.  No-op if the file is
 * not currently delegated.
 */
static void
rfs4_recall_file(rfs4_file_t *fp,
    void (*recall)(rfs4_deleg_state_t *, bool_t trunc),
    bool_t trunc, rfs4_client_t *cp)
{
	struct master_recall_args *args;

	rfs4_dbe_lock(fp->rf_dbe);
	if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
		rfs4_dbe_unlock(fp->rf_dbe);
		return;
	}
	rfs4_dbe_hold(fp->rf_dbe);	/* hold for new thread */

	/*
	 * Mark the time we started the recall processing.
	 * If it has been previously recalled, do not reset the
	 * timer since this is used for the revocation decision.
	 */
	if (fp->rf_dinfo.rd_time_recalled == 0)
		fp->rf_dinfo.rd_time_recalled = gethrestime_sec();
	fp->rf_dinfo.rd_ever_recalled = TRUE; /* used for policy decision */
	/* Client causing recall not always available */
	if (cp)
		fp->rf_dinfo.rd_conflicted_client = cp->rc_clientid;

	rfs4_dbe_unlock(fp->rf_dbe);

	args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP);
	args->fp = fp;
	args->recall = recall;
	args->trunc = trunc;

	(void) thread_create(NULL, 0, do_recall_file, args, 0, &p0, TS_RUN,
	    minclsyspri);
}
1077
1078 void
1079 rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp)
1080 {
1081 time_t elapsed1, elapsed2;
1082
1083 if (fp->rf_dinfo.rd_time_recalled != 0) {
1084 elapsed1 = gethrestime_sec() - fp->rf_dinfo.rd_time_recalled;
1085 elapsed2 = gethrestime_sec() - fp->rf_dinfo.rd_time_lastwrite;
1086 /* First check to see if a revocation should occur */
1087 if (elapsed1 > rfs4_lease_time &&
1088 elapsed2 > rfs4_lease_time) {
1089 rfs4_revoke_file(fp);
1090 return;
1091 }
1092 /*
1093 * Next check to see if a recall should be done again
1094 * so quickly.
1095 */
1096 if (elapsed1 <= ((rfs4_lease_time * 20) / 100))
1097 return;
1098 }
1099 rfs4_recall_file(fp, rfs4_do_cb_recall, trunc, cp);
1100 }
1101
1102 /*
1103 * rfs4_check_recall is called from rfs4_do_open to determine if the current
1104 * open conflicts with the delegation.
1105 * Return true if we need recall otherwise false.
1106 * Assumes entry locks for sp and sp->rs_finfo are held.
1107 */
1108 bool_t
1109 rfs4_check_recall(rfs4_state_t *sp, uint32_t access)
1110 {
1111 open_delegation_type4 dtype = sp->rs_finfo->rf_dinfo.rd_dtype;
1112
1113 switch (dtype) {
1114 case OPEN_DELEGATE_NONE:
1115 /* Not currently delegated so there is nothing to do */
1116 return (FALSE);
1117 case OPEN_DELEGATE_READ:
1118 /*
1119 * If the access is only asking for READ then there is
1120 * no conflict and nothing to do. If it is asking
1121 * for write, then there will be conflict and the read
1122 * delegation should be recalled.
1123 */
1124 if (access == OPEN4_SHARE_ACCESS_READ)
1125 return (FALSE);
1126 else
1127 return (TRUE);
1128 case OPEN_DELEGATE_WRITE:
1129 /* Check to see if this client has the delegation */
1130 return (rfs4_is_deleg(sp));
1131 }
1132
1133 return (FALSE);
1134 }
1135
1136 /*
1137 * Return the "best" allowable delegation available given the current
1138 * delegation type and the desired access and deny modes on the file.
1139 * At the point that this routine is called we know that the access and
1140 * deny modes are consistent with the file modes.
1141 */
1142 static open_delegation_type4
1143 rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp)
1144 {
1145 open_delegation_type4 dtype = fp->rf_dinfo.rd_dtype;
1146 uint32_t access = sp->rs_share_access;
1147 uint32_t deny = sp->rs_share_deny;
1148 int readcnt = 0;
1149 int writecnt = 0;
1150
1151 switch (dtype) {
1152 case OPEN_DELEGATE_NONE:
1153 /*
1154 * Determine if more than just this OPEN have the file
1155 * open and if so, no delegation may be provided to
1156 * the client.
1157 */
1158 if (access & OPEN4_SHARE_ACCESS_WRITE)
1159 writecnt++;
1160 if (access & OPEN4_SHARE_ACCESS_READ)
1161 readcnt++;
1162
1163 if (fp->rf_access_read > readcnt ||
1164 fp->rf_access_write > writecnt)
1165 return (OPEN_DELEGATE_NONE);
1166
1167 /*
1168 * If the client is going to write, or if the client
1169 * has exclusive access, return a write delegation.
1170 */
1171 if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1172 (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE)))
1173 return (OPEN_DELEGATE_WRITE);
1174 /*
1175 * If we don't want to write or we've haven't denied read
1176 * access to others, return a read delegation.
1177 */
1178 if ((access & ~OPEN4_SHARE_ACCESS_WRITE) ||
1179 (deny & ~OPEN4_SHARE_DENY_READ))
1180 return (OPEN_DELEGATE_READ);
1181
1182 /* Shouldn't get here */
1183 return (OPEN_DELEGATE_NONE);
1184
1185 case OPEN_DELEGATE_READ:
1186 /*
1187 * If the file is delegated for read but we wan't to
1188 * write or deny others to read then we can't delegate
1189 * the file. We shouldn't get here since the delegation should
1190 * have been recalled already.
1191 */
1192 if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1193 (deny & OPEN4_SHARE_DENY_READ))
1194 return (OPEN_DELEGATE_NONE);
1195 return (OPEN_DELEGATE_READ);
1196
1197 case OPEN_DELEGATE_WRITE:
1198 return (OPEN_DELEGATE_WRITE);
1199 }
1200
1201 /* Shouldn't get here */
1202 return (OPEN_DELEGATE_NONE);
1203 }
1204
1205 /*
1206 * Given the desired delegation type and the "history" of the file
1207 * determine the actual delegation type to return.
1208 */
1209 static open_delegation_type4
1210 rfs4_delegation_policy(open_delegation_type4 dtype,
1211 rfs4_dinfo_t *dinfo, clientid4 cid)
1212 {
1213 time_t elapsed;
1214
1215 if (rfs4_deleg_policy != SRV_NORMAL_DELEGATE)
1216 return (OPEN_DELEGATE_NONE);
1217
1218 /*
1219 * Has this file/delegation ever been recalled? If not then
1220 * no further checks for a delegation race need to be done.
1221 * However if a recall has occurred, then check to see if a
1222 * client has caused its own delegation recall to occur. If
1223 * not, then has a delegation for this file been returned
1224 * recently? If so, then do not assign a new delegation to
1225 * avoid a "delegation race" between the original client and
1226 * the new/conflicting client.
1227 */
1228 if (dinfo->rd_ever_recalled == TRUE) {
1229 if (dinfo->rd_conflicted_client != cid) {
1230 elapsed = gethrestime_sec() - dinfo->rd_time_returned;
1231 if (elapsed < rfs4_lease_time)
1232 return (OPEN_DELEGATE_NONE);
1233 }
1234 }
1235
1236 /* Limit the number of read grants */
1237 if (dtype == OPEN_DELEGATE_READ &&
1238 dinfo->rd_rdgrants > MAX_READ_DELEGATIONS)
1239 return (OPEN_DELEGATE_NONE);
1240
1241 /*
1242 * Should consider limiting total number of read/write
1243 * delegations the server will permit.
1244 */
1245
1246 return (dtype);
1247 }
1248
1249 /*
1250 * Try and grant a delegation for an open give the state. The routine
1251 * returns the delegation type granted. This could be OPEN_DELEGATE_NONE.
1252 *
1253 * The state and associate file entry must be locked
1254 */
1255 rfs4_deleg_state_t *
1256 rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall)
1257 {
1258 rfs4_file_t *fp = sp->rs_finfo;
1259 open_delegation_type4 dtype;
1260 int no_delegation;
1261
1262 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1263 ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1264
1265 /* Is the server even providing delegations? */
1266 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE)
1267 return (NULL);
1268
1269 /* Check to see if delegations have been temporarily disabled */
1270 mutex_enter(&rfs4_deleg_lock);
1271 no_delegation = rfs4_deleg_disabled;
1272 mutex_exit(&rfs4_deleg_lock);
1273
1274 if (no_delegation)
1275 return (NULL);
1276
1277 /* Don't grant a delegation if a deletion is impending. */
1278 if (fp->rf_dinfo.rd_hold_grant > 0) {
1279 return (NULL);
1280 }
1281
1282 /*
1283 * Don't grant a delegation if there are any lock manager
1284 * (NFSv2/v3) locks for the file. This is a bit of a hack (e.g.,
1285 * if there are only read locks we should be able to grant a
1286 * read-only delegation), but it's good enough for now.
1287 *
1288 * MT safety: the lock manager checks for conflicting delegations
1289 * before processing a lock request. That check will block until
1290 * we are done here. So if the lock manager acquires a lock after
1291 * we decide to grant the delegation, the delegation will get
1292 * immediately recalled (if there's a conflict), so we're safe.
1293 */
1294 if (lm_vp_active(fp->rf_vp)) {
1295 return (NULL);
1296 }
1297
1298 /*
1299 * Based on the type of delegation request passed in, take the
1300 * appropriate action (DELEG_NONE is handled above)
1301 */
1302 switch (dreq) {
1303
1304 case DELEG_READ:
1305 case DELEG_WRITE:
1306 /*
1307 * The server "must" grant the delegation in this case.
1308 * Client is using open previous
1309 */
1310 dtype = (open_delegation_type4)dreq;
1311 *recall = 1;
1312 break;
1313 case DELEG_ANY:
1314 /*
1315 * If a valid callback path does not exist, no delegation may
1316 * be granted.
1317 */
1318 if (sp->rs_owner->ro_client->rc_cbinfo.cb_state != CB_OK)
1319 return (NULL);
1320
1321 /*
1322 * If the original operation which caused time_rm_delayed
1323 * to be set hasn't been retried and completed for one
1324 * full lease period, clear it and allow delegations to
1325 * get granted again.
1326 */
1327 if (fp->rf_dinfo.rd_time_rm_delayed > 0 &&
1328 gethrestime_sec() >
1329 fp->rf_dinfo.rd_time_rm_delayed + rfs4_lease_time)
1330 fp->rf_dinfo.rd_time_rm_delayed = 0;
1331
1332 /*
1333 * If we are waiting for a delegation to be returned then
1334 * don't delegate this file. We do this for correctness as
1335 * well as if the file is being recalled we would likely
1336 * recall this file again.
1337 */
1338
1339 if (fp->rf_dinfo.rd_time_recalled != 0 ||
1340 fp->rf_dinfo.rd_time_rm_delayed != 0)
1341 return (NULL);
1342
1343 /* Get the "best" delegation candidate */
1344 dtype = rfs4_check_delegation(sp, fp);
1345
1346 if (dtype == OPEN_DELEGATE_NONE)
1347 return (NULL);
1348
1349 /*
1350 * Based on policy and the history of the file get the
1351 * actual delegation.
1352 */
1353 dtype = rfs4_delegation_policy(dtype, &fp->rf_dinfo,
1354 sp->rs_owner->ro_client->rc_clientid);
1355
1356 if (dtype == OPEN_DELEGATE_NONE)
1357 return (NULL);
1358 break;
1359 default:
1360 return (NULL);
1361 }
1362
1363 /* set the delegation for the state */
1364 return (rfs4_deleg_state(sp, dtype, recall));
1365 }
1366
1367 void
1368 rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp,
1369 nfsace4 *ace, int recall)
1370 {
1371 open_write_delegation4 *wp;
1372 open_read_delegation4 *rp;
1373 nfs_space_limit4 *spl;
1374 nfsace4 nace;
1375
1376 /*
1377 * We need to allocate a new copy of the who string.
1378 * this string will be freed by the rfs4_op_open dis_resfree
1379 * routine. We need to do this allocation since replays will
1380 * be allocated and rfs4_compound can't tell the difference from
1381 * a replay and an inital open. N.B. if an ace is passed in, it
1382 * the caller's responsibility to free it.
1383 */
1384
1385 if (ace == NULL) {
1386 /*
1387 * Default is to deny all access, the client will have
1388 * to contact the server. XXX Do we want to actually
1389 * set a deny for every one, or do we simply want to
1390 * construct an entity that will match no one?
1391 */
1392 nace.type = ACE4_ACCESS_DENIED_ACE_TYPE;
1393 nace.flag = 0;
1394 nace.access_mask = ACE4_VALID_MASK_BITS;
1395 (void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who);
1396 } else {
1397 nace.type = ace->type;
1398 nace.flag = ace->flag;
1399 nace.access_mask = ace->access_mask;
1400 (void) utf8_copy(&ace->who, &nace.who);
1401 }
1402
1403 dp->delegation_type = dsp->rds_dtype;
1404
1405 switch (dsp->rds_dtype) {
1406 case OPEN_DELEGATE_NONE:
1407 break;
1408 case OPEN_DELEGATE_READ:
1409 rp = &dp->open_delegation4_u.read;
1410 rp->stateid = dsp->rds_delegid.stateid;
1411 rp->recall = (bool_t)recall;
1412 rp->permissions = nace;
1413 break;
1414 case OPEN_DELEGATE_WRITE:
1415 wp = &dp->open_delegation4_u.write;
1416 wp->stateid = dsp->rds_delegid.stateid;
1417 wp->recall = (bool_t)recall;
1418 spl = &wp->space_limit;
1419 spl->limitby = NFS_LIMIT_SIZE;
1420 spl->nfs_space_limit4_u.filesize = 0;
1421 wp->permissions = nace;
1422 break;
1423 }
1424 }
1425
1426 /*
1427 * Check if the file is delegated via the provided file struct.
1428 * Return TRUE if it is delegated. This is intended for use by
1429 * the v4 server. The v2/v3 server code should use rfs4_check_delegated().
1430 *
1431 * Note that if the file is found to have a delegation, it is
1432 * recalled, unless the clientid of the caller matches the clientid of the
1433 * delegation. If the caller has specified, there is a slight delay
1434 * inserted in the hopes that the delegation will be returned quickly.
1435 */
1436 bool_t
1437 rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp,
1438 bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp)
1439 {
1440 rfs4_deleg_state_t *dsp;
1441
1442 /* Is delegation enabled? */
1443 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)
1444 return (FALSE);
1445
1446 /* do we have a delegation on this file? */
1447 rfs4_dbe_lock(fp->rf_dbe);
1448 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1449 if (is_rm)
1450 fp->rf_dinfo.rd_hold_grant++;
1451 rfs4_dbe_unlock(fp->rf_dbe);
1452 return (FALSE);
1453 }
1454 /*
1455 * do we have a write delegation on this file or are we
1456 * requesting write access to a file with any type of existing
1457 * delegation?
1458 */
1459 if (mode == FWRITE || fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
1460 if (cp != NULL) {
1461 dsp = list_head(&fp->rf_delegstatelist);
1462 if (dsp == NULL) {
1463 rfs4_dbe_unlock(fp->rf_dbe);
1464 return (FALSE);
1465 }
1466 /*
1467 * Does the requestor already own the delegation?
1468 */
1469 if (dsp->rds_client->rc_clientid == *(cp)) {
1470 rfs4_dbe_unlock(fp->rf_dbe);
1471 return (FALSE);
1472 }
1473 }
1474
1475 rfs4_dbe_unlock(fp->rf_dbe);
1476 rfs4_recall_deleg(fp, trunc, NULL);
1477
1478 if (!do_delay) {
1479 rfs4_dbe_lock(fp->rf_dbe);
1480 fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1481 rfs4_dbe_unlock(fp->rf_dbe);
1482 return (TRUE);
1483 }
1484
1485 delay(NFS4_DELEGATION_CONFLICT_DELAY);
1486
1487 rfs4_dbe_lock(fp->rf_dbe);
1488 if (fp->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE) {
1489 fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1490 rfs4_dbe_unlock(fp->rf_dbe);
1491 return (TRUE);
1492 }
1493 }
1494 if (is_rm)
1495 fp->rf_dinfo.rd_hold_grant++;
1496 rfs4_dbe_unlock(fp->rf_dbe);
1497 return (FALSE);
1498 }
1499
1500 /*
1501 * Check if the file is delegated in the case of a v2 or v3 access.
1502 * Return TRUE if it is delegated which in turn means that v2 should
1503 * drop the request and in the case of v3 JUKEBOX should be returned.
1504 */
1505 bool_t
1506 rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc)
1507 {
1508 rfs4_file_t *fp;
1509 bool_t create = FALSE;
1510 bool_t rc = FALSE;
1511
1512 rfs4_hold_deleg_policy();
1513
1514 /* Is delegation enabled? */
1515 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) {
1516 fp = rfs4_findfile(vp, NULL, &create);
1517 if (fp != NULL) {
1518 if (rfs4_check_delegated_byfp(mode, fp, trunc,
1519 TRUE, FALSE, NULL)) {
1520 rc = TRUE;
1521 }
1522 rfs4_file_rele(fp);
1523 }
1524 }
1525 rfs4_rele_deleg_policy();
1526 return (rc);
1527 }
1528
1529 /*
1530 * Release a hold on the hold_grant counter which
1531 * prevents delegation from being granted while a remove
1532 * or a rename is in progress.
1533 */
1534 void
1535 rfs4_clear_dont_grant(rfs4_file_t *fp)
1536 {
1537 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)
1538 return;
1539 rfs4_dbe_lock(fp->rf_dbe);
1540 ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
1541 fp->rf_dinfo.rd_hold_grant--;
1542 fp->rf_dinfo.rd_time_rm_delayed = 0;
1543 rfs4_dbe_unlock(fp->rf_dbe);
1544 }
1545
1546 /*
1547 * State support for delegation.
1548 * Set the state delegation type for this state;
1549 * This routine is called from open via rfs4_grant_delegation and the entry
1550 * locks on sp and sp->rs_finfo are assumed.
1551 */
1552 static rfs4_deleg_state_t *
1553 rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall)
1554 {
1555 rfs4_file_t *fp = sp->rs_finfo;
1556 bool_t create = TRUE;
1557 rfs4_deleg_state_t *dsp;
1558 vnode_t *vp;
1559 int open_prev = *recall;
1560 int ret;
1561 int fflags = 0;
1562
1563 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1564 ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1565
1566 /* Shouldn't happen */
1567 if (fp->rf_dinfo.rd_recall_count != 0 ||
1568 (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
1569 dtype != OPEN_DELEGATE_READ)) {
1570 return (NULL);
1571 }
1572
1573 /* Unlock to avoid deadlock */
1574 rfs4_dbe_unlock(fp->rf_dbe);
1575 rfs4_dbe_unlock(sp->rs_dbe);
1576
1577 dsp = rfs4_finddeleg(sp, &create);
1578
1579 rfs4_dbe_lock(sp->rs_dbe);
1580 rfs4_dbe_lock(fp->rf_dbe);
1581
1582 if (dsp == NULL)
1583 return (NULL);
1584
1585 /*
1586 * It is possible that since we dropped the lock
1587 * in order to call finddeleg, the rfs4_file_t
1588 * was marked such that we should not grant a
1589 * delegation, if so bail out.
1590 */
1591 if (fp->rf_dinfo.rd_hold_grant > 0) {
1592 rfs4_deleg_state_rele(dsp);
1593 return (NULL);
1594 }
1595
1596 if (create == FALSE) {
1597 if (sp->rs_owner->ro_client == dsp->rds_client &&
1598 dsp->rds_dtype == dtype) {
1599 return (dsp);
1600 } else {
1601 rfs4_deleg_state_rele(dsp);
1602 return (NULL);
1603 }
1604 }
1605
1606 /*
1607 * Check that this file has not been delegated to another
1608 * client
1609 */
1610 if (fp->rf_dinfo.rd_recall_count != 0 ||
1611 fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE ||
1612 (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
1613 dtype != OPEN_DELEGATE_READ)) {
1614 rfs4_deleg_state_rele(dsp);
1615 return (NULL);
1616 }
1617
1618 vp = fp->rf_vp;
1619 /* vnevent_support returns 0 if file system supports vnevents */
1620 if (vnevent_support(vp, NULL)) {
1621 rfs4_deleg_state_rele(dsp);
1622 return (NULL);
1623 }
1624
1625 /* Calculate the fflags for this OPEN. */
1626 if (sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)
1627 fflags |= FREAD;
1628 if (sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)
1629 fflags |= FWRITE;
1630
1631 *recall = 0;
1632 /*
1633 * Before granting a delegation we need to know if anyone else has
1634 * opened the file in a conflicting mode. However, first we need to
1635 * know how we opened the file to check the counts properly.
1636 */
1637 if (dtype == OPEN_DELEGATE_READ) {
1638 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1639 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1640 vn_is_mapped(vp, V_WRITE)) {
1641 if (open_prev) {
1642 *recall = 1;
1643 } else {
1644 rfs4_deleg_state_rele(dsp);
1645 return (NULL);
1646 }
1647 }
1648 ret = fem_install(vp, deleg_rdops, (void *)fp, OPUNIQ,
1649 rfs4_mon_hold, rfs4_mon_rele);
1650 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1651 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1652 vn_is_mapped(vp, V_WRITE)) {
1653 if (open_prev) {
1654 *recall = 1;
1655 } else {
1656 (void) fem_uninstall(vp, deleg_rdops,
1657 (void *)fp);
1658 rfs4_deleg_state_rele(dsp);
1659 return (NULL);
1660 }
1661 }
1662 /*
1663 * Because a client can hold onto a delegation after the
1664 * file has been closed, we need to keep track of the
1665 * access to this file. Otherwise the CIFS server would
1666 * not know about the client accessing the file and could
1667 * inappropriately grant an OPLOCK.
1668 * fem_install() returns EBUSY when asked to install a
1669 * OPUNIQ monitor more than once. Therefore, check the
1670 * return code because we only want this done once.
1671 */
1672 if (ret == 0)
1673 vn_open_upgrade(vp, FREAD);
1674 } else { /* WRITE */
1675 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1676 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1677 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
1678 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
1679 vn_is_mapped(vp, V_RDORWR)) {
1680 if (open_prev) {
1681 *recall = 1;
1682 } else {
1683 rfs4_deleg_state_rele(dsp);
1684 return (NULL);
1685 }
1686 }
1687 ret = fem_install(vp, deleg_wrops, (void *)fp, OPUNIQ,
1688 rfs4_mon_hold, rfs4_mon_rele);
1689 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1690 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1691 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
1692 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
1693 vn_is_mapped(vp, V_RDORWR)) {
1694 if (open_prev) {
1695 *recall = 1;
1696 } else {
1697 (void) fem_uninstall(vp, deleg_wrops,
1698 (void *)fp);
1699 rfs4_deleg_state_rele(dsp);
1700 return (NULL);
1701 }
1702 }
1703 /*
1704 * Because a client can hold onto a delegation after the
1705 * file has been closed, we need to keep track of the
1706 * access to this file. Otherwise the CIFS server would
1707 * not know about the client accessing the file and could
1708 * inappropriately grant an OPLOCK.
1709 * fem_install() returns EBUSY when asked to install a
1710 * OPUNIQ monitor more than once. Therefore, check the
1711 * return code because we only want this done once.
1712 */
1713 if (ret == 0)
1714 vn_open_upgrade(vp, FREAD|FWRITE);
1715 }
1716 /* Place on delegation list for file */
1717 ASSERT(!list_link_active(&dsp->rds_node));
1718 list_insert_tail(&fp->rf_delegstatelist, dsp);
1719
1720 dsp->rds_dtype = fp->rf_dinfo.rd_dtype = dtype;
1721
1722 /* Update delegation stats for this file */
1723 fp->rf_dinfo.rd_time_lastgrant = gethrestime_sec();
1724
1725 /* reset since this is a new delegation */
1726 fp->rf_dinfo.rd_conflicted_client = 0;
1727 fp->rf_dinfo.rd_ever_recalled = FALSE;
1728
1729 if (dtype == OPEN_DELEGATE_READ)
1730 fp->rf_dinfo.rd_rdgrants++;
1731 else
1732 fp->rf_dinfo.rd_wrgrants++;
1733
1734 return (dsp);
1735 }
1736
1737 /*
1738 * State routine for the server when a delegation is returned.
1739 */
1740 void
1741 rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked)
1742 {
1743 rfs4_file_t *fp = dsp->rds_finfo;
1744 open_delegation_type4 dtypewas;
1745
1746 rfs4_dbe_lock(fp->rf_dbe);
1747
1748 /* nothing to do if no longer on list */
1749 if (!list_link_active(&dsp->rds_node)) {
1750 rfs4_dbe_unlock(fp->rf_dbe);
1751 return;
1752 }
1753
1754 /* Remove state from recall list */
1755 list_remove(&fp->rf_delegstatelist, dsp);
1756
1757 if (list_is_empty(&fp->rf_delegstatelist)) {
1758 dtypewas = fp->rf_dinfo.rd_dtype;
1759 fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
1760 rfs4_dbe_cv_broadcast(fp->rf_dbe);
1761
1762 /* if file system was unshared, the vp will be NULL */
1763 if (fp->rf_vp != NULL) {
1764 /*
1765 * Once a delegation is no longer held by any client,
1766 * the monitor is uninstalled. At this point, the
1767 * client must send OPEN otw, so we don't need the
1768 * reference on the vnode anymore. The open
1769 * downgrade removes the reference put on earlier.
1770 */
1771 if (dtypewas == OPEN_DELEGATE_READ) {
1772 (void) fem_uninstall(fp->rf_vp, deleg_rdops,
1773 (void *)fp);
1774 vn_open_downgrade(fp->rf_vp, FREAD);
1775 } else if (dtypewas == OPEN_DELEGATE_WRITE) {
1776 (void) fem_uninstall(fp->rf_vp, deleg_wrops,
1777 (void *)fp);
1778 vn_open_downgrade(fp->rf_vp, FREAD|FWRITE);
1779 }
1780 }
1781 }
1782
1783 switch (dsp->rds_dtype) {
1784 case OPEN_DELEGATE_READ:
1785 fp->rf_dinfo.rd_rdgrants--;
1786 break;
1787 case OPEN_DELEGATE_WRITE:
1788 fp->rf_dinfo.rd_wrgrants--;
1789 break;
1790 default:
1791 break;
1792 }
1793
1794 /* used in the policy decision */
1795 fp->rf_dinfo.rd_time_returned = gethrestime_sec();
1796
1797 /*
1798 * reset the time_recalled field so future delegations are not
1799 * accidentally revoked
1800 */
1801 if ((fp->rf_dinfo.rd_rdgrants + fp->rf_dinfo.rd_wrgrants) == 0)
1802 fp->rf_dinfo.rd_time_recalled = 0;
1803
1804 rfs4_dbe_unlock(fp->rf_dbe);
1805
1806 rfs4_dbe_lock(dsp->rds_dbe);
1807
1808 dsp->rds_dtype = OPEN_DELEGATE_NONE;
1809
1810 if (revoked == TRUE)
1811 dsp->rds_time_revoked = gethrestime_sec();
1812
1813 rfs4_dbe_invalidate(dsp->rds_dbe);
1814
1815 rfs4_dbe_unlock(dsp->rds_dbe);
1816
1817 if (revoked == TRUE) {
1818 rfs4_dbe_lock(dsp->rds_client->rc_dbe);
1819 dsp->rds_client->rc_deleg_revoked++; /* observability */
1820 rfs4_dbe_unlock(dsp->rds_client->rc_dbe);
1821 }
1822 }
1823
1824 static void
1825 rfs4_revoke_file(rfs4_file_t *fp)
1826 {
1827 rfs4_deleg_state_t *dsp;
1828
1829 /*
1830 * The lock for rfs4_file_t must be held when traversing the
1831 * delegation list but that lock needs to be released to call
1832 * rfs4_return_deleg()
1833 */
1834 rfs4_dbe_lock(fp->rf_dbe);
1835 while (dsp = list_head(&fp->rf_delegstatelist)) {
1836 rfs4_dbe_hold(dsp->rds_dbe);
1837 rfs4_dbe_unlock(fp->rf_dbe);
1838 rfs4_return_deleg(dsp, TRUE);
1839 rfs4_deleg_state_rele(dsp);
1840 rfs4_dbe_lock(fp->rf_dbe);
1841 }
1842 rfs4_dbe_unlock(fp->rf_dbe);
1843 }
1844
1845 /*
1846 * A delegation is assumed to be present on the file associated with
1847 * "sp". Check to see if the delegation matches is associated with
1848 * the same client as referenced by "sp". If it is not, TRUE is
1849 * returned. If the delegation DOES match the client (or no
1850 * delegation is present), return FALSE.
1851 * Assume the state entry and file entry are locked.
1852 */
1853 bool_t
1854 rfs4_is_deleg(rfs4_state_t *sp)
1855 {
1856 rfs4_deleg_state_t *dsp;
1857 rfs4_file_t *fp = sp->rs_finfo;
1858 rfs4_client_t *cp = sp->rs_owner->ro_client;
1859
1860 ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1861 for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
1862 dsp = list_next(&fp->rf_delegstatelist, dsp)) {
1863 if (cp != dsp->rds_client) {
1864 return (TRUE);
1865 }
1866 }
1867 return (FALSE);
1868 }
1869
/*
 * Temporarily disable the granting of new delegations (existing ones
 * are unaffected).  Calls nest: each call bumps a counter that
 * rfs4_grant_delegation() checks, and granting stays disabled until a
 * matching number of rfs4_enable_delegation() calls occur.
 */
void
rfs4_disable_delegation(void)
{
	mutex_enter(&rfs4_deleg_lock);
	rfs4_deleg_disabled++;
	mutex_exit(&rfs4_deleg_lock);
}
1877
/*
 * Re-enable delegation granting; undoes one rfs4_disable_delegation()
 * call.  Must be paired with a prior disable (enforced by the ASSERT).
 */
void
rfs4_enable_delegation(void)
{
	mutex_enter(&rfs4_deleg_lock);
	ASSERT(rfs4_deleg_disabled > 0);
	rfs4_deleg_disabled--;
	mutex_exit(&rfs4_deleg_lock);
}
1886
1887 void
1888 rfs4_mon_hold(void *arg)
1889 {
1890 rfs4_file_t *fp = arg;
1891
1892 rfs4_dbe_hold(fp->rf_dbe);
1893 }
1894
1895 void
1896 rfs4_mon_rele(void *arg)
1897 {
1898 rfs4_file_t *fp = arg;
1899
1900 rfs4_dbe_rele_nolock(fp->rf_dbe);
1901 }