1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 27 */ 28 29 #include <sys/systm.h> 30 #include <rpc/auth.h> 31 #include <rpc/clnt.h> 32 #include <nfs/nfs4_kprot.h> 33 #include <nfs/nfs4.h> 34 #include <nfs/lm.h> 35 #include <sys/cmn_err.h> 36 #include <sys/disp.h> 37 #include <sys/sdt.h> 38 39 #include <sys/pathname.h> 40 41 #include <sys/strsubr.h> 42 #include <sys/ddi.h> 43 44 #include <sys/vnode.h> 45 #include <sys/sdt.h> 46 #include <inet/common.h> 47 #include <inet/ip.h> 48 #include <inet/ip6.h> 49 50 #define MAX_READ_DELEGATIONS 5 51 52 krwlock_t rfs4_deleg_policy_lock; 53 srv_deleg_policy_t rfs4_deleg_policy = SRV_NEVER_DELEGATE; 54 static int rfs4_deleg_wlp = 5; 55 kmutex_t rfs4_deleg_lock; 56 static int rfs4_deleg_disabled; 57 static int rfs4_max_setup_cb_tries = 5; 58 59 #ifdef DEBUG 60 61 static int rfs4_test_cbgetattr_fail = 0; 62 int rfs4_cb_null; 63 int rfs4_cb_debug; 64 int rfs4_deleg_debug; 65 66 #endif 67 68 static void rfs4_recall_file(rfs4_file_t *, 69 void (*recall)(rfs4_deleg_state_t *, bool_t), 70 bool_t, rfs4_client_t *); 71 static void rfs4_revoke_file(rfs4_file_t *); 72 static void rfs4_cb_chflush(rfs4_cbinfo_t *); 73 static CLIENT *rfs4_cb_getch(rfs4_cbinfo_t *); 74 static void rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t); 75 static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *, 76 open_delegation_type4, int *); 77 78 /* 79 * Convert a universal address to an transport specific 80 * address using inet_pton. 81 */ 82 static int 83 uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp) 84 { 85 int dots = 0, i, j, len, k; 86 unsigned char c; 87 in_port_t port = 0; 88 89 len = strlen(ua); 90 91 for (i = len-1; i >= 0; i--) { 92 93 if (ua[i] == '.') 94 dots++; 95 96 if (dots == 2) { 97 98 ua[i] = '\0'; 99 /* 100 * We use k to remember were to stick '.' back, since 101 * ua was kmem_allocateded from the pool len+1. 102 */ 103 k = i; 104 if (inet_pton(af, ua, ap) == 1) { 105 106 c = 0; 107 108 for (j = i+1; j < len; j++) { 109 if (ua[j] == '.') { 110 port = c << 8; 111 c = 0; 112 } else if (ua[j] >= '0' && 113 ua[j] <= '9') { 114 c *= 10; 115 c += ua[j] - '0'; 116 } else { 117 ua[k] = '.'; 118 return (EINVAL); 119 } 120 } 121 port += c; 122 123 *pp = htons(port); 124 125 ua[k] = '.'; 126 return (0); 127 } else { 128 ua[k] = '.'; 129 return (EINVAL); 130 } 131 } 132 } 133 134 return (EINVAL); 135 } 136 137 /* 138 * Update the delegation policy with the 139 * value of "new_policy" 140 */ 141 void 142 rfs4_set_deleg_policy(srv_deleg_policy_t new_policy) 143 { 144 rw_enter(&rfs4_deleg_policy_lock, RW_WRITER); 145 rfs4_deleg_policy = new_policy; 146 rw_exit(&rfs4_deleg_policy_lock); 147 } 148 149 void 150 rfs4_hold_deleg_policy(void) 151 { 152 rw_enter(&rfs4_deleg_policy_lock, RW_READER); 153 } 154 155 void 156 rfs4_rele_deleg_policy(void) 157 { 158 rw_exit(&rfs4_deleg_policy_lock); 159 } 160 161 162 /* 163 * This free function is to be used when the client struct is being 164 * released and nothing at all is needed of the callback info any 165 * longer. 166 */ 167 void 168 rfs4_cbinfo_free(rfs4_cbinfo_t *cbp) 169 { 170 char *addr = cbp->cb_callback.cb_location.r_addr; 171 char *netid = cbp->cb_callback.cb_location.r_netid; 172 173 /* Free old address if any */ 174 175 if (addr) 176 kmem_free(addr, strlen(addr) + 1); 177 if (netid) 178 kmem_free(netid, strlen(netid) + 1); 179 180 addr = cbp->cb_newer.cb_callback.cb_location.r_addr; 181 netid = cbp->cb_newer.cb_callback.cb_location.r_netid; 182 183 if (addr) 184 kmem_free(addr, strlen(addr) + 1); 185 if (netid) 186 kmem_free(netid, strlen(netid) + 1); 187 188 if (cbp->cb_chc_free) { 189 rfs4_cb_chflush(cbp); 190 } 191 } 192 193 /* 194 * The server uses this to check the callback path supplied by the 195 * client. The callback connection is marked "in progress" while this 196 * work is going on and then eventually marked either OK or FAILED. 197 * This work can be done as part of a separate thread and at the end 198 * of this the thread will exit or it may be done such that the caller 199 * will continue with other work. 200 */ 201 static void 202 rfs4_do_cb_null(rfs4_client_t *cp) 203 { 204 struct timeval tv; 205 CLIENT *ch; 206 rfs4_cbstate_t newstate; 207 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo; 208 209 mutex_enter(cbp->cb_lock); 210 /* If another thread is doing CB_NULL RPC then return */ 211 if (cbp->cb_nullcaller == TRUE) { 212 mutex_exit(cbp->cb_lock); 213 rfs4_client_rele(cp); 214 return; 215 } 216 217 /* Mark the cbinfo as having a thread in the NULL callback */ 218 cbp->cb_nullcaller = TRUE; 219 220 /* 221 * Are there other threads still using the cbinfo client 222 * handles? If so, this thread must wait before going and 223 * mucking aroiund with the callback information 224 */ 225 while (cbp->cb_refcnt != 0) 226 cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock); 227 228 /* 229 * This thread itself may find that new callback info has 230 * arrived and is set up to handle this case and redrive the 231 * call to the client's callback server. 232 */ 233 retry: 234 if (cbp->cb_newer.cb_new == TRUE && 235 cbp->cb_newer.cb_confirmed == TRUE) { 236 char *addr = cbp->cb_callback.cb_location.r_addr; 237 char *netid = cbp->cb_callback.cb_location.r_netid; 238 239 /* 240 * Free the old stuff if it exists; may be the first 241 * time through this path 242 */ 243 if (addr) 244 kmem_free(addr, strlen(addr) + 1); 245 if (netid) 246 kmem_free(netid, strlen(netid) + 1); 247 248 /* Move over the addr/netid */ 249 cbp->cb_callback.cb_location.r_addr = 250 cbp->cb_newer.cb_callback.cb_location.r_addr; 251 cbp->cb_newer.cb_callback.cb_location.r_addr = NULL; 252 cbp->cb_callback.cb_location.r_netid = 253 cbp->cb_newer.cb_callback.cb_location.r_netid; 254 cbp->cb_newer.cb_callback.cb_location.r_netid = NULL; 255 256 /* Get the program number */ 257 cbp->cb_callback.cb_program = 258 cbp->cb_newer.cb_callback.cb_program; 259 cbp->cb_newer.cb_callback.cb_program = 0; 260 261 /* Don't forget the protocol's "cb_ident" field */ 262 cbp->cb_ident = cbp->cb_newer.cb_ident; 263 cbp->cb_newer.cb_ident = 0; 264 265 /* no longer new */ 266 cbp->cb_newer.cb_new = FALSE; 267 cbp->cb_newer.cb_confirmed = FALSE; 268 269 /* get rid of the old client handles that may exist */ 270 rfs4_cb_chflush(cbp); 271 272 cbp->cb_state = CB_NONE; 273 cbp->cb_timefailed = 0; /* reset the clock */ 274 cbp->cb_notified_of_cb_path_down = TRUE; 275 } 276 277 if (cbp->cb_state != CB_NONE) { 278 cv_broadcast(cbp->cb_cv); /* let the others know */ 279 cbp->cb_nullcaller = FALSE; 280 mutex_exit(cbp->cb_lock); 281 rfs4_client_rele(cp); 282 return; 283 } 284 285 /* mark rfs4_client_t as CALLBACK NULL in progress */ 286 cbp->cb_state = CB_INPROG; 287 mutex_exit(cbp->cb_lock); 288 289 /* get/generate a client handle */ 290 if ((ch = rfs4_cb_getch(cbp)) == NULL) { 291 mutex_enter(cbp->cb_lock); 292 cbp->cb_state = CB_BAD; 293 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 294 goto retry; 295 } 296 297 298 tv.tv_sec = 30; 299 tv.tv_usec = 0; 300 if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) { 301 newstate = CB_BAD; 302 } else { 303 newstate = CB_OK; 304 #ifdef DEBUG 305 rfs4_cb_null++; 306 #endif 307 } 308 309 /* Check to see if the client has specified new callback info */ 310 mutex_enter(cbp->cb_lock); 311 rfs4_cb_freech(cbp, ch, TRUE); 312 if (cbp->cb_newer.cb_new == TRUE && 313 cbp->cb_newer.cb_confirmed == TRUE) { 314 goto retry; /* give the CB_NULL another chance */ 315 } 316 317 cbp->cb_state = newstate; 318 if (cbp->cb_state == CB_BAD) 319 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 320 321 cv_broadcast(cbp->cb_cv); /* start up the other threads */ 322 cbp->cb_nullcaller = FALSE; 323 mutex_exit(cbp->cb_lock); 324 325 rfs4_client_rele(cp); 326 } 327 328 /* 329 * Given a client struct, inspect the callback info to see if the 330 * callback path is up and available. 331 * 332 * If new callback path is available and no one has set it up then 333 * try to set it up. If setup is not successful after 5 tries (5 secs) 334 * then gives up and returns NULL. 335 * 336 * If callback path is being initialized, then wait for the CB_NULL RPC 337 * call to occur. 338 */ 339 static rfs4_cbinfo_t * 340 rfs4_cbinfo_hold(rfs4_client_t *cp) 341 { 342 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo; 343 int retries = 0; 344 345 mutex_enter(cbp->cb_lock); 346 347 while (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) { 348 /* 349 * Looks like a new callback path may be available and 350 * noone has set it up. 351 */ 352 mutex_exit(cbp->cb_lock); 353 rfs4_dbe_hold(cp->rc_dbe); 354 rfs4_do_cb_null(cp); /* caller will release client hold */ 355 356 mutex_enter(cbp->cb_lock); 357 /* 358 * If callback path is no longer new, or it's being setup 359 * then stop and wait for it to be done. 360 */ 361 if (cbp->cb_newer.cb_new == FALSE || cbp->cb_nullcaller == TRUE) 362 break; 363 mutex_exit(cbp->cb_lock); 364 365 if (++retries >= rfs4_max_setup_cb_tries) 366 return (NULL); 367 delay(hz); 368 mutex_enter(cbp->cb_lock); 369 } 370 371 /* Is there a thread working on doing the CB_NULL RPC? */ 372 if (cbp->cb_nullcaller == TRUE) 373 cv_wait(cbp->cb_cv, cbp->cb_lock); /* if so, wait on it */ 374 375 /* If the callback path is not okay (up and running), just quit */ 376 if (cbp->cb_state != CB_OK) { 377 mutex_exit(cbp->cb_lock); 378 return (NULL); 379 } 380 381 /* Let someone know we are using the current callback info */ 382 cbp->cb_refcnt++; 383 mutex_exit(cbp->cb_lock); 384 return (cbp); 385 } 386 387 /* 388 * The caller is done with the callback info. It may be that the 389 * caller's RPC failed and the NFSv4 client has actually provided new 390 * callback information. If so, let the caller know so they can 391 * advantage of this and maybe retry the RPC that originally failed. 392 */ 393 static int 394 rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate) 395 { 396 int cb_new = FALSE; 397 398 mutex_enter(cbp->cb_lock); 399 400 /* The caller gets a chance to mark the callback info as bad */ 401 if (newstate != CB_NOCHANGE) 402 cbp->cb_state = newstate; 403 if (newstate == CB_FAILED) { 404 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 405 cbp->cb_notified_of_cb_path_down = FALSE; 406 } 407 408 cbp->cb_refcnt--; /* no longer using the information */ 409 410 /* 411 * A thread may be waiting on this one to finish and if so, 412 * let it know that it is okay to do the CB_NULL to the 413 * client's callback server. 414 */ 415 if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller) 416 cv_broadcast(cbp->cb_cv_nullcaller); 417 418 /* 419 * If this is the last thread to use the callback info and 420 * there is new callback information to try and no thread is 421 * there ready to do the CB_NULL, then return true to teh 422 * caller so they can do the CB_NULL 423 */ 424 if (cbp->cb_refcnt == 0 && 425 cbp->cb_nullcaller == FALSE && 426 cbp->cb_newer.cb_new == TRUE && 427 cbp->cb_newer.cb_confirmed == TRUE) 428 cb_new = TRUE; 429 430 mutex_exit(cbp->cb_lock); 431 432 return (cb_new); 433 } 434 435 /* 436 * Given the information in the callback info struct, create a client 437 * handle that can be used by the server for its callback path. 438 */ 439 static CLIENT * 440 rfs4_cbch_init(rfs4_cbinfo_t *cbp) 441 { 442 struct knetconfig knc; 443 vnode_t *vp; 444 struct sockaddr_in addr4; 445 struct sockaddr_in6 addr6; 446 void *addr, *taddr; 447 in_port_t *pp; 448 int af; 449 char *devnam; 450 struct netbuf nb; 451 int size; 452 CLIENT *ch = NULL; 453 int useresvport = 0; 454 455 mutex_enter(cbp->cb_lock); 456 457 if (cbp->cb_callback.cb_location.r_netid == NULL || 458 cbp->cb_callback.cb_location.r_addr == NULL) { 459 goto cb_init_out; 460 } 461 462 if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp") == 0) { 463 knc.knc_semantics = NC_TPI_COTS; 464 knc.knc_protofmly = "inet"; 465 knc.knc_proto = "tcp"; 466 devnam = "/dev/tcp"; 467 af = AF_INET; 468 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp") 469 == 0) { 470 knc.knc_semantics = NC_TPI_CLTS; 471 knc.knc_protofmly = "inet"; 472 knc.knc_proto = "udp"; 473 devnam = "/dev/udp"; 474 af = AF_INET; 475 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp6") 476 == 0) { 477 knc.knc_semantics = NC_TPI_COTS; 478 knc.knc_protofmly = "inet6"; 479 knc.knc_proto = "tcp"; 480 devnam = "/dev/tcp6"; 481 af = AF_INET6; 482 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp6") 483 == 0) { 484 knc.knc_semantics = NC_TPI_CLTS; 485 knc.knc_protofmly = "inet6"; 486 knc.knc_proto = "udp"; 487 devnam = "/dev/udp6"; 488 af = AF_INET6; 489 } else { 490 goto cb_init_out; 491 } 492 493 if (lookupname(devnam, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp) != 0) { 494 495 goto cb_init_out; 496 } 497 498 if (vp->v_type != VCHR) { 499 VN_RELE(vp); 500 goto cb_init_out; 501 } 502 503 knc.knc_rdev = vp->v_rdev; 504 505 VN_RELE(vp); 506 507 if (af == AF_INET) { 508 size = sizeof (addr4); 509 bzero(&addr4, size); 510 addr4.sin_family = (sa_family_t)af; 511 addr = &addr4.sin_addr; 512 pp = &addr4.sin_port; 513 taddr = &addr4; 514 } else /* AF_INET6 */ { 515 size = sizeof (addr6); 516 bzero(&addr6, size); 517 addr6.sin6_family = (sa_family_t)af; 518 addr = &addr6.sin6_addr; 519 pp = &addr6.sin6_port; 520 taddr = &addr6; 521 } 522 523 if (uaddr2sockaddr(af, 524 cbp->cb_callback.cb_location.r_addr, addr, pp)) { 525 526 goto cb_init_out; 527 } 528 529 530 nb.maxlen = nb.len = size; 531 nb.buf = (char *)taddr; 532 533 if (clnt_tli_kcreate(&knc, &nb, cbp->cb_callback.cb_program, 534 NFS_CB, 0, 0, curthread->t_cred, &ch)) { 535 536 ch = NULL; 537 } 538 539 /* turn off reserved port usage */ 540 (void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, (char *)&useresvport); 541 542 cb_init_out: 543 mutex_exit(cbp->cb_lock); 544 return (ch); 545 } 546 547 /* 548 * Iterate over the client handle cache and 549 * destroy it. 550 */ 551 static void 552 rfs4_cb_chflush(rfs4_cbinfo_t *cbp) 553 { 554 CLIENT *ch; 555 556 while (cbp->cb_chc_free) { 557 cbp->cb_chc_free--; 558 ch = cbp->cb_chc[cbp->cb_chc_free]; 559 cbp->cb_chc[cbp->cb_chc_free] = NULL; 560 if (ch) { 561 if (ch->cl_auth) 562 auth_destroy(ch->cl_auth); 563 clnt_destroy(ch); 564 } 565 } 566 } 567 568 /* 569 * Return a client handle, either from a the small 570 * rfs4_client_t cache or one that we just created. 571 */ 572 static CLIENT * 573 rfs4_cb_getch(rfs4_cbinfo_t *cbp) 574 { 575 CLIENT *cbch = NULL; 576 uint32_t zilch = 0; 577 578 mutex_enter(cbp->cb_lock); 579 580 if (cbp->cb_chc_free) { 581 cbp->cb_chc_free--; 582 cbch = cbp->cb_chc[ cbp->cb_chc_free ]; 583 mutex_exit(cbp->cb_lock); 584 (void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch); 585 return (cbch); 586 } 587 588 mutex_exit(cbp->cb_lock); 589 590 /* none free so make it now */ 591 cbch = rfs4_cbch_init(cbp); 592 593 return (cbch); 594 } 595 596 /* 597 * Return the client handle to the small cache or 598 * destroy it. 599 */ 600 static void 601 rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld) 602 { 603 if (lockheld == FALSE) 604 mutex_enter(cbp->cb_lock); 605 606 if (cbp->cb_chc_free < RFS4_CBCH_MAX) { 607 cbp->cb_chc[ cbp->cb_chc_free++ ] = ch; 608 if (lockheld == FALSE) 609 mutex_exit(cbp->cb_lock); 610 return; 611 } 612 if (lockheld == FALSE) 613 mutex_exit(cbp->cb_lock); 614 615 /* 616 * cache maxed out of free entries, obliterate 617 * this client handle, destroy it, throw it away. 618 */ 619 if (ch->cl_auth) 620 auth_destroy(ch->cl_auth); 621 clnt_destroy(ch); 622 } 623 624 /* 625 * With the supplied callback information - initialize the client 626 * callback data. If there is a callback in progress, save the 627 * callback info so that a thread can pick it up in the future. 628 */ 629 void 630 rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident) 631 { 632 char *addr = NULL; 633 char *netid = NULL; 634 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo; 635 size_t len; 636 637 /* Set the call back for the client */ 638 if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' && 639 cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') { 640 len = strlen(cb->cb_location.r_addr) + 1; 641 addr = kmem_alloc(len, KM_SLEEP); 642 bcopy(cb->cb_location.r_addr, addr, len); 643 len = strlen(cb->cb_location.r_netid) + 1; 644 netid = kmem_alloc(len, KM_SLEEP); 645 bcopy(cb->cb_location.r_netid, netid, len); 646 } 647 /* ready to save the new information but first free old, if exists */ 648 mutex_enter(cbp->cb_lock); 649 650 cbp->cb_newer.cb_callback.cb_program = cb->cb_program; 651 652 if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL) 653 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr, 654 strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1); 655 cbp->cb_newer.cb_callback.cb_location.r_addr = addr; 656 657 if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL) 658 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid, 659 strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1); 660 cbp->cb_newer.cb_callback.cb_location.r_netid = netid; 661 662 cbp->cb_newer.cb_ident = cb_ident; 663 664 if (addr && *addr && netid && *netid) { 665 cbp->cb_newer.cb_new = TRUE; 666 cbp->cb_newer.cb_confirmed = FALSE; 667 } else { 668 cbp->cb_newer.cb_new = FALSE; 669 cbp->cb_newer.cb_confirmed = FALSE; 670 } 671 672 mutex_exit(cbp->cb_lock); 673 } 674 675 /* 676 * The server uses this when processing SETCLIENTID_CONFIRM. Callback 677 * information may have been provided on SETCLIENTID and this call 678 * marks that information as confirmed and then starts a thread to 679 * test the callback path. 680 */ 681 void 682 rfs4_deleg_cb_check(rfs4_client_t *cp) 683 { 684 if (cp->rc_cbinfo.cb_newer.cb_new == FALSE) 685 return; 686 687 cp->rc_cbinfo.cb_newer.cb_confirmed = TRUE; 688 689 rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */ 690 691 (void) thread_create(NULL, 0, rfs4_do_cb_null, cp, 0, &p0, TS_RUN, 692 minclsyspri); 693 } 694 695 static void 696 rfs4args_cb_recall_free(nfs_cb_argop4 *argop) 697 { 698 CB_RECALL4args *rec_argp; 699 700 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall; 701 if (rec_argp->fh.nfs_fh4_val) 702 kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len); 703 } 704 705 /* ARGSUSED */ 706 static void 707 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop) 708 { 709 CB_GETATTR4args *argp; 710 711 argp = &argop->nfs_cb_argop4_u.opcbgetattr; 712 if (argp->fh.nfs_fh4_val) 713 kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len); 714 } 715 716 static void 717 rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp) 718 { 719 int i, arglen; 720 nfs_cb_argop4 *argop; 721 722 /* 723 * First free any special args alloc'd for specific ops. 724 */ 725 arglen = args->array_len; 726 argop = args->array; 727 for (i = 0; i < arglen; i++, argop++) { 728 729 switch (argop->argop) { 730 case OP_CB_RECALL: 731 rfs4args_cb_recall_free(argop); 732 break; 733 734 case OP_CB_GETATTR: 735 rfs4args_cb_getattr_free(argop); 736 break; 737 738 default: 739 return; 740 } 741 } 742 743 if (args->tag.utf8string_len > 0) 744 UTF8STRING_FREE(args->tag) 745 746 kmem_free(args->array, arglen * sizeof (nfs_cb_argop4)); 747 if (resp) 748 (void) xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp); 749 } 750 751 /* 752 * General callback routine for the server to the client. 753 */ 754 static enum clnt_stat 755 rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args, 756 CB_COMPOUND4res *res, struct timeval timeout) 757 { 758 rfs4_cbinfo_t *cbp; 759 CLIENT *ch; 760 /* start with this in case cb_getch() fails */ 761 enum clnt_stat stat = RPC_FAILED; 762 763 res->tag.utf8string_val = NULL; 764 res->array = NULL; 765 766 retry: 767 cbp = rfs4_cbinfo_hold(cp); 768 if (cbp == NULL) 769 return (stat); 770 771 /* get a client handle */ 772 if ((ch = rfs4_cb_getch(cbp)) != NULL) { 773 /* 774 * reset the cb_ident since it may have changed in 775 * rfs4_cbinfo_hold() 776 */ 777 args->callback_ident = cbp->cb_ident; 778 779 stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv, 780 (caddr_t)args, xdr_CB_COMPOUND4res, 781 (caddr_t)res, timeout); 782 783 /* free client handle */ 784 rfs4_cb_freech(cbp, ch, FALSE); 785 } 786 787 /* 788 * If the rele says that there may be new callback info then 789 * retry this sequence and it may succeed as a result of the 790 * new callback path 791 */ 792 if (rfs4_cbinfo_rele(cbp, 793 (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE) 794 goto retry; 795 796 return (stat); 797 } 798 799 /* 800 * Used by the NFSv4 server to get attributes for a file while 801 * handling the case where a file has been write delegated. For the 802 * time being, VOP_GETATTR() is called and CB_GETATTR processing is 803 * not undertaken. This call site is maintained in case the server is 804 * updated in the future to handle write delegation space guarantees. 805 */ 806 nfsstat4 807 rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr) 808 { 809 810 int error; 811 812 error = VOP_GETATTR(vp, vap, flag, cr, NULL); 813 return (puterrno4(error)); 814 } 815 816 /* 817 * This is used everywhere in the v2/v3 server to allow the 818 * integration of all NFS versions and the support of delegation. For 819 * now, just call the VOP_GETATTR(). If the NFSv4 server is enhanced 820 * in the future to provide space guarantees for write delegations 821 * then this call site should be expanded to interact with the client. 822 */ 823 int 824 rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr) 825 { 826 return (VOP_GETATTR(vp, vap, flag, cr, NULL)); 827 } 828 829 /* 830 * Place the actual cb_recall otw call to client. 831 */ 832 static void 833 rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc) 834 { 835 CB_COMPOUND4args cb4_args; 836 CB_COMPOUND4res cb4_res; 837 CB_RECALL4args *rec_argp; 838 CB_RECALL4res *rec_resp; 839 nfs_cb_argop4 *argop; 840 int numops; 841 int argoplist_size; 842 struct timeval timeout; 843 nfs_fh4 *fhp; 844 enum clnt_stat call_stat; 845 846 /* 847 * set up the compound args 848 */ 849 numops = 1; /* CB_RECALL only */ 850 851 argoplist_size = numops * sizeof (nfs_cb_argop4); 852 argop = kmem_zalloc(argoplist_size, KM_SLEEP); 853 argop->argop = OP_CB_RECALL; 854 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall; 855 856 (void) str_to_utf8("cb_recall", &cb4_args.tag); 857 cb4_args.minorversion = CB4_MINORVERSION; 858 /* cb4_args.callback_ident is set in rfs4_do_callback() */ 859 cb4_args.array_len = numops; 860 cb4_args.array = argop; 861 862 /* 863 * fill in the args struct 864 */ 865 bcopy(&dsp->rds_delegid.stateid, &rec_argp->stateid, sizeof (stateid4)); 866 rec_argp->truncate = trunc; 867 868 fhp = &dsp->rds_finfo->rf_filehandle; 869 rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) * 870 fhp->nfs_fh4_len, KM_SLEEP); 871 nfs_fh4_copy(fhp, &rec_argp->fh); 872 873 /* Keep track of when we did this for observability */ 874 dsp->rds_time_recalled = gethrestime_sec(); 875 876 /* 877 * Set up the timeout for the callback and make the actual call. 878 * Timeout will be 80% of the lease period for this server. 879 */ 880 timeout.tv_sec = (rfs4_lease_time * 80) / 100; 881 timeout.tv_usec = 0; 882 883 DTRACE_NFSV4_3(cb__recall__start, rfs4_client_t *, dsp->rds_client, 884 rfs4_deleg_state_t *, dsp, CB_RECALL4args *, rec_argp); 885 886 call_stat = rfs4_do_callback(dsp->rds_client, &cb4_args, &cb4_res, 887 timeout); 888 889 rec_resp = (cb4_res.array_len == 0) ? NULL : 890 &cb4_res.array[0].nfs_cb_resop4_u.opcbrecall; 891 892 DTRACE_NFSV4_3(cb__recall__done, rfs4_client_t *, dsp->rds_client, 893 rfs4_deleg_state_t *, dsp, CB_RECALL4res *, rec_resp); 894 895 if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) { 896 rfs4_return_deleg(dsp, TRUE); 897 } 898 899 rfs4freeargres(&cb4_args, &cb4_res); 900 } 901 902 struct recall_arg { 903 rfs4_deleg_state_t *dsp; 904 void (*recall)(rfs4_deleg_state_t *, bool_t trunc); 905 bool_t trunc; 906 }; 907 908 static void 909 do_recall(struct recall_arg *arg) 910 { 911 rfs4_deleg_state_t *dsp = arg->dsp; 912 rfs4_file_t *fp = dsp->rds_finfo; 913 callb_cpr_t cpr_info; 914 kmutex_t cpr_lock; 915 916 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 917 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall"); 918 919 /* 920 * It is possible that before this thread starts 921 * the client has send us a return_delegation, and 922 * if that is the case we do not need to send the 923 * recall callback. 924 */ 925 if (dsp->rds_dtype != OPEN_DELEGATE_NONE) { 926 DTRACE_PROBE3(nfss__i__recall, 927 struct recall_arg *, arg, 928 struct rfs4_deleg_state_t *, dsp, 929 struct rfs4_file_t *, fp); 930 931 if (arg->recall) 932 (void) (*arg->recall)(dsp, arg->trunc); 933 } 934 935 mutex_enter(fp->rf_dinfo.rd_recall_lock); 936 /* 937 * Recall count may go negative if the parent thread that is 938 * creating the individual callback threads does not modify 939 * the recall_count field before the callback thread actually 940 * gets a response from the CB_RECALL 941 */ 942 fp->rf_dinfo.rd_recall_count--; 943 if (fp->rf_dinfo.rd_recall_count == 0) 944 cv_signal(fp->rf_dinfo.rd_recall_cv); 945 mutex_exit(fp->rf_dinfo.rd_recall_lock); 946 947 mutex_enter(&cpr_lock); 948 CALLB_CPR_EXIT(&cpr_info); 949 mutex_destroy(&cpr_lock); 950 951 rfs4_deleg_state_rele(dsp); /* release the hold for this thread */ 952 953 kmem_free(arg, sizeof (struct recall_arg)); 954 } 955 956 struct master_recall_args { 957 rfs4_file_t *fp; 958 void (*recall)(rfs4_deleg_state_t *, bool_t); 959 bool_t trunc; 960 }; 961 962 static void 963 do_recall_file(struct master_recall_args *map) 964 { 965 rfs4_file_t *fp = map->fp; 966 rfs4_deleg_state_t *dsp; 967 struct recall_arg *arg; 968 callb_cpr_t cpr_info; 969 kmutex_t cpr_lock; 970 int32_t recall_count; 971 972 rfs4_dbe_lock(fp->rf_dbe); 973 974 /* Recall already in progress ? */ 975 mutex_enter(fp->rf_dinfo.rd_recall_lock); 976 if (fp->rf_dinfo.rd_recall_count != 0) { 977 mutex_exit(fp->rf_dinfo.rd_recall_lock); 978 rfs4_dbe_rele_nolock(fp->rf_dbe); 979 rfs4_dbe_unlock(fp->rf_dbe); 980 kmem_free(map, sizeof (struct master_recall_args)); 981 return; 982 } 983 984 mutex_exit(fp->rf_dinfo.rd_recall_lock); 985 986 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 987 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "v4RecallFile"); 988 989 recall_count = 0; 990 for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL; 991 dsp = list_next(&fp->rf_delegstatelist, dsp)) { 992 993 rfs4_dbe_lock(dsp->rds_dbe); 994 /* 995 * if this delegation state 996 * is being reaped skip it 997 */ 998 if (rfs4_dbe_is_invalid(dsp->rds_dbe)) { 999 rfs4_dbe_unlock(dsp->rds_dbe); 1000 continue; 1001 } 1002 1003 /* hold for receiving thread */ 1004 rfs4_dbe_hold(dsp->rds_dbe); 1005 rfs4_dbe_unlock(dsp->rds_dbe); 1006 1007 arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP); 1008 arg->recall = map->recall; 1009 arg->trunc = map->trunc; 1010 arg->dsp = dsp; 1011 1012 recall_count++; 1013 1014 (void) thread_create(NULL, 0, do_recall, arg, 0, &p0, TS_RUN, 1015 minclsyspri); 1016 } 1017 1018 rfs4_dbe_unlock(fp->rf_dbe); 1019 1020 mutex_enter(fp->rf_dinfo.rd_recall_lock); 1021 /* 1022 * Recall count may go negative if the parent thread that is 1023 * creating the individual callback threads does not modify 1024 * the recall_count field before the callback thread actually 1025 * gets a response from the CB_RECALL 1026 */ 1027 fp->rf_dinfo.rd_recall_count += recall_count; 1028 while (fp->rf_dinfo.rd_recall_count) 1029 cv_wait(fp->rf_dinfo.rd_recall_cv, fp->rf_dinfo.rd_recall_lock); 1030 1031 mutex_exit(fp->rf_dinfo.rd_recall_lock); 1032 1033 DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp); 1034 rfs4_file_rele(fp); 1035 kmem_free(map, sizeof (struct master_recall_args)); 1036 mutex_enter(&cpr_lock); 1037 CALLB_CPR_EXIT(&cpr_info); 1038 mutex_destroy(&cpr_lock); 1039 } 1040 1041 static void 1042 rfs4_recall_file(rfs4_file_t *fp, 1043 void (*recall)(rfs4_deleg_state_t *, bool_t trunc), 1044 bool_t trunc, rfs4_client_t *cp) 1045 { 1046 struct master_recall_args *args; 1047 1048 rfs4_dbe_lock(fp->rf_dbe); 1049 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) { 1050 rfs4_dbe_unlock(fp->rf_dbe); 1051 return; 1052 } 1053 rfs4_dbe_hold(fp->rf_dbe); /* hold for new thread */ 1054 1055 /* 1056 * Mark the time we started the recall processing. 1057 * If it has been previously recalled, do not reset the 1058 * timer since this is used for the revocation decision. 1059 */ 1060 if (fp->rf_dinfo.rd_time_recalled == 0) 1061 fp->rf_dinfo.rd_time_recalled = gethrestime_sec(); 1062 fp->rf_dinfo.rd_ever_recalled = TRUE; /* used for policy decision */ 1063 /* Client causing recall not always available */ 1064 if (cp) 1065 fp->rf_dinfo.rd_conflicted_client = cp->rc_clientid; 1066 1067 rfs4_dbe_unlock(fp->rf_dbe); 1068 1069 args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP); 1070 args->fp = fp; 1071 args->recall = recall; 1072 args->trunc = trunc; 1073 1074 (void) thread_create(NULL, 0, do_recall_file, args, 0, &p0, TS_RUN, 1075 minclsyspri); 1076 } 1077 1078 void 1079 rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp) 1080 { 1081 time_t elapsed1, elapsed2; 1082 1083 if (fp->rf_dinfo.rd_time_recalled != 0) { 1084 elapsed1 = gethrestime_sec() - fp->rf_dinfo.rd_time_recalled; 1085 elapsed2 = gethrestime_sec() - fp->rf_dinfo.rd_time_lastwrite; 1086 /* First check to see if a revocation should occur */ 1087 if (elapsed1 > rfs4_lease_time && 1088 elapsed2 > rfs4_lease_time) { 1089 rfs4_revoke_file(fp); 1090 return; 1091 } 1092 /* 1093 * Next check to see if a recall should be done again 1094 * so quickly. 1095 */ 1096 if (elapsed1 <= ((rfs4_lease_time * 20) / 100)) 1097 return; 1098 } 1099 rfs4_recall_file(fp, rfs4_do_cb_recall, trunc, cp); 1100 } 1101 1102 /* 1103 * rfs4_check_recall is called from rfs4_do_open to determine if the current 1104 * open conflicts with the delegation. 1105 * Return true if we need recall otherwise false. 1106 * Assumes entry locks for sp and sp->rs_finfo are held. 1107 */ 1108 bool_t 1109 rfs4_check_recall(rfs4_state_t *sp, uint32_t access) 1110 { 1111 open_delegation_type4 dtype = sp->rs_finfo->rf_dinfo.rd_dtype; 1112 1113 switch (dtype) { 1114 case OPEN_DELEGATE_NONE: 1115 /* Not currently delegated so there is nothing to do */ 1116 return (FALSE); 1117 case OPEN_DELEGATE_READ: 1118 /* 1119 * If the access is only asking for READ then there is 1120 * no conflict and nothing to do. If it is asking 1121 * for write, then there will be conflict and the read 1122 * delegation should be recalled. 1123 */ 1124 if (access == OPEN4_SHARE_ACCESS_READ) 1125 return (FALSE); 1126 else 1127 return (TRUE); 1128 case OPEN_DELEGATE_WRITE: 1129 /* Check to see if this client has the delegation */ 1130 return (rfs4_is_deleg(sp)); 1131 } 1132 1133 return (FALSE); 1134 } 1135 1136 /* 1137 * Return the "best" allowable delegation available given the current 1138 * delegation type and the desired access and deny modes on the file. 1139 * At the point that this routine is called we know that the access and 1140 * deny modes are consistent with the file modes. 1141 */ 1142 static open_delegation_type4 1143 rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp) 1144 { 1145 open_delegation_type4 dtype = fp->rf_dinfo.rd_dtype; 1146 uint32_t access = sp->rs_share_access; 1147 uint32_t deny = sp->rs_share_deny; 1148 int readcnt = 0; 1149 int writecnt = 0; 1150 1151 switch (dtype) { 1152 case OPEN_DELEGATE_NONE: 1153 /* 1154 * Determine if more than just this OPEN have the file 1155 * open and if so, no delegation may be provided to 1156 * the client. 1157 */ 1158 if (access & OPEN4_SHARE_ACCESS_WRITE) 1159 writecnt++; 1160 if (access & OPEN4_SHARE_ACCESS_READ) 1161 readcnt++; 1162 1163 if (fp->rf_access_read > readcnt || 1164 fp->rf_access_write > writecnt) 1165 return (OPEN_DELEGATE_NONE); 1166 1167 /* 1168 * If the client is going to write, or if the client 1169 * has exclusive access, return a write delegation. 1170 */ 1171 if ((access & OPEN4_SHARE_ACCESS_WRITE) || 1172 (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) 1173 return (OPEN_DELEGATE_WRITE); 1174 /* 1175 * If we don't want to write or we've haven't denied read 1176 * access to others, return a read delegation. 1177 */ 1178 if ((access & ~OPEN4_SHARE_ACCESS_WRITE) || 1179 (deny & ~OPEN4_SHARE_DENY_READ)) 1180 return (OPEN_DELEGATE_READ); 1181 1182 /* Shouldn't get here */ 1183 return (OPEN_DELEGATE_NONE); 1184 1185 case OPEN_DELEGATE_READ: 1186 /* 1187 * If the file is delegated for read but we wan't to 1188 * write or deny others to read then we can't delegate 1189 * the file. We shouldn't get here since the delegation should 1190 * have been recalled already. 1191 */ 1192 if ((access & OPEN4_SHARE_ACCESS_WRITE) || 1193 (deny & OPEN4_SHARE_DENY_READ)) 1194 return (OPEN_DELEGATE_NONE); 1195 return (OPEN_DELEGATE_READ); 1196 1197 case OPEN_DELEGATE_WRITE: 1198 return (OPEN_DELEGATE_WRITE); 1199 } 1200 1201 /* Shouldn't get here */ 1202 return (OPEN_DELEGATE_NONE); 1203 } 1204 1205 /* 1206 * Given the desired delegation type and the "history" of the file 1207 * determine the actual delegation type to return. 1208 */ 1209 static open_delegation_type4 1210 rfs4_delegation_policy(open_delegation_type4 dtype, 1211 rfs4_dinfo_t *dinfo, clientid4 cid) 1212 { 1213 time_t elapsed; 1214 1215 if (rfs4_deleg_policy != SRV_NORMAL_DELEGATE) 1216 return (OPEN_DELEGATE_NONE); 1217 1218 /* 1219 * Has this file/delegation ever been recalled? If not then 1220 * no further checks for a delegation race need to be done. 1221 * However if a recall has occurred, then check to see if a 1222 * client has caused its own delegation recall to occur. If 1223 * not, then has a delegation for this file been returned 1224 * recently? If so, then do not assign a new delegation to 1225 * avoid a "delegation race" between the original client and 1226 * the new/conflicting client. 1227 */ 1228 if (dinfo->rd_ever_recalled == TRUE) { 1229 if (dinfo->rd_conflicted_client != cid) { 1230 elapsed = gethrestime_sec() - dinfo->rd_time_returned; 1231 if (elapsed < rfs4_lease_time) 1232 return (OPEN_DELEGATE_NONE); 1233 } 1234 } 1235 1236 /* Limit the number of read grants */ 1237 if (dtype == OPEN_DELEGATE_READ && 1238 dinfo->rd_rdgrants > MAX_READ_DELEGATIONS) 1239 return (OPEN_DELEGATE_NONE); 1240 1241 /* 1242 * Should consider limiting total number of read/write 1243 * delegations the server will permit. 1244 */ 1245 1246 return (dtype); 1247 } 1248 1249 /* 1250 * Try and grant a delegation for an open give the state. The routine 1251 * returns the delegation type granted. This could be OPEN_DELEGATE_NONE. 1252 * 1253 * The state and associate file entry must be locked 1254 */ 1255 rfs4_deleg_state_t * 1256 rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall) 1257 { 1258 rfs4_file_t *fp = sp->rs_finfo; 1259 open_delegation_type4 dtype; 1260 int no_delegation; 1261 1262 ASSERT(rfs4_dbe_islocked(sp->rs_dbe)); 1263 ASSERT(rfs4_dbe_islocked(fp->rf_dbe)); 1264 1265 /* Is the server even providing delegations? */ 1266 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE) 1267 return (NULL); 1268 1269 /* Check to see if delegations have been temporarily disabled */ 1270 mutex_enter(&rfs4_deleg_lock); 1271 no_delegation = rfs4_deleg_disabled; 1272 mutex_exit(&rfs4_deleg_lock); 1273 1274 if (no_delegation) 1275 return (NULL); 1276 1277 /* Don't grant a delegation if a deletion is impending. */ 1278 if (fp->rf_dinfo.rd_hold_grant > 0) { 1279 return (NULL); 1280 } 1281 1282 /* 1283 * Don't grant a delegation if there are any lock manager 1284 * (NFSv2/v3) locks for the file. This is a bit of a hack (e.g., 1285 * if there are only read locks we should be able to grant a 1286 * read-only delegation), but it's good enough for now. 1287 * 1288 * MT safety: the lock manager checks for conflicting delegations 1289 * before processing a lock request. That check will block until 1290 * we are done here. So if the lock manager acquires a lock after 1291 * we decide to grant the delegation, the delegation will get 1292 * immediately recalled (if there's a conflict), so we're safe. 1293 */ 1294 if (lm_vp_active(fp->rf_vp)) { 1295 return (NULL); 1296 } 1297 1298 /* 1299 * Based on the type of delegation request passed in, take the 1300 * appropriate action (DELEG_NONE is handled above) 1301 */ 1302 switch (dreq) { 1303 1304 case DELEG_READ: 1305 case DELEG_WRITE: 1306 /* 1307 * The server "must" grant the delegation in this case. 1308 * Client is using open previous 1309 */ 1310 dtype = (open_delegation_type4)dreq; 1311 *recall = 1; 1312 break; 1313 case DELEG_ANY: 1314 /* 1315 * If a valid callback path does not exist, no delegation may 1316 * be granted. 1317 */ 1318 if (sp->rs_owner->ro_client->rc_cbinfo.cb_state != CB_OK) 1319 return (NULL); 1320 1321 /* 1322 * If the original operation which caused time_rm_delayed 1323 * to be set hasn't been retried and completed for one 1324 * full lease period, clear it and allow delegations to 1325 * get granted again. 1326 */ 1327 if (fp->rf_dinfo.rd_time_rm_delayed > 0 && 1328 gethrestime_sec() > 1329 fp->rf_dinfo.rd_time_rm_delayed + rfs4_lease_time) 1330 fp->rf_dinfo.rd_time_rm_delayed = 0; 1331 1332 /* 1333 * If we are waiting for a delegation to be returned then 1334 * don't delegate this file. We do this for correctness as 1335 * well as if the file is being recalled we would likely 1336 * recall this file again. 1337 */ 1338 1339 if (fp->rf_dinfo.rd_time_recalled != 0 || 1340 fp->rf_dinfo.rd_time_rm_delayed != 0) 1341 return (NULL); 1342 1343 /* Get the "best" delegation candidate */ 1344 dtype = rfs4_check_delegation(sp, fp); 1345 1346 if (dtype == OPEN_DELEGATE_NONE) 1347 return (NULL); 1348 1349 /* 1350 * Based on policy and the history of the file get the 1351 * actual delegation. 1352 */ 1353 dtype = rfs4_delegation_policy(dtype, &fp->rf_dinfo, 1354 sp->rs_owner->ro_client->rc_clientid); 1355 1356 if (dtype == OPEN_DELEGATE_NONE) 1357 return (NULL); 1358 break; 1359 default: 1360 return (NULL); 1361 } 1362 1363 /* set the delegation for the state */ 1364 return (rfs4_deleg_state(sp, dtype, recall)); 1365 } 1366 1367 void 1368 rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp, 1369 nfsace4 *ace, int recall) 1370 { 1371 open_write_delegation4 *wp; 1372 open_read_delegation4 *rp; 1373 nfs_space_limit4 *spl; 1374 nfsace4 nace; 1375 1376 /* 1377 * We need to allocate a new copy of the who string. 1378 * this string will be freed by the rfs4_op_open dis_resfree 1379 * routine. We need to do this allocation since replays will 1380 * be allocated and rfs4_compound can't tell the difference from 1381 * a replay and an inital open. N.B. if an ace is passed in, it 1382 * the caller's responsibility to free it. 1383 */ 1384 1385 if (ace == NULL) { 1386 /* 1387 * Default is to deny all access, the client will have 1388 * to contact the server. XXX Do we want to actually 1389 * set a deny for every one, or do we simply want to 1390 * construct an entity that will match no one? 1391 */ 1392 nace.type = ACE4_ACCESS_DENIED_ACE_TYPE; 1393 nace.flag = 0; 1394 nace.access_mask = ACE4_VALID_MASK_BITS; 1395 (void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who); 1396 } else { 1397 nace.type = ace->type; 1398 nace.flag = ace->flag; 1399 nace.access_mask = ace->access_mask; 1400 (void) utf8_copy(&ace->who, &nace.who); 1401 } 1402 1403 dp->delegation_type = dsp->rds_dtype; 1404 1405 switch (dsp->rds_dtype) { 1406 case OPEN_DELEGATE_NONE: 1407 break; 1408 case OPEN_DELEGATE_READ: 1409 rp = &dp->open_delegation4_u.read; 1410 rp->stateid = dsp->rds_delegid.stateid; 1411 rp->recall = (bool_t)recall; 1412 rp->permissions = nace; 1413 break; 1414 case OPEN_DELEGATE_WRITE: 1415 wp = &dp->open_delegation4_u.write; 1416 wp->stateid = dsp->rds_delegid.stateid; 1417 wp->recall = (bool_t)recall; 1418 spl = &wp->space_limit; 1419 spl->limitby = NFS_LIMIT_SIZE; 1420 spl->nfs_space_limit4_u.filesize = 0; 1421 wp->permissions = nace; 1422 break; 1423 } 1424 } 1425 1426 /* 1427 * Check if the file is delegated via the provided file struct. 1428 * Return TRUE if it is delegated. This is intended for use by 1429 * the v4 server. The v2/v3 server code should use rfs4_check_delegated(). 1430 * 1431 * Note that if the file is found to have a delegation, it is 1432 * recalled, unless the clientid of the caller matches the clientid of the 1433 * delegation. If the caller has specified, there is a slight delay 1434 * inserted in the hopes that the delegation will be returned quickly. 1435 */ 1436 bool_t 1437 rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp, 1438 bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp) 1439 { 1440 rfs4_deleg_state_t *dsp; 1441 1442 /* Is delegation enabled? */ 1443 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE) 1444 return (FALSE); 1445 1446 /* do we have a delegation on this file? */ 1447 rfs4_dbe_lock(fp->rf_dbe); 1448 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) { 1449 if (is_rm) 1450 fp->rf_dinfo.rd_hold_grant++; 1451 rfs4_dbe_unlock(fp->rf_dbe); 1452 return (FALSE); 1453 } 1454 /* 1455 * do we have a write delegation on this file or are we 1456 * requesting write access to a file with any type of existing 1457 * delegation? 1458 */ 1459 if (mode == FWRITE || fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) { 1460 if (cp != NULL) { 1461 dsp = list_head(&fp->rf_delegstatelist); 1462 if (dsp == NULL) { 1463 rfs4_dbe_unlock(fp->rf_dbe); 1464 return (FALSE); 1465 } 1466 /* 1467 * Does the requestor already own the delegation? 1468 */ 1469 if (dsp->rds_client->rc_clientid == *(cp)) { 1470 rfs4_dbe_unlock(fp->rf_dbe); 1471 return (FALSE); 1472 } 1473 } 1474 1475 rfs4_dbe_unlock(fp->rf_dbe); 1476 rfs4_recall_deleg(fp, trunc, NULL); 1477 1478 if (!do_delay) { 1479 rfs4_dbe_lock(fp->rf_dbe); 1480 fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec(); 1481 rfs4_dbe_unlock(fp->rf_dbe); 1482 return (TRUE); 1483 } 1484 1485 delay(NFS4_DELEGATION_CONFLICT_DELAY); 1486 1487 rfs4_dbe_lock(fp->rf_dbe); 1488 if (fp->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE) { 1489 fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec(); 1490 rfs4_dbe_unlock(fp->rf_dbe); 1491 return (TRUE); 1492 } 1493 } 1494 if (is_rm) 1495 fp->rf_dinfo.rd_hold_grant++; 1496 rfs4_dbe_unlock(fp->rf_dbe); 1497 return (FALSE); 1498 } 1499 1500 /* 1501 * Check if the file is delegated in the case of a v2 or v3 access. 1502 * Return TRUE if it is delegated which in turn means that v2 should 1503 * drop the request and in the case of v3 JUKEBOX should be returned. 1504 */ 1505 bool_t 1506 rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc) 1507 { 1508 rfs4_file_t *fp; 1509 bool_t create = FALSE; 1510 bool_t rc = FALSE; 1511 1512 rfs4_hold_deleg_policy(); 1513 1514 /* Is delegation enabled? */ 1515 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) { 1516 fp = rfs4_findfile(vp, NULL, &create); 1517 if (fp != NULL) { 1518 if (rfs4_check_delegated_byfp(mode, fp, trunc, 1519 TRUE, FALSE, NULL)) { 1520 rc = TRUE; 1521 } 1522 rfs4_file_rele(fp); 1523 } 1524 } 1525 rfs4_rele_deleg_policy(); 1526 return (rc); 1527 } 1528 1529 /* 1530 * Release a hold on the hold_grant counter which 1531 * prevents delegation from being granted while a remove 1532 * or a rename is in progress. 1533 */ 1534 void 1535 rfs4_clear_dont_grant(rfs4_file_t *fp) 1536 { 1537 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE) 1538 return; 1539 rfs4_dbe_lock(fp->rf_dbe); 1540 ASSERT(fp->rf_dinfo.rd_hold_grant > 0); 1541 fp->rf_dinfo.rd_hold_grant--; 1542 fp->rf_dinfo.rd_time_rm_delayed = 0; 1543 rfs4_dbe_unlock(fp->rf_dbe); 1544 } 1545 1546 /* 1547 * State support for delegation. 1548 * Set the state delegation type for this state; 1549 * This routine is called from open via rfs4_grant_delegation and the entry 1550 * locks on sp and sp->rs_finfo are assumed. 1551 */ 1552 static rfs4_deleg_state_t * 1553 rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall) 1554 { 1555 rfs4_file_t *fp = sp->rs_finfo; 1556 bool_t create = TRUE; 1557 rfs4_deleg_state_t *dsp; 1558 vnode_t *vp; 1559 int open_prev = *recall; 1560 int ret; 1561 int fflags = 0; 1562 1563 ASSERT(rfs4_dbe_islocked(sp->rs_dbe)); 1564 ASSERT(rfs4_dbe_islocked(fp->rf_dbe)); 1565 1566 /* Shouldn't happen */ 1567 if (fp->rf_dinfo.rd_recall_count != 0 || 1568 (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ && 1569 dtype != OPEN_DELEGATE_READ)) { 1570 return (NULL); 1571 } 1572 1573 /* Unlock to avoid deadlock */ 1574 rfs4_dbe_unlock(fp->rf_dbe); 1575 rfs4_dbe_unlock(sp->rs_dbe); 1576 1577 dsp = rfs4_finddeleg(sp, &create); 1578 1579 rfs4_dbe_lock(sp->rs_dbe); 1580 rfs4_dbe_lock(fp->rf_dbe); 1581 1582 if (dsp == NULL) 1583 return (NULL); 1584 1585 /* 1586 * It is possible that since we dropped the lock 1587 * in order to call finddeleg, the rfs4_file_t 1588 * was marked such that we should not grant a 1589 * delegation, if so bail out. 1590 */ 1591 if (fp->rf_dinfo.rd_hold_grant > 0) { 1592 rfs4_deleg_state_rele(dsp); 1593 return (NULL); 1594 } 1595 1596 if (create == FALSE) { 1597 if (sp->rs_owner->ro_client == dsp->rds_client && 1598 dsp->rds_dtype == dtype) { 1599 return (dsp); 1600 } else { 1601 rfs4_deleg_state_rele(dsp); 1602 return (NULL); 1603 } 1604 } 1605 1606 /* 1607 * Check that this file has not been delegated to another 1608 * client 1609 */ 1610 if (fp->rf_dinfo.rd_recall_count != 0 || 1611 fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE || 1612 (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ && 1613 dtype != OPEN_DELEGATE_READ)) { 1614 rfs4_deleg_state_rele(dsp); 1615 return (NULL); 1616 } 1617 1618 vp = fp->rf_vp; 1619 /* vnevent_support returns 0 if file system supports vnevents */ 1620 if (vnevent_support(vp, NULL)) { 1621 rfs4_deleg_state_rele(dsp); 1622 return (NULL); 1623 } 1624 1625 /* Calculate the fflags for this OPEN. */ 1626 if (sp->rs_share_access & OPEN4_SHARE_ACCESS_READ) 1627 fflags |= FREAD; 1628 if (sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE) 1629 fflags |= FWRITE; 1630 1631 *recall = 0; 1632 /* 1633 * Before granting a delegation we need to know if anyone else has 1634 * opened the file in a conflicting mode. However, first we need to 1635 * know how we opened the file to check the counts properly. 1636 */ 1637 if (dtype == OPEN_DELEGATE_READ) { 1638 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1639 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1640 vn_is_mapped(vp, V_WRITE)) { 1641 if (open_prev) { 1642 *recall = 1; 1643 } else { 1644 rfs4_deleg_state_rele(dsp); 1645 return (NULL); 1646 } 1647 } 1648 ret = fem_install(vp, deleg_rdops, (void *)fp, OPUNIQ, 1649 rfs4_mon_hold, rfs4_mon_rele); 1650 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1651 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1652 vn_is_mapped(vp, V_WRITE)) { 1653 if (open_prev) { 1654 *recall = 1; 1655 } else { 1656 (void) fem_uninstall(vp, deleg_rdops, 1657 (void *)fp); 1658 rfs4_deleg_state_rele(dsp); 1659 return (NULL); 1660 } 1661 } 1662 /* 1663 * Because a client can hold onto a delegation after the 1664 * file has been closed, we need to keep track of the 1665 * access to this file. Otherwise the CIFS server would 1666 * not know about the client accessing the file and could 1667 * inappropriately grant an OPLOCK. 1668 * fem_install() returns EBUSY when asked to install a 1669 * OPUNIQ monitor more than once. Therefore, check the 1670 * return code because we only want this done once. 1671 */ 1672 if (ret == 0) 1673 vn_open_upgrade(vp, FREAD); 1674 } else { /* WRITE */ 1675 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1676 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1677 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) || 1678 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) || 1679 vn_is_mapped(vp, V_RDORWR)) { 1680 if (open_prev) { 1681 *recall = 1; 1682 } else { 1683 rfs4_deleg_state_rele(dsp); 1684 return (NULL); 1685 } 1686 } 1687 ret = fem_install(vp, deleg_wrops, (void *)fp, OPUNIQ, 1688 rfs4_mon_hold, rfs4_mon_rele); 1689 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1690 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1691 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) || 1692 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) || 1693 vn_is_mapped(vp, V_RDORWR)) { 1694 if (open_prev) { 1695 *recall = 1; 1696 } else { 1697 (void) fem_uninstall(vp, deleg_wrops, 1698 (void *)fp); 1699 rfs4_deleg_state_rele(dsp); 1700 return (NULL); 1701 } 1702 } 1703 /* 1704 * Because a client can hold onto a delegation after the 1705 * file has been closed, we need to keep track of the 1706 * access to this file. Otherwise the CIFS server would 1707 * not know about the client accessing the file and could 1708 * inappropriately grant an OPLOCK. 1709 * fem_install() returns EBUSY when asked to install a 1710 * OPUNIQ monitor more than once. Therefore, check the 1711 * return code because we only want this done once. 1712 */ 1713 if (ret == 0) 1714 vn_open_upgrade(vp, FREAD|FWRITE); 1715 } 1716 /* Place on delegation list for file */ 1717 ASSERT(!list_link_active(&dsp->rds_node)); 1718 list_insert_tail(&fp->rf_delegstatelist, dsp); 1719 1720 dsp->rds_dtype = fp->rf_dinfo.rd_dtype = dtype; 1721 1722 /* Update delegation stats for this file */ 1723 fp->rf_dinfo.rd_time_lastgrant = gethrestime_sec(); 1724 1725 /* reset since this is a new delegation */ 1726 fp->rf_dinfo.rd_conflicted_client = 0; 1727 fp->rf_dinfo.rd_ever_recalled = FALSE; 1728 1729 if (dtype == OPEN_DELEGATE_READ) 1730 fp->rf_dinfo.rd_rdgrants++; 1731 else 1732 fp->rf_dinfo.rd_wrgrants++; 1733 1734 return (dsp); 1735 } 1736 1737 /* 1738 * State routine for the server when a delegation is returned. 1739 */ 1740 void 1741 rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked) 1742 { 1743 rfs4_file_t *fp = dsp->rds_finfo; 1744 open_delegation_type4 dtypewas; 1745 1746 rfs4_dbe_lock(fp->rf_dbe); 1747 1748 /* nothing to do if no longer on list */ 1749 if (!list_link_active(&dsp->rds_node)) { 1750 rfs4_dbe_unlock(fp->rf_dbe); 1751 return; 1752 } 1753 1754 /* Remove state from recall list */ 1755 list_remove(&fp->rf_delegstatelist, dsp); 1756 1757 if (list_is_empty(&fp->rf_delegstatelist)) { 1758 dtypewas = fp->rf_dinfo.rd_dtype; 1759 fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE; 1760 rfs4_dbe_cv_broadcast(fp->rf_dbe); 1761 1762 /* if file system was unshared, the vp will be NULL */ 1763 if (fp->rf_vp != NULL) { 1764 /* 1765 * Once a delegation is no longer held by any client, 1766 * the monitor is uninstalled. At this point, the 1767 * client must send OPEN otw, so we don't need the 1768 * reference on the vnode anymore. The open 1769 * downgrade removes the reference put on earlier. 1770 */ 1771 if (dtypewas == OPEN_DELEGATE_READ) { 1772 (void) fem_uninstall(fp->rf_vp, deleg_rdops, 1773 (void *)fp); 1774 vn_open_downgrade(fp->rf_vp, FREAD); 1775 } else if (dtypewas == OPEN_DELEGATE_WRITE) { 1776 (void) fem_uninstall(fp->rf_vp, deleg_wrops, 1777 (void *)fp); 1778 vn_open_downgrade(fp->rf_vp, FREAD|FWRITE); 1779 } 1780 } 1781 } 1782 1783 switch (dsp->rds_dtype) { 1784 case OPEN_DELEGATE_READ: 1785 fp->rf_dinfo.rd_rdgrants--; 1786 break; 1787 case OPEN_DELEGATE_WRITE: 1788 fp->rf_dinfo.rd_wrgrants--; 1789 break; 1790 default: 1791 break; 1792 } 1793 1794 /* used in the policy decision */ 1795 fp->rf_dinfo.rd_time_returned = gethrestime_sec(); 1796 1797 /* 1798 * reset the time_recalled field so future delegations are not 1799 * accidentally revoked 1800 */ 1801 if ((fp->rf_dinfo.rd_rdgrants + fp->rf_dinfo.rd_wrgrants) == 0) 1802 fp->rf_dinfo.rd_time_recalled = 0; 1803 1804 rfs4_dbe_unlock(fp->rf_dbe); 1805 1806 rfs4_dbe_lock(dsp->rds_dbe); 1807 1808 dsp->rds_dtype = OPEN_DELEGATE_NONE; 1809 1810 if (revoked == TRUE) 1811 dsp->rds_time_revoked = gethrestime_sec(); 1812 1813 rfs4_dbe_invalidate(dsp->rds_dbe); 1814 1815 rfs4_dbe_unlock(dsp->rds_dbe); 1816 1817 if (revoked == TRUE) { 1818 rfs4_dbe_lock(dsp->rds_client->rc_dbe); 1819 dsp->rds_client->rc_deleg_revoked++; /* observability */ 1820 rfs4_dbe_unlock(dsp->rds_client->rc_dbe); 1821 } 1822 } 1823 1824 static void 1825 rfs4_revoke_file(rfs4_file_t *fp) 1826 { 1827 rfs4_deleg_state_t *dsp; 1828 1829 /* 1830 * The lock for rfs4_file_t must be held when traversing the 1831 * delegation list but that lock needs to be released to call 1832 * rfs4_return_deleg() 1833 */ 1834 rfs4_dbe_lock(fp->rf_dbe); 1835 while (dsp = list_head(&fp->rf_delegstatelist)) { 1836 rfs4_dbe_hold(dsp->rds_dbe); 1837 rfs4_dbe_unlock(fp->rf_dbe); 1838 rfs4_return_deleg(dsp, TRUE); 1839 rfs4_deleg_state_rele(dsp); 1840 rfs4_dbe_lock(fp->rf_dbe); 1841 } 1842 rfs4_dbe_unlock(fp->rf_dbe); 1843 } 1844 1845 /* 1846 * A delegation is assumed to be present on the file associated with 1847 * "sp". Check to see if the delegation matches is associated with 1848 * the same client as referenced by "sp". If it is not, TRUE is 1849 * returned. If the delegation DOES match the client (or no 1850 * delegation is present), return FALSE. 1851 * Assume the state entry and file entry are locked. 1852 */ 1853 bool_t 1854 rfs4_is_deleg(rfs4_state_t *sp) 1855 { 1856 rfs4_deleg_state_t *dsp; 1857 rfs4_file_t *fp = sp->rs_finfo; 1858 rfs4_client_t *cp = sp->rs_owner->ro_client; 1859 1860 ASSERT(rfs4_dbe_islocked(fp->rf_dbe)); 1861 for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL; 1862 dsp = list_next(&fp->rf_delegstatelist, dsp)) { 1863 if (cp != dsp->rds_client) { 1864 return (TRUE); 1865 } 1866 } 1867 return (FALSE); 1868 } 1869 1870 void 1871 rfs4_disable_delegation(void) 1872 { 1873 mutex_enter(&rfs4_deleg_lock); 1874 rfs4_deleg_disabled++; 1875 mutex_exit(&rfs4_deleg_lock); 1876 } 1877 1878 void 1879 rfs4_enable_delegation(void) 1880 { 1881 mutex_enter(&rfs4_deleg_lock); 1882 ASSERT(rfs4_deleg_disabled > 0); 1883 rfs4_deleg_disabled--; 1884 mutex_exit(&rfs4_deleg_lock); 1885 } 1886 1887 void 1888 rfs4_mon_hold(void *arg) 1889 { 1890 rfs4_file_t *fp = arg; 1891 1892 rfs4_dbe_hold(fp->rf_dbe); 1893 } 1894 1895 void 1896 rfs4_mon_rele(void *arg) 1897 { 1898 rfs4_file_t *fp = arg; 1899 1900 rfs4_dbe_rele_nolock(fp->rf_dbe); 1901 }