1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/systm.h>
  27 #include <rpc/auth.h>
  28 #include <rpc/clnt.h>
  29 #include <nfs/nfs4_kprot.h>
  30 #include <nfs/nfs4.h>
  31 #include <nfs/lm.h>
  32 #include <sys/cmn_err.h>
  33 #include <sys/disp.h>
  34 #include <sys/sdt.h>
  35 
  36 #include <sys/pathname.h>
  37 
  38 #include <sys/strsubr.h>
  39 #include <sys/ddi.h>
  40 
  41 #include <sys/vnode.h>
  42 #include <sys/sdt.h>
  43 #include <inet/common.h>
  44 #include <inet/ip.h>
  45 #include <inet/ip6.h>
  46 
  47 #define MAX_READ_DELEGATIONS 5
  48 
  49 krwlock_t rfs4_deleg_policy_lock;
  50 srv_deleg_policy_t rfs4_deleg_policy = SRV_NEVER_DELEGATE;
  51 static int rfs4_deleg_wlp = 5;
  52 kmutex_t rfs4_deleg_lock;
  53 static int rfs4_deleg_disabled;
  54 static int rfs4_max_setup_cb_tries = 5;
  55 
  56 #ifdef DEBUG
  57 
  58 static int rfs4_test_cbgetattr_fail = 0;
  59 int rfs4_cb_null;
  60 int rfs4_cb_debug;
  61 int rfs4_deleg_debug;
  62 
  63 #endif
  64 
  65 static void rfs4_recall_file(rfs4_file_t *,
  66     void (*recall)(rfs4_deleg_state_t *, bool_t),
  67     bool_t, rfs4_client_t *);
  68 static  void            rfs4_revoke_file(rfs4_file_t *);
  69 static  void            rfs4_cb_chflush(rfs4_cbinfo_t *);
  70 static  CLIENT          *rfs4_cb_getch(rfs4_cbinfo_t *);
  71 static  void            rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t);
  72 static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *,
  73     open_delegation_type4, int *);
  74 
  75 /*
  76  * Convert a universal address to an transport specific
  77  * address using inet_pton.
  78  */
  79 static int
  80 uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp)
  81 {
  82         int dots = 0, i, j, len, k;
  83         unsigned char c;
  84         in_port_t port = 0;
  85 
  86         len = strlen(ua);
  87 
  88         for (i = len-1; i >= 0; i--) {
  89 
  90                 if (ua[i] == '.')
  91                         dots++;
  92 
  93                 if (dots == 2) {
  94 
  95                         ua[i] = '\0';
  96                         /*
  97                          * We use k to remember were to stick '.' back, since
  98                          * ua was kmem_allocateded from the pool len+1.
  99                          */
 100                         k = i;
 101                         if (inet_pton(af, ua, ap) == 1) {
 102 
 103                                 c = 0;
 104 
 105                                 for (j = i+1; j < len; j++) {
 106                                         if (ua[j] == '.') {
 107                                                 port = c << 8;
 108                                                 c = 0;
 109                                         } else if (ua[j] >= '0' &&
 110                                             ua[j] <= '9') {
 111                                                 c *= 10;
 112                                                 c += ua[j] - '0';
 113                                         } else {
 114                                                 ua[k] = '.';
 115                                                 return (EINVAL);
 116                                         }
 117                                 }
 118                                 port += c;
 119 
 120 
 121                                 /* reset to network order */
 122                                 if (af == AF_INET) {
 123                                         *(uint32_t *)ap =
 124                                             htonl(*(uint32_t *)ap);
 125                                         *pp = htons(port);
 126                                 } else {
 127                                         int ix;
 128                                         uint16_t *sap;
 129 
 130                                         for (sap = ap, ix = 0; ix <
 131                                             sizeof (struct in6_addr) /
 132                                             sizeof (uint16_t); ix++)
 133                                                 sap[ix] = htons(sap[ix]);
 134 
 135                                         *pp = htons(port);
 136                                 }
 137 
 138                                 ua[k] = '.';
 139                                 return (0);
 140                         } else {
 141                                 ua[k] = '.';
 142                                 return (EINVAL);
 143                         }
 144                 }
 145         }
 146 
 147         return (EINVAL);
 148 }
 149 
 150 /*
 151  * Update the delegation policy with the
 152  * value of "new_policy"
 153  */
 154 void
 155 rfs4_set_deleg_policy(srv_deleg_policy_t new_policy)
 156 {
 157         rw_enter(&rfs4_deleg_policy_lock, RW_WRITER);
 158         rfs4_deleg_policy = new_policy;
 159         rw_exit(&rfs4_deleg_policy_lock);
 160 }
 161 
 162 void
 163 rfs4_hold_deleg_policy(void)
 164 {
 165         rw_enter(&rfs4_deleg_policy_lock, RW_READER);
 166 }
 167 
 168 void
 169 rfs4_rele_deleg_policy(void)
 170 {
 171         rw_exit(&rfs4_deleg_policy_lock);
 172 }
 173 
 174 
 175 /*
 176  * This free function is to be used when the client struct is being
 177  * released and nothing at all is needed of the callback info any
 178  * longer.
 179  */
 180 void
 181 rfs4_cbinfo_free(rfs4_cbinfo_t *cbp)
 182 {
 183         char *addr = cbp->cb_callback.cb_location.r_addr;
 184         char *netid = cbp->cb_callback.cb_location.r_netid;
 185 
 186         /* Free old address if any */
 187 
 188         if (addr)
 189                 kmem_free(addr, strlen(addr) + 1);
 190         if (netid)
 191                 kmem_free(netid, strlen(netid) + 1);
 192 
 193         addr = cbp->cb_newer.cb_callback.cb_location.r_addr;
 194         netid = cbp->cb_newer.cb_callback.cb_location.r_netid;
 195 
 196         if (addr)
 197                 kmem_free(addr, strlen(addr) + 1);
 198         if (netid)
 199                 kmem_free(netid, strlen(netid) + 1);
 200 
 201         if (cbp->cb_chc_free) {
 202                 rfs4_cb_chflush(cbp);
 203         }
 204 }
 205 
 206 /*
 207  * The server uses this to check the callback path supplied by the
 208  * client.  The callback connection is marked "in progress" while this
 209  * work is going on and then eventually marked either OK or FAILED.
 210  * This work can be done as part of a separate thread and at the end
 211  * of this the thread will exit or it may be done such that the caller
 212  * will continue with other work.
 213  */
 214 static void
 215 rfs4_do_cb_null(rfs4_client_t *cp)
 216 {
 217         struct timeval tv;
 218         CLIENT *ch;
 219         rfs4_cbstate_t newstate;
 220         rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
 221 
 222         mutex_enter(cbp->cb_lock);
 223         /* If another thread is doing CB_NULL RPC then return */
 224         if (cbp->cb_nullcaller == TRUE) {
 225                 mutex_exit(cbp->cb_lock);
 226                 rfs4_client_rele(cp);
 227                 return;
 228         }
 229 
 230         /* Mark the cbinfo as having a thread in the NULL callback */
 231         cbp->cb_nullcaller = TRUE;
 232 
 233         /*
 234          * Are there other threads still using the cbinfo client
 235          * handles?  If so, this thread must wait before going and
 236          * mucking aroiund with the callback information
 237          */
 238         while (cbp->cb_refcnt != 0)
 239                 cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock);
 240 
 241         /*
 242          * This thread itself may find that new callback info has
 243          * arrived and is set up to handle this case and redrive the
 244          * call to the client's callback server.
 245          */
 246 retry:
 247         if (cbp->cb_newer.cb_new == TRUE &&
 248             cbp->cb_newer.cb_confirmed == TRUE) {
 249                 char *addr = cbp->cb_callback.cb_location.r_addr;
 250                 char *netid = cbp->cb_callback.cb_location.r_netid;
 251 
 252                 /*
 253                  * Free the old stuff if it exists; may be the first
 254                  * time through this path
 255                  */
 256                 if (addr)
 257                         kmem_free(addr, strlen(addr) + 1);
 258                 if (netid)
 259                         kmem_free(netid, strlen(netid) + 1);
 260 
 261                 /* Move over the addr/netid */
 262                 cbp->cb_callback.cb_location.r_addr =
 263                     cbp->cb_newer.cb_callback.cb_location.r_addr;
 264                 cbp->cb_newer.cb_callback.cb_location.r_addr = NULL;
 265                 cbp->cb_callback.cb_location.r_netid =
 266                     cbp->cb_newer.cb_callback.cb_location.r_netid;
 267                 cbp->cb_newer.cb_callback.cb_location.r_netid = NULL;
 268 
 269                 /* Get the program number */
 270                 cbp->cb_callback.cb_program =
 271                     cbp->cb_newer.cb_callback.cb_program;
 272                 cbp->cb_newer.cb_callback.cb_program = 0;
 273 
 274                 /* Don't forget the protocol's "cb_ident" field */
 275                 cbp->cb_ident = cbp->cb_newer.cb_ident;
 276                 cbp->cb_newer.cb_ident = 0;
 277 
 278                 /* no longer new */
 279                 cbp->cb_newer.cb_new = FALSE;
 280                 cbp->cb_newer.cb_confirmed = FALSE;
 281 
 282                 /* get rid of the old client handles that may exist */
 283                 rfs4_cb_chflush(cbp);
 284 
 285                 cbp->cb_state = CB_NONE;
 286                 cbp->cb_timefailed = 0; /* reset the clock */
 287                 cbp->cb_notified_of_cb_path_down = TRUE;
 288         }
 289 
 290         if (cbp->cb_state != CB_NONE) {
 291                 cv_broadcast(cbp->cb_cv);    /* let the others know */
 292                 cbp->cb_nullcaller = FALSE;
 293                 mutex_exit(cbp->cb_lock);
 294                 rfs4_client_rele(cp);
 295                 return;
 296         }
 297 
 298         /* mark rfs4_client_t as CALLBACK NULL in progress */
 299         cbp->cb_state = CB_INPROG;
 300         mutex_exit(cbp->cb_lock);
 301 
 302         /* get/generate a client handle */
 303         if ((ch = rfs4_cb_getch(cbp)) == NULL) {
 304                 mutex_enter(cbp->cb_lock);
 305                 cbp->cb_state = CB_BAD;
 306                 cbp->cb_timefailed = gethrestime_sec(); /* observability */
 307                 goto retry;
 308         }
 309 
 310 
 311         tv.tv_sec = 30;
 312         tv.tv_usec = 0;
 313         if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) {
 314                 newstate = CB_BAD;
 315         } else {
 316                 newstate = CB_OK;
 317 #ifdef  DEBUG
 318                 rfs4_cb_null++;
 319 #endif
 320         }
 321 
 322         /* Check to see if the client has specified new callback info */
 323         mutex_enter(cbp->cb_lock);
 324         rfs4_cb_freech(cbp, ch, TRUE);
 325         if (cbp->cb_newer.cb_new == TRUE &&
 326             cbp->cb_newer.cb_confirmed == TRUE) {
 327                 goto retry;     /* give the CB_NULL another chance */
 328         }
 329 
 330         cbp->cb_state = newstate;
 331         if (cbp->cb_state == CB_BAD)
 332                 cbp->cb_timefailed = gethrestime_sec(); /* observability */
 333 
 334         cv_broadcast(cbp->cb_cv);    /* start up the other threads */
 335         cbp->cb_nullcaller = FALSE;
 336         mutex_exit(cbp->cb_lock);
 337 
 338         rfs4_client_rele(cp);
 339 }
 340 
 341 /*
 342  * Given a client struct, inspect the callback info to see if the
 343  * callback path is up and available.
 344  *
 345  * If new callback path is available and no one has set it up then
 346  * try to set it up. If setup is not successful after 5 tries (5 secs)
 347  * then gives up and returns NULL.
 348  *
 349  * If callback path is being initialized, then wait for the CB_NULL RPC
 350  * call to occur.
 351  */
 352 static rfs4_cbinfo_t *
 353 rfs4_cbinfo_hold(rfs4_client_t *cp)
 354 {
 355         rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
 356         int retries = 0;
 357 
 358         mutex_enter(cbp->cb_lock);
 359 
 360         while (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) {
 361                 /*
 362                  * Looks like a new callback path may be available and
 363                  * noone has set it up.
 364                  */
 365                 mutex_exit(cbp->cb_lock);
 366                 rfs4_dbe_hold(cp->rc_dbe);
 367                 rfs4_do_cb_null(cp); /* caller will release client hold */
 368 
 369                 mutex_enter(cbp->cb_lock);
 370                 /*
 371                  * If callback path is no longer new, or it's being setup
 372                  * then stop and wait for it to be done.
 373                  */
 374                 if (cbp->cb_newer.cb_new == FALSE || cbp->cb_nullcaller == TRUE)
 375                         break;
 376                 mutex_exit(cbp->cb_lock);
 377 
 378                 if (++retries >= rfs4_max_setup_cb_tries)
 379                         return (NULL);
 380                 delay(hz);
 381                 mutex_enter(cbp->cb_lock);
 382         }
 383 
 384         /* Is there a thread working on doing the CB_NULL RPC? */
 385         if (cbp->cb_nullcaller == TRUE)
 386                 cv_wait(cbp->cb_cv, cbp->cb_lock);  /* if so, wait on it */
 387 
 388         /* If the callback path is not okay (up and running), just quit */
 389         if (cbp->cb_state != CB_OK) {
 390                 mutex_exit(cbp->cb_lock);
 391                 return (NULL);
 392         }
 393 
 394         /* Let someone know we are using the current callback info */
 395         cbp->cb_refcnt++;
 396         mutex_exit(cbp->cb_lock);
 397         return (cbp);
 398 }
 399 
 400 /*
 401  * The caller is done with the callback info.  It may be that the
 402  * caller's RPC failed and the NFSv4 client has actually provided new
 403  * callback information.  If so, let the caller know so they can
 404  * advantage of this and maybe retry the RPC that originally failed.
 405  */
 406 static int
 407 rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate)
 408 {
 409         int cb_new = FALSE;
 410 
 411         mutex_enter(cbp->cb_lock);
 412 
 413         /* The caller gets a chance to mark the callback info as bad */
 414         if (newstate != CB_NOCHANGE)
 415                 cbp->cb_state = newstate;
 416         if (newstate == CB_FAILED) {
 417                 cbp->cb_timefailed = gethrestime_sec(); /* observability */
 418                 cbp->cb_notified_of_cb_path_down = FALSE;
 419         }
 420 
 421         cbp->cb_refcnt--;    /* no longer using the information */
 422 
 423         /*
 424          * A thread may be waiting on this one to finish and if so,
 425          * let it know that it is okay to do the CB_NULL to the
 426          * client's callback server.
 427          */
 428         if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller)
 429                 cv_broadcast(cbp->cb_cv_nullcaller);
 430 
 431         /*
 432          * If this is the last thread to use the callback info and
 433          * there is new callback information to try and no thread is
 434          * there ready to do the CB_NULL, then return true to teh
 435          * caller so they can do the CB_NULL
 436          */
 437         if (cbp->cb_refcnt == 0 &&
 438             cbp->cb_nullcaller == FALSE &&
 439             cbp->cb_newer.cb_new == TRUE &&
 440             cbp->cb_newer.cb_confirmed == TRUE)
 441                 cb_new = TRUE;
 442 
 443         mutex_exit(cbp->cb_lock);
 444 
 445         return (cb_new);
 446 }
 447 
 448 /*
 449  * Given the information in the callback info struct, create a client
 450  * handle that can be used by the server for its callback path.
 451  */
 452 static CLIENT *
 453 rfs4_cbch_init(rfs4_cbinfo_t *cbp)
 454 {
 455         struct knetconfig knc;
 456         vnode_t *vp;
 457         struct sockaddr_in addr4;
 458         struct sockaddr_in6 addr6;
 459         void *addr, *taddr;
 460         in_port_t *pp;
 461         int af;
 462         char *devnam;
 463         struct netbuf nb;
 464         int size;
 465         CLIENT *ch = NULL;
 466         int useresvport = 0;
 467 
 468         mutex_enter(cbp->cb_lock);
 469 
 470         if (cbp->cb_callback.cb_location.r_netid == NULL ||
 471             cbp->cb_callback.cb_location.r_addr == NULL) {
 472                 goto cb_init_out;
 473         }
 474 
 475         if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp") == 0) {
 476                 knc.knc_semantics = NC_TPI_COTS;
 477                 knc.knc_protofmly = "inet";
 478                 knc.knc_proto = "tcp";
 479                 devnam = "/dev/tcp";
 480                 af = AF_INET;
 481         } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp")
 482             == 0) {
 483                 knc.knc_semantics = NC_TPI_CLTS;
 484                 knc.knc_protofmly = "inet";
 485                 knc.knc_proto = "udp";
 486                 devnam = "/dev/udp";
 487                 af = AF_INET;
 488         } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp6")
 489             == 0) {
 490                 knc.knc_semantics = NC_TPI_COTS;
 491                 knc.knc_protofmly = "inet6";
 492                 knc.knc_proto = "tcp";
 493                 devnam = "/dev/tcp6";
 494                 af = AF_INET6;
 495         } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp6")
 496             == 0) {
 497                 knc.knc_semantics = NC_TPI_CLTS;
 498                 knc.knc_protofmly = "inet6";
 499                 knc.knc_proto = "udp";
 500                 devnam = "/dev/udp6";
 501                 af = AF_INET6;
 502         } else {
 503                 goto cb_init_out;
 504         }
 505 
 506         if (lookupname(devnam, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp) != 0) {
 507 
 508                 goto cb_init_out;
 509         }
 510 
 511         if (vp->v_type != VCHR) {
 512                 VN_RELE(vp);
 513                 goto cb_init_out;
 514         }
 515 
 516         knc.knc_rdev = vp->v_rdev;
 517 
 518         VN_RELE(vp);
 519 
 520         if (af == AF_INET) {
 521                 size = sizeof (addr4);
 522                 bzero(&addr4, size);
 523                 addr4.sin_family = (sa_family_t)af;
 524                 addr = &addr4.sin_addr;
 525                 pp = &addr4.sin_port;
 526                 taddr = &addr4;
 527         } else /* AF_INET6 */ {
 528                 size = sizeof (addr6);
 529                 bzero(&addr6, size);
 530                 addr6.sin6_family = (sa_family_t)af;
 531                 addr = &addr6.sin6_addr;
 532                 pp = &addr6.sin6_port;
 533                 taddr = &addr6;
 534         }
 535 
 536         if (uaddr2sockaddr(af,
 537             cbp->cb_callback.cb_location.r_addr, addr, pp)) {
 538 
 539                 goto cb_init_out;
 540         }
 541 
 542 
 543         nb.maxlen = nb.len = size;
 544         nb.buf = (char *)taddr;
 545 
 546         if (clnt_tli_kcreate(&knc, &nb, cbp->cb_callback.cb_program,
 547             NFS_CB, 0, 0, curthread->t_cred, &ch)) {
 548 
 549                 ch = NULL;
 550         }
 551 
 552         /* turn off reserved port usage */
 553         (void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, (char *)&useresvport);
 554 
 555 cb_init_out:
 556         mutex_exit(cbp->cb_lock);
 557         return (ch);
 558 }
 559 
 560 /*
 561  * Iterate over the client handle cache and
 562  * destroy it.
 563  */
 564 static void
 565 rfs4_cb_chflush(rfs4_cbinfo_t *cbp)
 566 {
 567         CLIENT *ch;
 568 
 569         while (cbp->cb_chc_free) {
 570                 cbp->cb_chc_free--;
 571                 ch = cbp->cb_chc[cbp->cb_chc_free];
 572                 cbp->cb_chc[cbp->cb_chc_free] = NULL;
 573                 if (ch) {
 574                         if (ch->cl_auth)
 575                                 auth_destroy(ch->cl_auth);
 576                         clnt_destroy(ch);
 577                 }
 578         }
 579 }
 580 
 581 /*
 582  * Return a client handle, either from a the small
 583  * rfs4_client_t cache or one that we just created.
 584  */
 585 static CLIENT *
 586 rfs4_cb_getch(rfs4_cbinfo_t *cbp)
 587 {
 588         CLIENT *cbch = NULL;
 589         uint32_t zilch = 0;
 590 
 591         mutex_enter(cbp->cb_lock);
 592 
 593         if (cbp->cb_chc_free) {
 594                 cbp->cb_chc_free--;
 595                 cbch = cbp->cb_chc[ cbp->cb_chc_free ];
 596                 mutex_exit(cbp->cb_lock);
 597                 (void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch);
 598                 return (cbch);
 599         }
 600 
 601         mutex_exit(cbp->cb_lock);
 602 
 603         /* none free so make it now */
 604         cbch = rfs4_cbch_init(cbp);
 605 
 606         return (cbch);
 607 }
 608 
 609 /*
 610  * Return the client handle to the small cache or
 611  * destroy it.
 612  */
 613 static void
 614 rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld)
 615 {
 616         if (lockheld == FALSE)
 617                 mutex_enter(cbp->cb_lock);
 618 
 619         if (cbp->cb_chc_free < RFS4_CBCH_MAX) {
 620                 cbp->cb_chc[ cbp->cb_chc_free++ ] = ch;
 621                 if (lockheld == FALSE)
 622                         mutex_exit(cbp->cb_lock);
 623                 return;
 624         }
 625         if (lockheld == FALSE)
 626                 mutex_exit(cbp->cb_lock);
 627 
 628         /*
 629          * cache maxed out of free entries, obliterate
 630          * this client handle, destroy it, throw it away.
 631          */
 632         if (ch->cl_auth)
 633                 auth_destroy(ch->cl_auth);
 634         clnt_destroy(ch);
 635 }
 636 
 637 /*
 638  * With the supplied callback information - initialize the client
 639  * callback data.  If there is a callback in progress, save the
 640  * callback info so that a thread can pick it up in the future.
 641  */
 642 void
 643 rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident)
 644 {
 645         char *addr = NULL;
 646         char *netid = NULL;
 647         rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
 648         size_t len;
 649 
 650         /* Set the call back for the client */
 651         if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' &&
 652             cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') {
 653                 len = strlen(cb->cb_location.r_addr) + 1;
 654                 addr = kmem_alloc(len, KM_SLEEP);
 655                 bcopy(cb->cb_location.r_addr, addr, len);
 656                 len = strlen(cb->cb_location.r_netid) + 1;
 657                 netid = kmem_alloc(len, KM_SLEEP);
 658                 bcopy(cb->cb_location.r_netid, netid, len);
 659         }
 660         /* ready to save the new information but first free old, if exists */
 661         mutex_enter(cbp->cb_lock);
 662 
 663         cbp->cb_newer.cb_callback.cb_program = cb->cb_program;
 664 
 665         if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL)
 666                 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr,
 667                     strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1);
 668         cbp->cb_newer.cb_callback.cb_location.r_addr = addr;
 669 
 670         if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL)
 671                 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid,
 672                     strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1);
 673         cbp->cb_newer.cb_callback.cb_location.r_netid = netid;
 674 
 675         cbp->cb_newer.cb_ident = cb_ident;
 676 
 677         if (addr && *addr && netid && *netid) {
 678                 cbp->cb_newer.cb_new = TRUE;
 679                 cbp->cb_newer.cb_confirmed = FALSE;
 680         } else {
 681                 cbp->cb_newer.cb_new = FALSE;
 682                 cbp->cb_newer.cb_confirmed = FALSE;
 683         }
 684 
 685         mutex_exit(cbp->cb_lock);
 686 }
 687 
 688 /*
 689  * The server uses this when processing SETCLIENTID_CONFIRM.  Callback
 690  * information may have been provided on SETCLIENTID and this call
 691  * marks that information as confirmed and then starts a thread to
 692  * test the callback path.
 693  */
 694 void
 695 rfs4_deleg_cb_check(rfs4_client_t *cp)
 696 {
 697         if (cp->rc_cbinfo.cb_newer.cb_new == FALSE)
 698                 return;
 699 
 700         cp->rc_cbinfo.cb_newer.cb_confirmed = TRUE;
 701 
 702         rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */
 703 
 704         (void) thread_create(NULL, 0, rfs4_do_cb_null, cp, 0, &p0, TS_RUN,
 705             minclsyspri);
 706 }
 707 
 708 static void
 709 rfs4args_cb_recall_free(nfs_cb_argop4 *argop)
 710 {
 711         CB_RECALL4args  *rec_argp;
 712 
 713         rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
 714         if (rec_argp->fh.nfs_fh4_val)
 715                 kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len);
 716 }
 717 
 718 /* ARGSUSED */
 719 static void
 720 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop)
 721 {
 722         CB_GETATTR4args *argp;
 723 
 724         argp = &argop->nfs_cb_argop4_u.opcbgetattr;
 725         if (argp->fh.nfs_fh4_val)
 726                 kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len);
 727 }
 728 
 729 static void
 730 rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp)
 731 {
 732         int i, arglen;
 733         nfs_cb_argop4 *argop;
 734 
 735         /*
 736          * First free any special args alloc'd for specific ops.
 737          */
 738         arglen = args->array_len;
 739         argop = args->array;
 740         for (i = 0; i < arglen; i++, argop++) {
 741 
 742                 switch (argop->argop) {
 743                 case OP_CB_RECALL:
 744                         rfs4args_cb_recall_free(argop);
 745                         break;
 746 
 747                 case OP_CB_GETATTR:
 748                         rfs4args_cb_getattr_free(argop);
 749                         break;
 750 
 751                 default:
 752                         return;
 753                 }
 754         }
 755 
 756         if (args->tag.utf8string_len > 0)
 757                 UTF8STRING_FREE(args->tag)
 758 
 759         kmem_free(args->array, arglen * sizeof (nfs_cb_argop4));
 760         if (resp)
 761                 (void) xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp);
 762 }
 763 
 764 /*
 765  * General callback routine for the server to the client.
 766  */
 767 static enum clnt_stat
 768 rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args,
 769     CB_COMPOUND4res *res, struct timeval timeout)
 770 {
 771         rfs4_cbinfo_t *cbp;
 772         CLIENT *ch;
 773         /* start with this in case cb_getch() fails */
 774         enum clnt_stat  stat = RPC_FAILED;
 775 
 776         res->tag.utf8string_val = NULL;
 777         res->array = NULL;
 778 
 779 retry:
 780         cbp = rfs4_cbinfo_hold(cp);
 781         if (cbp == NULL)
 782                 return (stat);
 783 
 784         /* get a client handle */
 785         if ((ch = rfs4_cb_getch(cbp)) != NULL) {
 786                 /*
 787                  * reset the cb_ident since it may have changed in
 788                  * rfs4_cbinfo_hold()
 789                  */
 790                 args->callback_ident = cbp->cb_ident;
 791 
 792                 stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv,
 793                     (caddr_t)args, xdr_CB_COMPOUND4res,
 794                     (caddr_t)res, timeout);
 795 
 796                 /* free client handle */
 797                 rfs4_cb_freech(cbp, ch, FALSE);
 798         }
 799 
 800         /*
 801          * If the rele says that there may be new callback info then
 802          * retry this sequence and it may succeed as a result of the
 803          * new callback path
 804          */
 805         if (rfs4_cbinfo_rele(cbp,
 806             (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE)
 807                 goto retry;
 808 
 809         return (stat);
 810 }
 811 
 812 /*
 813  * Used by the NFSv4 server to get attributes for a file while
 814  * handling the case where a file has been write delegated.  For the
 815  * time being, VOP_GETATTR() is called and CB_GETATTR processing is
 816  * not undertaken.  This call site is maintained in case the server is
 817  * updated in the future to handle write delegation space guarantees.
 818  */
 819 nfsstat4
 820 rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
 821 {
 822 
 823         int error;
 824 
 825         error = VOP_GETATTR(vp, vap, flag, cr, NULL);
 826         return (puterrno4(error));
 827 }
 828 
 829 /*
 830  * This is used everywhere in the v2/v3 server to allow the
 831  * integration of all NFS versions and the support of delegation.  For
 832  * now, just call the VOP_GETATTR().  If the NFSv4 server is enhanced
 833  * in the future to provide space guarantees for write delegations
 834  * then this call site should be expanded to interact with the client.
 835  */
 836 int
 837 rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
 838 {
 839         return (VOP_GETATTR(vp, vap, flag, cr, NULL));
 840 }
 841 
 842 /*
 843  * Place the actual cb_recall otw call to client.
 844  */
 845 static void
 846 rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc)
 847 {
 848         CB_COMPOUND4args        cb4_args;
 849         CB_COMPOUND4res         cb4_res;
 850         CB_RECALL4args          *rec_argp;
 851         CB_RECALL4res           *rec_resp;
 852         nfs_cb_argop4           *argop;
 853         int                     numops;
 854         int                     argoplist_size;
 855         struct timeval          timeout;
 856         nfs_fh4                 *fhp;
 857         enum clnt_stat          call_stat;
 858 
 859         /*
 860          * set up the compound args
 861          */
 862         numops = 1;     /* CB_RECALL only */
 863 
 864         argoplist_size = numops * sizeof (nfs_cb_argop4);
 865         argop = kmem_zalloc(argoplist_size, KM_SLEEP);
 866         argop->argop = OP_CB_RECALL;
 867         rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
 868 
 869         (void) str_to_utf8("cb_recall", &cb4_args.tag);
 870         cb4_args.minorversion = CB4_MINORVERSION;
 871         /* cb4_args.callback_ident is set in rfs4_do_callback() */
 872         cb4_args.array_len = numops;
 873         cb4_args.array = argop;
 874 
 875         /*
 876          * fill in the args struct
 877          */
 878         bcopy(&dsp->rds_delegid.stateid, &rec_argp->stateid, sizeof (stateid4));
 879         rec_argp->truncate = trunc;
 880 
 881         fhp = &dsp->rds_finfo->rf_filehandle;
 882         rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) *
 883             fhp->nfs_fh4_len, KM_SLEEP);
 884         nfs_fh4_copy(fhp, &rec_argp->fh);
 885 
 886         /* Keep track of when we did this for observability */
 887         dsp->rds_time_recalled = gethrestime_sec();
 888 
 889         /*
 890          * Set up the timeout for the callback and make the actual call.
 891          * Timeout will be 80% of the lease period for this server.
 892          */
 893         timeout.tv_sec = (rfs4_lease_time * 80) / 100;
 894         timeout.tv_usec = 0;
 895 
 896         DTRACE_NFSV4_3(cb__recall__start, rfs4_client_t *, dsp->rds_client,
 897             rfs4_deleg_state_t *, dsp, CB_RECALL4args *, rec_argp);
 898 
 899         call_stat = rfs4_do_callback(dsp->rds_client, &cb4_args, &cb4_res,
 900             timeout);
 901 
 902         rec_resp = (cb4_res.array_len == 0) ? NULL :
 903             &cb4_res.array[0].nfs_cb_resop4_u.opcbrecall;
 904 
 905         DTRACE_NFSV4_3(cb__recall__done, rfs4_client_t *, dsp->rds_client,
 906             rfs4_deleg_state_t *, dsp, CB_RECALL4res *, rec_resp);
 907 
 908         if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) {
 909                 rfs4_return_deleg(dsp, TRUE);
 910         }
 911 
 912         rfs4freeargres(&cb4_args, &cb4_res);
 913 }
 914 
 915 struct recall_arg {
 916         rfs4_deleg_state_t *dsp;
 917         void (*recall)(rfs4_deleg_state_t *, bool_t trunc);
 918         bool_t trunc;
 919 };
 920 
 921 static void
 922 do_recall(struct recall_arg *arg)
 923 {
 924         rfs4_deleg_state_t *dsp = arg->dsp;
 925         rfs4_file_t *fp = dsp->rds_finfo;
 926         callb_cpr_t cpr_info;
 927         kmutex_t cpr_lock;
 928 
 929         mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
 930         CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall");
 931 
 932         /*
 933          * It is possible that before this thread starts
 934          * the client has send us a return_delegation, and
 935          * if that is the case we do not need to send the
 936          * recall callback.
 937          */
 938         if (dsp->rds_dtype != OPEN_DELEGATE_NONE) {
 939                 DTRACE_PROBE3(nfss__i__recall,
 940                     struct recall_arg *, arg,
 941                     struct rfs4_deleg_state_t *, dsp,
 942                     struct rfs4_file_t *, fp);
 943 
 944                 if (arg->recall)
 945                         (void) (*arg->recall)(dsp, arg->trunc);
 946         }
 947 
 948         mutex_enter(fp->rf_dinfo.rd_recall_lock);
 949         /*
 950          * Recall count may go negative if the parent thread that is
 951          * creating the individual callback threads does not modify
 952          * the recall_count field before the callback thread actually
 953          * gets a response from the CB_RECALL
 954          */
 955         fp->rf_dinfo.rd_recall_count--;
 956         if (fp->rf_dinfo.rd_recall_count == 0)
 957                 cv_signal(fp->rf_dinfo.rd_recall_cv);
 958         mutex_exit(fp->rf_dinfo.rd_recall_lock);
 959 
 960         mutex_enter(&cpr_lock);
 961         CALLB_CPR_EXIT(&cpr_info);
 962         mutex_destroy(&cpr_lock);
 963 
 964         rfs4_deleg_state_rele(dsp); /* release the hold for this thread */
 965 
 966         kmem_free(arg, sizeof (struct recall_arg));
 967 }
 968 
 969 struct master_recall_args {
 970     rfs4_file_t *fp;
 971     void (*recall)(rfs4_deleg_state_t *, bool_t);
 972     bool_t trunc;
 973 };
 974 
 975 static void
 976 do_recall_file(struct master_recall_args *map)
 977 {
 978         rfs4_file_t *fp = map->fp;
 979         rfs4_deleg_state_t *dsp;
 980         struct recall_arg *arg;
 981         callb_cpr_t cpr_info;
 982         kmutex_t cpr_lock;
 983         int32_t recall_count;
 984 
 985         rfs4_dbe_lock(fp->rf_dbe);
 986 
 987         /* Recall already in progress ? */
 988         mutex_enter(fp->rf_dinfo.rd_recall_lock);
 989         if (fp->rf_dinfo.rd_recall_count != 0) {
 990                 mutex_exit(fp->rf_dinfo.rd_recall_lock);
 991                 rfs4_dbe_rele_nolock(fp->rf_dbe);
 992                 rfs4_dbe_unlock(fp->rf_dbe);
 993                 kmem_free(map, sizeof (struct master_recall_args));
 994                 return;
 995         }
 996 
 997         mutex_exit(fp->rf_dinfo.rd_recall_lock);
 998 
 999         mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
1000         CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "v4RecallFile");
1001 
1002         recall_count = 0;
1003         for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
1004             dsp = list_next(&fp->rf_delegstatelist, dsp)) {
1005 
1006                 rfs4_dbe_lock(dsp->rds_dbe);
1007                 /*
1008                  * if this delegation state
1009                  * is being reaped skip it
1010                  */
1011                 if (rfs4_dbe_is_invalid(dsp->rds_dbe)) {
1012                         rfs4_dbe_unlock(dsp->rds_dbe);
1013                         continue;
1014                 }
1015 
1016                 /* hold for receiving thread */
1017                 rfs4_dbe_hold(dsp->rds_dbe);
1018                 rfs4_dbe_unlock(dsp->rds_dbe);
1019 
1020                 arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP);
1021                 arg->recall = map->recall;
1022                 arg->trunc = map->trunc;
1023                 arg->dsp = dsp;
1024 
1025                 recall_count++;
1026 
1027                 (void) thread_create(NULL, 0, do_recall, arg, 0, &p0, TS_RUN,
1028                     minclsyspri);
1029         }
1030 
1031         rfs4_dbe_unlock(fp->rf_dbe);
1032 
1033         mutex_enter(fp->rf_dinfo.rd_recall_lock);
1034         /*
1035          * Recall count may go negative if the parent thread that is
1036          * creating the individual callback threads does not modify
1037          * the recall_count field before the callback thread actually
1038          * gets a response from the CB_RECALL
1039          */
1040         fp->rf_dinfo.rd_recall_count += recall_count;
1041         while (fp->rf_dinfo.rd_recall_count)
1042                 cv_wait(fp->rf_dinfo.rd_recall_cv, fp->rf_dinfo.rd_recall_lock);
1043 
1044         mutex_exit(fp->rf_dinfo.rd_recall_lock);
1045 
1046         DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp);
1047         rfs4_file_rele(fp);
1048         kmem_free(map, sizeof (struct master_recall_args));
1049         mutex_enter(&cpr_lock);
1050         CALLB_CPR_EXIT(&cpr_info);
1051         mutex_destroy(&cpr_lock);
1052 }
1053 
1054 static void
1055 rfs4_recall_file(rfs4_file_t *fp,
1056     void (*recall)(rfs4_deleg_state_t *, bool_t trunc),
1057     bool_t trunc, rfs4_client_t *cp)
1058 {
1059         struct master_recall_args *args;
1060 
1061         rfs4_dbe_lock(fp->rf_dbe);
1062         if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1063                 rfs4_dbe_unlock(fp->rf_dbe);
1064                 return;
1065         }
1066         rfs4_dbe_hold(fp->rf_dbe);   /* hold for new thread */
1067 
1068         /*
1069          * Mark the time we started the recall processing.
1070          * If it has been previously recalled, do not reset the
1071          * timer since this is used for the revocation decision.
1072          */
1073         if (fp->rf_dinfo.rd_time_recalled == 0)
1074                 fp->rf_dinfo.rd_time_recalled = gethrestime_sec();
1075         fp->rf_dinfo.rd_ever_recalled = TRUE; /* used for policy decision */
1076         /* Client causing recall not always available */
1077         if (cp)
1078                 fp->rf_dinfo.rd_conflicted_client = cp->rc_clientid;
1079 
1080         rfs4_dbe_unlock(fp->rf_dbe);
1081 
1082         args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP);
1083         args->fp = fp;
1084         args->recall = recall;
1085         args->trunc = trunc;
1086 
1087         (void) thread_create(NULL, 0, do_recall_file, args, 0, &p0, TS_RUN,
1088             minclsyspri);
1089 }
1090 
1091 void
1092 rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp)
1093 {
1094         time_t elapsed1, elapsed2;
1095 
1096         if (fp->rf_dinfo.rd_time_recalled != 0) {
1097                 elapsed1 = gethrestime_sec() - fp->rf_dinfo.rd_time_recalled;
1098                 elapsed2 = gethrestime_sec() - fp->rf_dinfo.rd_time_lastwrite;
1099                 /* First check to see if a revocation should occur */
1100                 if (elapsed1 > rfs4_lease_time &&
1101                     elapsed2 > rfs4_lease_time) {
1102                         rfs4_revoke_file(fp);
1103                         return;
1104                 }
1105                 /*
1106                  * Next check to see if a recall should be done again
1107                  * so quickly.
1108                  */
1109                 if (elapsed1 <= ((rfs4_lease_time * 20) / 100))
1110                         return;
1111         }
1112         rfs4_recall_file(fp, rfs4_do_cb_recall, trunc, cp);
1113 }
1114 
1115 /*
1116  * rfs4_check_recall is called from rfs4_do_open to determine if the current
1117  * open conflicts with the delegation.
1118  * Return true if we need recall otherwise false.
1119  * Assumes entry locks for sp and sp->rs_finfo are held.
1120  */
1121 bool_t
1122 rfs4_check_recall(rfs4_state_t *sp, uint32_t access)
1123 {
1124         open_delegation_type4 dtype = sp->rs_finfo->rf_dinfo.rd_dtype;
1125 
1126         switch (dtype) {
1127         case OPEN_DELEGATE_NONE:
1128                 /* Not currently delegated so there is nothing to do */
1129                 return (FALSE);
1130         case OPEN_DELEGATE_READ:
1131                 /*
1132                  * If the access is only asking for READ then there is
1133                  * no conflict and nothing to do.  If it is asking
1134                  * for write, then there will be conflict and the read
1135                  * delegation should be recalled.
1136                  */
1137                 if (access == OPEN4_SHARE_ACCESS_READ)
1138                         return (FALSE);
1139                 else
1140                         return (TRUE);
1141         case OPEN_DELEGATE_WRITE:
1142                 /* Check to see if this client has the delegation */
1143                 return (rfs4_is_deleg(sp));
1144         }
1145 
1146         return (FALSE);
1147 }
1148 
1149 /*
1150  * Return the "best" allowable delegation available given the current
1151  * delegation type and the desired access and deny modes on the file.
1152  * At the point that this routine is called we know that the access and
1153  * deny modes are consistent with the file modes.
1154  */
1155 static open_delegation_type4
1156 rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp)
1157 {
1158         open_delegation_type4 dtype = fp->rf_dinfo.rd_dtype;
1159         uint32_t access = sp->rs_share_access;
1160         uint32_t deny = sp->rs_share_deny;
1161         int readcnt = 0;
1162         int writecnt = 0;
1163 
1164         switch (dtype) {
1165         case OPEN_DELEGATE_NONE:
1166                 /*
1167                  * Determine if more than just this OPEN have the file
1168                  * open and if so, no delegation may be provided to
1169                  * the client.
1170                  */
1171                 if (access & OPEN4_SHARE_ACCESS_WRITE)
1172                         writecnt++;
1173                 if (access & OPEN4_SHARE_ACCESS_READ)
1174                         readcnt++;
1175 
1176                 if (fp->rf_access_read > readcnt ||
1177                     fp->rf_access_write > writecnt)
1178                         return (OPEN_DELEGATE_NONE);
1179 
1180                 /*
1181                  * If the client is going to write, or if the client
1182                  * has exclusive access, return a write delegation.
1183                  */
1184                 if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1185                     (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE)))
1186                         return (OPEN_DELEGATE_WRITE);
1187                 /*
1188                  * If we don't want to write or we've haven't denied read
1189                  * access to others, return a read delegation.
1190                  */
1191                 if ((access & ~OPEN4_SHARE_ACCESS_WRITE) ||
1192                     (deny & ~OPEN4_SHARE_DENY_READ))
1193                         return (OPEN_DELEGATE_READ);
1194 
1195                 /* Shouldn't get here */
1196                 return (OPEN_DELEGATE_NONE);
1197 
1198         case OPEN_DELEGATE_READ:
1199                 /*
1200                  * If the file is delegated for read but we wan't to
1201                  * write or deny others to read then we can't delegate
1202                  * the file. We shouldn't get here since the delegation should
1203                  * have been recalled already.
1204                  */
1205                 if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1206                     (deny & OPEN4_SHARE_DENY_READ))
1207                         return (OPEN_DELEGATE_NONE);
1208                 return (OPEN_DELEGATE_READ);
1209 
1210         case OPEN_DELEGATE_WRITE:
1211                 return (OPEN_DELEGATE_WRITE);
1212         }
1213 
1214         /* Shouldn't get here */
1215         return (OPEN_DELEGATE_NONE);
1216 }
1217 
1218 /*
1219  * Given the desired delegation type and the "history" of the file
1220  * determine the actual delegation type to return.
1221  */
1222 static open_delegation_type4
1223 rfs4_delegation_policy(open_delegation_type4 dtype,
1224     rfs4_dinfo_t *dinfo, clientid4 cid)
1225 {
1226         time_t elapsed;
1227 
1228         if (rfs4_deleg_policy != SRV_NORMAL_DELEGATE)
1229                 return (OPEN_DELEGATE_NONE);
1230 
1231         /*
1232          * Has this file/delegation ever been recalled?  If not then
1233          * no further checks for a delegation race need to be done.
1234          * However if a recall has occurred, then check to see if a
1235          * client has caused its own delegation recall to occur.  If
1236          * not, then has a delegation for this file been returned
1237          * recently?  If so, then do not assign a new delegation to
1238          * avoid a "delegation race" between the original client and
1239          * the new/conflicting client.
1240          */
1241         if (dinfo->rd_ever_recalled == TRUE) {
1242                 if (dinfo->rd_conflicted_client != cid) {
1243                         elapsed = gethrestime_sec() - dinfo->rd_time_returned;
1244                         if (elapsed < rfs4_lease_time)
1245                                 return (OPEN_DELEGATE_NONE);
1246                 }
1247         }
1248 
1249         /* Limit the number of read grants */
1250         if (dtype == OPEN_DELEGATE_READ &&
1251             dinfo->rd_rdgrants > MAX_READ_DELEGATIONS)
1252                 return (OPEN_DELEGATE_NONE);
1253 
1254         /*
1255          * Should consider limiting total number of read/write
1256          * delegations the server will permit.
1257          */
1258 
1259         return (dtype);
1260 }
1261 
1262 /*
1263  * Try and grant a delegation for an open give the state. The routine
1264  * returns the delegation type granted. This could be OPEN_DELEGATE_NONE.
1265  *
1266  * The state and associate file entry must be locked
1267  */
1268 rfs4_deleg_state_t *
1269 rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall)
1270 {
1271         rfs4_file_t *fp = sp->rs_finfo;
1272         open_delegation_type4 dtype;
1273         int no_delegation;
1274 
1275         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1276         ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1277 
1278         /* Is the server even providing delegations? */
1279         if (rfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE)
1280                 return (NULL);
1281 
1282         /* Check to see if delegations have been temporarily disabled */
1283         mutex_enter(&rfs4_deleg_lock);
1284         no_delegation = rfs4_deleg_disabled;
1285         mutex_exit(&rfs4_deleg_lock);
1286 
1287         if (no_delegation)
1288                 return (NULL);
1289 
1290         /* Don't grant a delegation if a deletion is impending. */
1291         if (fp->rf_dinfo.rd_hold_grant > 0) {
1292                 return (NULL);
1293         }
1294 
1295         /*
1296          * Don't grant a delegation if there are any lock manager
1297          * (NFSv2/v3) locks for the file.  This is a bit of a hack (e.g.,
1298          * if there are only read locks we should be able to grant a
1299          * read-only delegation), but it's good enough for now.
1300          *
1301          * MT safety: the lock manager checks for conflicting delegations
1302          * before processing a lock request.  That check will block until
1303          * we are done here.  So if the lock manager acquires a lock after
1304          * we decide to grant the delegation, the delegation will get
1305          * immediately recalled (if there's a conflict), so we're safe.
1306          */
1307         if (lm_vp_active(fp->rf_vp)) {
1308                 return (NULL);
1309         }
1310 
1311         /*
1312          * Based on the type of delegation request passed in, take the
1313          * appropriate action (DELEG_NONE is handled above)
1314          */
1315         switch (dreq) {
1316 
1317         case DELEG_READ:
1318         case DELEG_WRITE:
1319                 /*
1320                  * The server "must" grant the delegation in this case.
1321                  * Client is using open previous
1322                  */
1323                 dtype = (open_delegation_type4)dreq;
1324                 *recall = 1;
1325                 break;
1326         case DELEG_ANY:
1327                 /*
1328                  * If a valid callback path does not exist, no delegation may
1329                  * be granted.
1330                  */
1331                 if (sp->rs_owner->ro_client->rc_cbinfo.cb_state != CB_OK)
1332                         return (NULL);
1333 
1334                 /*
1335                  * If the original operation which caused time_rm_delayed
1336                  * to be set hasn't been retried and completed for one
1337                  * full lease period, clear it and allow delegations to
1338                  * get granted again.
1339                  */
1340                 if (fp->rf_dinfo.rd_time_rm_delayed > 0 &&
1341                     gethrestime_sec() >
1342                     fp->rf_dinfo.rd_time_rm_delayed + rfs4_lease_time)
1343                         fp->rf_dinfo.rd_time_rm_delayed = 0;
1344 
1345                 /*
1346                  * If we are waiting for a delegation to be returned then
1347                  * don't delegate this file. We do this for correctness as
1348                  * well as if the file is being recalled we would likely
1349                  * recall this file again.
1350                  */
1351 
1352                 if (fp->rf_dinfo.rd_time_recalled != 0 ||
1353                     fp->rf_dinfo.rd_time_rm_delayed != 0)
1354                         return (NULL);
1355 
1356                 /* Get the "best" delegation candidate */
1357                 dtype = rfs4_check_delegation(sp, fp);
1358 
1359                 if (dtype == OPEN_DELEGATE_NONE)
1360                         return (NULL);
1361 
1362                 /*
1363                  * Based on policy and the history of the file get the
1364                  * actual delegation.
1365                  */
1366                 dtype = rfs4_delegation_policy(dtype, &fp->rf_dinfo,
1367                     sp->rs_owner->ro_client->rc_clientid);
1368 
1369                 if (dtype == OPEN_DELEGATE_NONE)
1370                         return (NULL);
1371                 break;
1372         default:
1373                 return (NULL);
1374         }
1375 
1376         /* set the delegation for the state */
1377         return (rfs4_deleg_state(sp, dtype, recall));
1378 }
1379 
1380 void
1381 rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp,
1382     nfsace4 *ace,  int recall)
1383 {
1384         open_write_delegation4 *wp;
1385         open_read_delegation4 *rp;
1386         nfs_space_limit4 *spl;
1387         nfsace4 nace;
1388 
1389         /*
1390          * We need to allocate a new copy of the who string.
1391          * this string will be freed by the rfs4_op_open dis_resfree
1392          * routine. We need to do this allocation since replays will
1393          * be allocated and rfs4_compound can't tell the difference from
1394          * a replay and an inital open. N.B. if an ace is passed in, it
1395          * the caller's responsibility to free it.
1396          */
1397 
1398         if (ace == NULL) {
1399                 /*
1400                  * Default is to deny all access, the client will have
1401                  * to contact the server.  XXX Do we want to actually
1402                  * set a deny for every one, or do we simply want to
1403                  * construct an entity that will match no one?
1404                  */
1405                 nace.type = ACE4_ACCESS_DENIED_ACE_TYPE;
1406                 nace.flag = 0;
1407                 nace.access_mask = ACE4_VALID_MASK_BITS;
1408                 (void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who);
1409         } else {
1410                 nace.type = ace->type;
1411                 nace.flag = ace->flag;
1412                 nace.access_mask = ace->access_mask;
1413                 (void) utf8_copy(&ace->who, &nace.who);
1414         }
1415 
1416         dp->delegation_type = dsp->rds_dtype;
1417 
1418         switch (dsp->rds_dtype) {
1419         case OPEN_DELEGATE_NONE:
1420                 break;
1421         case OPEN_DELEGATE_READ:
1422                 rp = &dp->open_delegation4_u.read;
1423                 rp->stateid = dsp->rds_delegid.stateid;
1424                 rp->recall = (bool_t)recall;
1425                 rp->permissions = nace;
1426                 break;
1427         case OPEN_DELEGATE_WRITE:
1428                 wp = &dp->open_delegation4_u.write;
1429                 wp->stateid = dsp->rds_delegid.stateid;
1430                 wp->recall = (bool_t)recall;
1431                 spl = &wp->space_limit;
1432                 spl->limitby = NFS_LIMIT_SIZE;
1433                 spl->nfs_space_limit4_u.filesize = 0;
1434                 wp->permissions = nace;
1435                 break;
1436         }
1437 }
1438 
1439 /*
1440  * Check if the file is delegated via the provided file struct.
1441  * Return TRUE if it is delegated.  This is intended for use by
1442  * the v4 server.  The v2/v3 server code should use rfs4_check_delegated().
1443  *
1444  * Note that if the file is found to have a delegation, it is
1445  * recalled, unless the clientid of the caller matches the clientid of the
1446  * delegation. If the caller has specified, there is a slight delay
1447  * inserted in the hopes that the delegation will be returned quickly.
1448  */
1449 bool_t
1450 rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp,
1451     bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp)
1452 {
1453         rfs4_deleg_state_t *dsp;
1454 
1455         /* Is delegation enabled? */
1456         if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)
1457                 return (FALSE);
1458 
1459         /* do we have a delegation on this file? */
1460         rfs4_dbe_lock(fp->rf_dbe);
1461         if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1462                 if (is_rm)
1463                         fp->rf_dinfo.rd_hold_grant++;
1464                 rfs4_dbe_unlock(fp->rf_dbe);
1465                 return (FALSE);
1466         }
1467         /*
1468          * do we have a write delegation on this file or are we
1469          * requesting write access to a file with any type of existing
1470          * delegation?
1471          */
1472         if (mode == FWRITE || fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
1473                 if (cp != NULL) {
1474                         dsp = list_head(&fp->rf_delegstatelist);
1475                         if (dsp == NULL) {
1476                                 rfs4_dbe_unlock(fp->rf_dbe);
1477                                 return (FALSE);
1478                         }
1479                         /*
1480                          * Does the requestor already own the delegation?
1481                          */
1482                         if (dsp->rds_client->rc_clientid == *(cp)) {
1483                                 rfs4_dbe_unlock(fp->rf_dbe);
1484                                 return (FALSE);
1485                         }
1486                 }
1487 
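             /*
              * The existing delegation conflicts with the requested
              * access; drop the file lock and recall the delegation
              * from its holder(s).
              */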
1488                 rfs4_dbe_unlock(fp->rf_dbe);
1489                 rfs4_recall_deleg(fp, trunc, NULL);
1490 
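             /*
              * If the caller cannot tolerate a delay, record the time
              * of the conflict and report the file as delegated.
              */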
1491                 if (!do_delay) {
1492                         rfs4_dbe_lock(fp->rf_dbe);
1493                         fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1494                         rfs4_dbe_unlock(fp->rf_dbe);
1495                         return (TRUE);
1496                 }
1497 
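             /*
              * Otherwise wait briefly for the client to return the
              * delegation, then recheck.
              */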
1498                 delay(NFS4_DELEGATION_CONFLICT_DELAY);
1499 
1500                 rfs4_dbe_lock(fp->rf_dbe);
1501                 if (fp->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE) {
1502                         fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1503                         rfs4_dbe_unlock(fp->rf_dbe);
1504                         return (TRUE);
1505                 }
1506         }
1507         if (is_rm)
1508                 fp->rf_dinfo.rd_hold_grant++;
1509         rfs4_dbe_unlock(fp->rf_dbe);
1510         return (FALSE);
1511 }
1512 
1513 /*
1514  * Check if the file is delegated in the case of a v2 or v3 access.
1515  * Return TRUE if it is delegated, in which case v2 should drop the
1516  * request and v3 should return JUKEBOX.
1517  */
1518 bool_t
1519 rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc)
1520 {
1521         rfs4_file_t *fp;
1522         bool_t create = FALSE;
1523         bool_t rc = FALSE;
1524 
1525         rfs4_hold_deleg_policy();
1526 
1527         /* Is delegation enabled? */
1528         if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) {
1529                 fp = rfs4_findfile(vp, NULL, &create);
1530                 if (fp != NULL) {
1531                         if (rfs4_check_delegated_byfp(mode, fp, trunc,
1532                             TRUE, FALSE, NULL)) {
1533                                 rc = TRUE;
1534                         }
1535                         rfs4_file_rele(fp);
1536                 }
1537         }
1538         rfs4_rele_deleg_policy();
1539         return (rc);
1540 }
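
     /*
      * Illustrative sketch only (not part of this file's interfaces):
      * a v3 modify path might use this check roughly as follows,
      * mapping a conflicting delegation to the JUKEBOX retry path:
      *
      *     if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
      *             resp->status = NFS3ERR_JUKEBOX;
      *             return;
      *     }
      *
      * The response handling shown here ("resp->status" and the bare
      * return) is a placeholder for the caller's own error path.
      */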
1541 
1542 /*
1543  * Release a hold on the hold_grant counter which
1544  * prevents delegation from being granted while a remove
1545  * or a rename is in progress.
1546  */
1547 void
1548 rfs4_clear_dont_grant(rfs4_file_t *fp)
1549 {
1550         if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)
1551                 return;
1552         rfs4_dbe_lock(fp->rf_dbe);
1553         ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
1554         fp->rf_dinfo.rd_hold_grant--;
1555         fp->rf_dinfo.rd_time_rm_delayed = 0;
1556         rfs4_dbe_unlock(fp->rf_dbe);
1557 }
1558 
1559 /*
1560  * State support for delegation.
1561  * Set the delegation type for this state.
1562  * This routine is called from open via rfs4_grant_delegation; the entry
1563  * locks on sp and sp->rs_finfo are assumed to be held.
1564  */
1565 static rfs4_deleg_state_t *
1566 rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall)
1567 {
1568         rfs4_file_t *fp = sp->rs_finfo;
1569         bool_t create = TRUE;
1570         rfs4_deleg_state_t *dsp;
1571         vnode_t *vp;
1572         int open_prev = *recall;
1573         int ret;
1574         int fflags = 0;
1575 
1576         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1577         ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1578 
1579         /* Shouldn't happen */
1580         if (fp->rf_dinfo.rd_recall_count != 0 ||
1581             (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
1582             dtype != OPEN_DELEGATE_READ)) {
1583                 return (NULL);
1584         }
1585 
1586         /* Unlock to avoid deadlock */
1587         rfs4_dbe_unlock(fp->rf_dbe);
1588         rfs4_dbe_unlock(sp->rs_dbe);
1589 
1590         dsp = rfs4_finddeleg(sp, &create);
1591 
1592         rfs4_dbe_lock(sp->rs_dbe);
1593         rfs4_dbe_lock(fp->rf_dbe);
1594 
1595         if (dsp == NULL)
1596                 return (NULL);
1597 
1598         /*
1599          * It is possible that since we dropped the lock
1600          * in order to call finddeleg, the rfs4_file_t
1601          * was marked such that we should not grant a
1602          * delegation; if so, bail out.
1603          */
1604         if (fp->rf_dinfo.rd_hold_grant > 0) {
1605                 rfs4_deleg_state_rele(dsp);
1606                 return (NULL);
1607         }
1608 
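             /*
              * An existing delegation state entry was found.  Reuse it
              * only if it belongs to this client and is of the requested
              * type; otherwise there is nothing to grant.
              */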
1609         if (create == FALSE) {
1610                 if (sp->rs_owner->ro_client == dsp->rds_client &&
1611                     dsp->rds_dtype == dtype) {
1612                         return (dsp);
1613                 } else {
1614                         rfs4_deleg_state_rele(dsp);
1615                         return (NULL);
1616                 }
1617         }
1618 
1619         /*
1620          * Check that this file has not been delegated to another
1621          * client
1622          */
1623         if (fp->rf_dinfo.rd_recall_count != 0 ||
1624             fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE ||
1625             (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
1626             dtype != OPEN_DELEGATE_READ)) {
1627                 rfs4_deleg_state_rele(dsp);
1628                 return (NULL);
1629         }
1630 
1631         vp = fp->rf_vp;
1632         /* vnevent_support returns 0 if file system supports vnevents */
1633         if (vnevent_support(vp, NULL)) {
1634                 rfs4_deleg_state_rele(dsp);
1635                 return (NULL);
1636         }
1637 
1638         /* Calculate the fflags for this OPEN. */
1639         if (sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)
1640                 fflags |= FREAD;
1641         if (sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)
1642                 fflags |= FWRITE;
1643 
1644         *recall = 0;
1645         /*
1646          * Before granting a delegation we need to know if anyone else has
1647          * opened the file in a conflicting mode.  However, first we need to
1648          * know how we opened the file to check the counts properly.
1649          */
1650         if (dtype == OPEN_DELEGATE_READ) {
1651                 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1652                     (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1653                     vn_is_mapped(vp, V_WRITE)) {
1654                         if (open_prev) {
1655                                 *recall = 1;
1656                         } else {
1657                                 rfs4_deleg_state_rele(dsp);
1658                                 return (NULL);
1659                         }
1660                 }
1661                 ret = fem_install(vp, deleg_rdops, (void *)fp, OPUNIQ,
1662                     rfs4_mon_hold, rfs4_mon_rele);
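                     /*
                      * Re-check for conflicting opens or mappings now
                      * that the monitor is installed; a conflicting open
                      * may have raced in before the monitor could see it.
                      */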
1663                 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1664                     (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1665                     vn_is_mapped(vp, V_WRITE)) {
1666                         if (open_prev) {
1667                                 *recall = 1;
1668                         } else {
1669                                 (void) fem_uninstall(vp, deleg_rdops,
1670                                     (void *)fp);
1671                                 rfs4_deleg_state_rele(dsp);
1672                                 return (NULL);
1673                         }
1674                 }
1675                 /*
1676                  * Because a client can hold onto a delegation after the
1677                  * file has been closed, we need to keep track of the
1678                  * access to this file.  Otherwise the CIFS server would
1679                  * not know about the client accessing the file and could
1680                  * inappropriately grant an OPLOCK.
1681                  * fem_install() returns EBUSY when asked to install a
1682                  * OPUNIQ monitor more than once.  Therefore, check the
1683                  * return code because we only want this done once.
1684                  */
1685                 if (ret == 0)
1686                         vn_open_upgrade(vp, FREAD);
1687         } else { /* WRITE */
1688                 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1689                     (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1690                     ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
1691                     (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
1692                     vn_is_mapped(vp, V_RDORWR)) {
1693                         if (open_prev) {
1694                                 *recall = 1;
1695                         } else {
1696                                 rfs4_deleg_state_rele(dsp);
1697                                 return (NULL);
1698                         }
1699                 }
1700                 ret = fem_install(vp, deleg_wrops, (void *)fp, OPUNIQ,
1701                     rfs4_mon_hold, rfs4_mon_rele);
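                     /*
                      * As in the read case, re-check for conflicts that
                      * raced in before the monitor was installed.
                      */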
1702                 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1703                     (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1704                     ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
1705                     (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
1706                     vn_is_mapped(vp, V_RDORWR)) {
1707                         if (open_prev) {
1708                                 *recall = 1;
1709                         } else {
1710                                 (void) fem_uninstall(vp, deleg_wrops,
1711                                     (void *)fp);
1712                                 rfs4_deleg_state_rele(dsp);
1713                                 return (NULL);
1714                         }
1715                 }
1716                 /*
1717                  * Because a client can hold onto a delegation after the
1718                  * file has been closed, we need to keep track of the
1719                  * access to this file.  Otherwise the CIFS server would
1720                  * not know about the client accessing the file and could
1721                  * inappropriately grant an OPLOCK.
1722                  * fem_install() returns EBUSY when asked to install a
1723                  * OPUNIQ monitor more than once.  Therefore, check the
1724                  * return code because we only want this done once.
1725                  */
1726                 if (ret == 0)
1727                         vn_open_upgrade(vp, FREAD|FWRITE);
1728         }
1729         /* Place on delegation list for file */
1730         ASSERT(!list_link_active(&dsp->rds_node));
1731         list_insert_tail(&fp->rf_delegstatelist, dsp);
1732 
1733         dsp->rds_dtype = fp->rf_dinfo.rd_dtype = dtype;
1734 
1735         /* Update delegation stats for this file */
1736         fp->rf_dinfo.rd_time_lastgrant = gethrestime_sec();
1737 
1738         /* reset since this is a new delegation */
1739         fp->rf_dinfo.rd_conflicted_client = 0;
1740         fp->rf_dinfo.rd_ever_recalled = FALSE;
1741 
1742         if (dtype == OPEN_DELEGATE_READ)
1743                 fp->rf_dinfo.rd_rdgrants++;
1744         else
1745                 fp->rf_dinfo.rd_wrgrants++;
1746 
1747         return (dsp);
1748 }
1749 
1750 /*
1751  * State routine for the server when a delegation is returned.
1752  */
1753 void
1754 rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked)
1755 {
1756         rfs4_file_t *fp = dsp->rds_finfo;
1757         open_delegation_type4 dtypewas;
1758 
1759         rfs4_dbe_lock(fp->rf_dbe);
1760 
1761         /* nothing to do if no longer on list */
1762         if (!list_link_active(&dsp->rds_node)) {
1763                 rfs4_dbe_unlock(fp->rf_dbe);
1764                 return;
1765         }
1766 
1767         /* Remove state from recall list */
1768         list_remove(&fp->rf_delegstatelist, dsp);
1769 
1770         if (list_is_empty(&fp->rf_delegstatelist)) {
1771                 dtypewas = fp->rf_dinfo.rd_dtype;
1772                 fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
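                     /* Wake any waiters blocked on this file's delegation. */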
1773                 rfs4_dbe_cv_broadcast(fp->rf_dbe);
1774 
1775                 /* if file system was unshared, the vp will be NULL */
1776                 if (fp->rf_vp != NULL) {
1777                         /*
1778                          * Once a delegation is no longer held by any client,
1779                          * the monitor is uninstalled.  At this point, the
1780                          * client must send OPEN otw, so we don't need the
1781                          * reference on the vnode anymore.  The open
1782                          * downgrade removes the reference put on earlier.
1783                          */
1784                         if (dtypewas == OPEN_DELEGATE_READ) {
1785                                 (void) fem_uninstall(fp->rf_vp, deleg_rdops,
1786                                     (void *)fp);
1787                                 vn_open_downgrade(fp->rf_vp, FREAD);
1788                         } else if (dtypewas == OPEN_DELEGATE_WRITE) {
1789                                 (void) fem_uninstall(fp->rf_vp, deleg_wrops,
1790                                     (void *)fp);
1791                                 vn_open_downgrade(fp->rf_vp, FREAD|FWRITE);
1792                         }
1793                 }
1794         }
1795 
1796         switch (dsp->rds_dtype) {
1797         case OPEN_DELEGATE_READ:
1798                 fp->rf_dinfo.rd_rdgrants--;
1799                 break;
1800         case OPEN_DELEGATE_WRITE:
1801                 fp->rf_dinfo.rd_wrgrants--;
1802                 break;
1803         default:
1804                 break;
1805         }
1806 
1807         /* used in the policy decision */
1808         fp->rf_dinfo.rd_time_returned = gethrestime_sec();
1809 
1810         /*
1811          * reset the time_recalled field so future delegations are not
1812          * accidentally revoked
1813          */
1814         if ((fp->rf_dinfo.rd_rdgrants + fp->rf_dinfo.rd_wrgrants) == 0)
1815                 fp->rf_dinfo.rd_time_recalled = 0;
1816 
1817         rfs4_dbe_unlock(fp->rf_dbe);
1818 
1819         rfs4_dbe_lock(dsp->rds_dbe);
1820 
1821         dsp->rds_dtype = OPEN_DELEGATE_NONE;
1822 
1823         if (revoked == TRUE)
1824                 dsp->rds_time_revoked = gethrestime_sec();
1825 
1826         rfs4_dbe_invalidate(dsp->rds_dbe);
1827 
1828         rfs4_dbe_unlock(dsp->rds_dbe);
1829 
1830         if (revoked == TRUE) {
1831                 rfs4_dbe_lock(dsp->rds_client->rc_dbe);
1832                 dsp->rds_client->rc_deleg_revoked++;      /* observability */
1833                 rfs4_dbe_unlock(dsp->rds_client->rc_dbe);
1834         }
1835 }
1836 
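     /*
      * Revoke every delegation held on the file by returning each one
      * with the revoked flag set.
      */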
1837 static void
1838 rfs4_revoke_file(rfs4_file_t *fp)
1839 {
1840         rfs4_deleg_state_t *dsp;
1841 
1842         /*
1843          * The lock for rfs4_file_t must be held when traversing the
1844          * delegation list but that lock needs to be released to call
1845          * rfs4_return_deleg()
1846          */
1847         rfs4_dbe_lock(fp->rf_dbe);
1848         while ((dsp = list_head(&fp->rf_delegstatelist)) != NULL) {
1849                 rfs4_dbe_hold(dsp->rds_dbe);
1850                 rfs4_dbe_unlock(fp->rf_dbe);
1851                 rfs4_return_deleg(dsp, TRUE);
1852                 rfs4_deleg_state_rele(dsp);
1853                 rfs4_dbe_lock(fp->rf_dbe);
1854         }
1855         rfs4_dbe_unlock(fp->rf_dbe);
1856 }
1857 
1858 /*
1859  * A delegation is assumed to be present on the file associated with
1860  * "sp".  Check to see if the delegation is associated with the same
1861  * client as referenced by "sp".  If it is not, TRUE is
1862  * returned.  If the delegation DOES match the client (or no
1863  * delegation is present), return FALSE.
1864  * Assume the state entry and file entry are locked.
1865  */
1866 bool_t
1867 rfs4_is_deleg(rfs4_state_t *sp)
1868 {
1869         rfs4_deleg_state_t *dsp;
1870         rfs4_file_t *fp = sp->rs_finfo;
1871         rfs4_client_t *cp = sp->rs_owner->ro_client;
1872 
1873         ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1874         for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
1875             dsp = list_next(&fp->rf_delegstatelist, dsp)) {
1876                 if (cp != dsp->rds_client) {
1877                         return (TRUE);
1878                 }
1879         }
1880         return (FALSE);
1881 }
1882 
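     /*
      * Temporarily disable the granting of new delegations.  Calls may
      * nest; each rfs4_disable_delegation() must be balanced by a
      * matching rfs4_enable_delegation().
      */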
1883 void
1884 rfs4_disable_delegation(void)
1885 {
1886         mutex_enter(&rfs4_deleg_lock);
1887         rfs4_deleg_disabled++;
1888         mutex_exit(&rfs4_deleg_lock);
1889 }
1890 
1891 void
1892 rfs4_enable_delegation(void)
1893 {
1894         mutex_enter(&rfs4_deleg_lock);
1895         ASSERT(rfs4_deleg_disabled > 0);
1896         rfs4_deleg_disabled--;
1897         mutex_exit(&rfs4_deleg_lock);
1898 }
1899 
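     /*
      * FEM monitor hold/release callbacks.  They keep a reference on
      * the rfs4_file_t for as long as the delegation monitor installed
      * by rfs4_deleg_state() remains on the vnode.
      */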
1900 void
1901 rfs4_mon_hold(void *arg)
1902 {
1903         rfs4_file_t *fp = arg;
1904 
1905         rfs4_dbe_hold(fp->rf_dbe);
1906 }
1907 
1908 void
1909 rfs4_mon_rele(void *arg)
1910 {
1911         rfs4_file_t *fp = arg;
1912 
1913         rfs4_dbe_rele_nolock(fp->rf_dbe);
1914 }