/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/kstat.h>
#include <sys/mkdev.h>
#include <sys/mount.h>
#include <sys/statvfs.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/utsname.h>
#include <sys/bootconf.h>
#include <sys/modctl.h>
#include <sys/acl.h>
#include <sys/flock.h>
#include <sys/kstr.h>
#include <sys/stropts.h>
#include <sys/strsubr.h>
#include <sys/atomic.h>
#include <sys/disp.h>
#include <sys/policy.h>
#include <sys/list.h>
#include <sys/zone.h>

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/rpcsec_gss.h>
#include <rpc/clnt.h>
#include <rpc/xdr.h>

#include <nfs/nfs.h>
#include <nfs/nfs_clnt.h>
#include <nfs/mount.h>
#include <nfs/nfs_acl.h>

#include <fs/fs_subr.h>

#include <nfs/nfs4.h>
#include <nfs/rnode4.h>
#include <nfs/nfs4_clnt.h>
#include <nfs/nfssys.h>

#ifdef  DEBUG
/*
 * These are "special" state IDs and file handles that
 * match any delegation state ID or file handle.  This
 * is for testing purposes only.
 */

stateid4 nfs4_deleg_any = { 0x7FFFFFF0 };
char nfs4_deleg_fh[] = "\0377\0376\0375\0374";
nfs_fh4 nfs4_deleg_anyfh = { sizeof (nfs4_deleg_fh)-1, nfs4_deleg_fh };
nfsstat4 cb4_getattr_fail = NFS4_OK;
nfsstat4 cb4_recall_fail = NFS4_OK;

int nfs4_callback_debug;
int nfs4_recall_debug;
int nfs4_drat_debug;

#endif

#define CB_NOTE(x)      NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE, x))
#define CB_WARN(x)      NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x))
#define CB_WARN1(x, y)  NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x, y))

enum nfs4_delegreturn_policy nfs4_delegreturn_policy = INACTIVE;

static zone_key_t nfs4_callback_zone_key;

/*
 * NFS4_MAPSIZE is the number of bytes we are willing to consume
 * for the block allocation map when the server grants a NFS_LIMIT_BLOCK
 * style delegation.
 */

#define NFS4_MAPSIZE    8192
#define NFS4_MAPWORDS   NFS4_MAPSIZE/sizeof (uint_t)
#define NbPW            (NBBY*sizeof (uint_t))

static int nfs4_num_prognums = 1024;
static SVC_CALLOUT_TABLE nfs4_cb_sct;

struct nfs4_dnode {
        list_node_t     linkage;
        rnode4_t        *rnodep;
        int             flags;          /* Flags for nfs4delegreturn_impl() */
};

static const struct nfs4_callback_stats nfs4_callback_stats_tmpl = {
        { "delegations",        KSTAT_DATA_UINT64 },
        { "cb_getattr",         KSTAT_DATA_UINT64 },
        { "cb_recall",          KSTAT_DATA_UINT64 },
        { "cb_null",            KSTAT_DATA_UINT64 },
        { "cb_dispatch",        KSTAT_DATA_UINT64 },
        { "delegaccept_r",      KSTAT_DATA_UINT64 },
        { "delegaccept_rw",     KSTAT_DATA_UINT64 },
        { "delegreturn",        KSTAT_DATA_UINT64 },
        { "callbacks",          KSTAT_DATA_UINT64 },
        { "claim_cur",          KSTAT_DATA_UINT64 },
        { "claim_cur_ok",       KSTAT_DATA_UINT64 },
        { "recall_trunc",       KSTAT_DATA_UINT64 },
        { "recall_failed",      KSTAT_DATA_UINT64 },
        { "return_limit_write", KSTAT_DATA_UINT64 },
        { "return_limit_addmap", KSTAT_DATA_UINT64 },
        { "deleg_recover",      KSTAT_DATA_UINT64 },
        { "cb_illegal",         KSTAT_DATA_UINT64 }
};

struct nfs4_cb_port {
        list_node_t             linkage; /* linkage into per-zone port list */
        char                    netid[KNC_STRSIZE];
        char                    uaddr[KNC_STRSIZE];
        char                    protofmly[KNC_STRSIZE];
        char                    proto[KNC_STRSIZE];
};

static int cb_getattr_bytes;

struct cb_recall_pass {
        rnode4_t        *rp;
        int             flags;          /* Flags for nfs4delegreturn_impl() */
        bool_t          truncate;
};

static nfs4_open_stream_t *get_next_deleg_stream(rnode4_t *, int);
static void nfs4delegreturn_thread(struct cb_recall_pass *);
static int deleg_reopen(vnode_t *, bool_t *, struct nfs4_callback_globals *,
    int);
static void nfs4_dlistadd(rnode4_t *, struct nfs4_callback_globals *, int);
static void nfs4_dlistclean_impl(struct nfs4_callback_globals *, int);
static int nfs4delegreturn_impl(rnode4_t *, int,
    struct nfs4_callback_globals *);
static void nfs4delegreturn_cleanup_impl(rnode4_t *, nfs4_server_t *,
    struct nfs4_callback_globals *);

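/*
 * cb_getattr - Handle an OP_CB_GETATTR callback from the server, which
 * asks for the current change attribute and/or size of a file this
 * client holds a delegation for.  Look the file handle up in the
 * server's delegation list and, if found, XDR-encode the requested
 * attributes directly into the reply.
 */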
static void
cb_getattr(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
        struct compound_state *cs, struct nfs4_callback_globals *ncg)
{
        CB_GETATTR4args *args = &argop->nfs_cb_argop4_u.opcbgetattr;
        CB_GETATTR4res *resp = &resop->nfs_cb_resop4_u.opcbgetattr;
        rnode4_t *rp;
        vnode_t *vp;
        bool_t found = FALSE;
        struct nfs4_server *sp;
        struct fattr4 *fap;
        rpc_inline_t *fdata;
        long mapcnt;
        fattr4_change change;
        fattr4_size size;
        uint_t rflag;

        ncg->nfs4_callback_stats.cb_getattr.value.ui64++;

#ifdef DEBUG
        /*
         * Error injection hook: set the cb4_getattr_fail global to
         * the NFS4 protocol error to be returned.
         */
        if (cb4_getattr_fail != NFS4_OK) {
                *cs->statusp = resp->status = cb4_getattr_fail;
                return;
        }
#endif

        resp->obj_attributes.attrmask = 0;

        mutex_enter(&ncg->nfs4_cb_lock);
        sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
        mutex_exit(&ncg->nfs4_cb_lock);

        if (nfs4_server_vlock(sp, 0) == FALSE) {

                CB_WARN("cb_getattr: cannot find server\n");

                *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
                return;
        }

        /*
         * In cb_compound, callback_ident was validated against rq_prog,
         * but we couldn't verify that it was set to the value we provided
         * at setclientid time (because we didn't have the server struct
         * yet).  Now we have the server struct, but don't have
         * callback_ident handy.  So, validate the server struct's program
         * number against the request's RPC prog number.  At this point, we
         * know the RPC prog num is valid (else we wouldn't be here);
         * however, we don't know that it was the prog number we supplied
         * to this server at setclientid time.  If the prog numbers aren't
         * equal, log the problem and fail the request, because either the
         * cbserv and/or cbclient is confused.  This will probably never
         * happen.
         */
        if (sp->s_program != req->rq_prog) {
#ifdef DEBUG
                zcmn_err(getzoneid(), CE_WARN,
                    "cb_getattr: wrong server program number srv=%d req=%d\n",
                    sp->s_program, req->rq_prog);
#else
                zcmn_err(getzoneid(), CE_WARN,
                    "cb_getattr: wrong server program number\n");
#endif
                mutex_exit(&sp->s_lock);
                nfs4_server_rele(sp);
                *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
                return;
        }

        /*
         * Search the delegation list for a matching file handle;
         * mutex on sp prevents the list from changing.
         */

        rp = list_head(&sp->s_deleg_list);
        for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) {
                nfs4_fhandle_t fhandle;

                sfh4_copyval(rp->r_fh, &fhandle);

                if ((fhandle.fh_len == args->fh.nfs_fh4_len &&
                    bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val,
                    fhandle.fh_len) == 0)) {

                        found = TRUE;
                        break;
                }
#ifdef  DEBUG
                if (nfs4_deleg_anyfh.nfs_fh4_len == args->fh.nfs_fh4_len &&
                    bcmp(nfs4_deleg_anyfh.nfs_fh4_val, args->fh.nfs_fh4_val,
                    args->fh.nfs_fh4_len) == 0) {

                        found = TRUE;
                        break;
                }
#endif
        }

        /*
         * VN_HOLD the vnode before releasing s_lock to guarantee
         * we have a valid vnode reference.
         */
        if (found == TRUE) {
                vp = RTOV4(rp);
                VN_HOLD(vp);
        }

        mutex_exit(&sp->s_lock);
        nfs4_server_rele(sp);

        if (found == FALSE) {

                CB_WARN("cb_getattr: bad fhandle\n");

                *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
                return;
        }

        /*
         * Figure out which attributes the server wants.  We only
         * offer FATTR4_CHANGE & FATTR4_SIZE; ignore the rest.
         */
        fdata = kmem_alloc(cb_getattr_bytes, KM_SLEEP);

        /*
         * Don't actually need to create XDR to encode these
         * simple data structures.
         * xdrmem_create(&xdr, fdata, cb_getattr_bytes, XDR_ENCODE);
         */
        fap = &resp->obj_attributes;

        fap->attrmask = 0;
        /* attrlist4_len starts at 0 and increases as attrs are processed */
        fap->attrlist4 = (char *)fdata;
        fap->attrlist4_len = 0;

        /* don't supply attrs if request was zero */
        if (args->attr_request != 0) {
                if (args->attr_request & FATTR4_CHANGE_MASK) {
                        /*
                         * If the file is mmapped, then increment the change
                         * attribute and return it.  This will guarantee that
                         * the server will perceive that the file has changed
                         * if there is any chance that the client application
                         * has changed it.  Otherwise, just return the change
                         * attribute as it has been updated by nfs4write_deleg.
                         */

                        mutex_enter(&rp->r_statelock);
                        mapcnt = rp->r_mapcnt;
                        rflag = rp->r_flags;
                        mutex_exit(&rp->r_statelock);

                        mutex_enter(&rp->r_statev4_lock);
                        /*
                         * If object mapped, then always return new change.
                         * Otherwise, return change if object has dirty
                         * pages.  If object doesn't have any dirty pages,
                         * then all changes have been pushed to server, so
                         * reset change to grant change.
                         */
                        if (mapcnt)
                                rp->r_deleg_change++;
                        else if (! (rflag & R4DIRTY))
                                rp->r_deleg_change = rp->r_deleg_change_grant;
                        change = rp->r_deleg_change;
                        mutex_exit(&rp->r_statev4_lock);

                        /*
                         * Use inline XDR code directly; we know that we
                         * are going to a memory buffer and it has enough
                         * space, so it cannot fail.
                         */
                        IXDR_PUT_U_HYPER(fdata, change);
                        fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT;
                        fap->attrmask |= FATTR4_CHANGE_MASK;
                }

                if (args->attr_request & FATTR4_SIZE_MASK) {
                        /*
                         * Use an atomic add of 0 to fetch a consistent view
                         * of r_size; this avoids having to take rw_lock
                         * which could cause a deadlock.
                         */
                        size = atomic_add_64_nv((uint64_t *)&rp->r_size, 0);

                        /*
                         * Use inline XDR code directly; we know that we
                         * are going to a memory buffer and it has enough
                         * space, so it cannot fail.
                         */
                        IXDR_PUT_U_HYPER(fdata, size);
                        fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT;
                        fap->attrmask |= FATTR4_SIZE_MASK;
                }
        }

        VN_RELE(vp);

        *cs->statusp = resp->status = NFS4_OK;
}

static void
cb_getattr_free(nfs_cb_resop4 *resop)
{
        if (resop->nfs_cb_resop4_u.opcbgetattr.obj_attributes.attrlist4)
                kmem_free(resop->nfs_cb_resop4_u.opcbgetattr.
                    obj_attributes.attrlist4, cb_getattr_bytes);
}

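/*
 * cb_recall - Handle an OP_CB_RECALL callback from the server.  Find
 * the delegation matching both the state ID and the file handle, then
 * hand the actual delegation return off to an async thread so the
 * callback reply is not held up behind the delegreturn itself.
 */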
static void
cb_recall(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
        struct compound_state *cs, struct nfs4_callback_globals *ncg)
{
        CB_RECALL4args *args = &argop->nfs_cb_argop4_u.opcbrecall;
        CB_RECALL4res *resp = &resop->nfs_cb_resop4_u.opcbrecall;
        rnode4_t *rp;
        vnode_t *vp;
        struct nfs4_server *sp;
        bool_t found = FALSE;

        ncg->nfs4_callback_stats.cb_recall.value.ui64++;

        ASSERT(req->rq_prog >= NFS4_CALLBACK);
        ASSERT(req->rq_prog < NFS4_CALLBACK+nfs4_num_prognums);

#ifdef DEBUG
        /*
         * Error injection hook: set the cb4_recall_fail global to
         * the NFS4 protocol error to be returned.
         */
        if (cb4_recall_fail != NFS4_OK) {
                *cs->statusp = resp->status = cb4_recall_fail;
                return;
        }
#endif

        mutex_enter(&ncg->nfs4_cb_lock);
        sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
        mutex_exit(&ncg->nfs4_cb_lock);

        if (nfs4_server_vlock(sp, 0) == FALSE) {

                CB_WARN("cb_recall: cannot find server\n");

                *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
                return;
        }

        /*
         * Search the delegation list for a matching file handle
         * AND stateid; mutex on sp prevents the list from changing.
         */

        rp = list_head(&sp->s_deleg_list);
        for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) {
                mutex_enter(&rp->r_statev4_lock);

                /* check both state id and file handle! */

                if ((bcmp(&rp->r_deleg_stateid, &args->stateid,
                    sizeof (stateid4)) == 0)) {
                        nfs4_fhandle_t fhandle;

                        sfh4_copyval(rp->r_fh, &fhandle);
                        if ((fhandle.fh_len == args->fh.nfs_fh4_len &&
                            bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val,
                            fhandle.fh_len) == 0)) {

                                found = TRUE;
                                break;
                        } else {
#ifdef  DEBUG
                                CB_WARN("cb_recall: stateid OK, bad fh");
#endif
                        }
                }
#ifdef  DEBUG
                if (bcmp(&args->stateid, &nfs4_deleg_any,
                    sizeof (stateid4)) == 0) {

                        found = TRUE;
                        break;
                }
#endif
                mutex_exit(&rp->r_statev4_lock);
        }

        /*
         * VN_HOLD the vnode before releasing s_lock to guarantee
         * we have a valid vnode reference.  The async thread will
         * release the hold when it's done.
         */
        if (found == TRUE) {
                mutex_exit(&rp->r_statev4_lock);
                vp = RTOV4(rp);
                VN_HOLD(vp);
        }
        mutex_exit(&sp->s_lock);
        nfs4_server_rele(sp);

        if (found == FALSE) {

                CB_WARN("cb_recall: bad stateid\n");

                *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
                return;
        }

        /* Fire up a thread to do the delegreturn */
        nfs4delegreturn_async(rp, NFS4_DR_RECALL|NFS4_DR_REOPEN,
            args->truncate);

        *cs->statusp = resp->status = 0;
}

/* ARGSUSED */
static void
cb_recall_free(nfs_cb_resop4 *resop)
{
        /* nothing to do here, cb_recall doesn't kmem_alloc */
}

/*
 * This function handles the CB_NULL proc call from an NFSv4 Server.
 *
 * We take note that the server has sent a CB_NULL for later processing
 * in the recovery logic. It is noted so we may pause slightly after the
 * setclientid and before reopening files. The pause is to allow the
 * NFSv4 Server time to receive the CB_NULL reply and adjust any of
 * its internal structures such that it has the opportunity to grant
 * delegations to reopened files.
 *
 */

/* ARGSUSED */
static void
cb_null(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req,
    struct nfs4_callback_globals *ncg)
{
        struct nfs4_server *sp;

        ncg->nfs4_callback_stats.cb_null.value.ui64++;

        ASSERT(req->rq_prog >= NFS4_CALLBACK);
        ASSERT(req->rq_prog < NFS4_CALLBACK+nfs4_num_prognums);

        mutex_enter(&ncg->nfs4_cb_lock);
        sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
        mutex_exit(&ncg->nfs4_cb_lock);

        if (nfs4_server_vlock(sp, 0) != FALSE) {
                sp->s_flags |= N4S_CB_PINGED;
                cv_broadcast(&sp->wait_cb_null);
                mutex_exit(&sp->s_lock);
                nfs4_server_rele(sp);
        }
}

/*
 * cb_illegal   args: void
 *              res : status (NFS4ERR_OP_CB_ILLEGAL)
 */
/* ARGSUSED */
static void
cb_illegal(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
        struct compound_state *cs, struct nfs4_callback_globals *ncg)
{
        CB_ILLEGAL4res *resp = &resop->nfs_cb_resop4_u.opcbillegal;

        ncg->nfs4_callback_stats.cb_illegal.value.ui64++;
        resop->resop = OP_CB_ILLEGAL;
        *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
}

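/*
 * cb_compound - Process a CB_COMPOUND request: echo the request tag,
 * check the minor version, then execute each operation in the array,
 * stopping (and compacting the results array) at the first op that
 * does not return NFS4_OK.
 */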
static void
cb_compound(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req,
        struct nfs4_callback_globals *ncg)
{
        uint_t i;
        struct compound_state cs;
        nfs_cb_argop4 *argop;
        nfs_cb_resop4 *resop, *new_res;
        uint_t op;

        bzero(&cs, sizeof (cs));
        cs.statusp = &resp->status;
        cs.cont = TRUE;

        /*
         * Form a reply tag by copying over the request tag.
         */
        resp->tag.utf8string_len = args->tag.utf8string_len;
        resp->tag.utf8string_val = kmem_alloc(resp->tag.utf8string_len,
            KM_SLEEP);
        bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
            args->tag.utf8string_len);

        /*
         * XXX for now, minorversion should be zero
         */
        if (args->minorversion != CB4_MINORVERSION) {
                resp->array_len = 0;
                resp->array = NULL;
                resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
                return;
        }

#ifdef DEBUG
        /*
         * Verify callback_ident.  It doesn't really matter if it's wrong
         * because we don't really use callback_ident -- we use prog number
         * of the RPC request instead.  In this case, just print a DEBUG
         * console message to reveal brokenness of cbclient (at bkoff/cthon).
         */
        if (args->callback_ident != req->rq_prog)
                zcmn_err(getzoneid(), CE_WARN,
                    "cb_compound: cb_client using wrong "
                    "callback_ident(%d), should be %d",
                    args->callback_ident, req->rq_prog);
#endif

        resp->array_len = args->array_len;
        resp->array = kmem_zalloc(args->array_len * sizeof (nfs_cb_resop4),
            KM_SLEEP);

        for (i = 0; i < args->array_len && cs.cont; i++) {

                argop = &args->array[i];
                resop = &resp->array[i];
                resop->resop = argop->argop;
                op = (uint_t)resop->resop;

                switch (op) {

                case OP_CB_GETATTR:

                        cb_getattr(argop, resop, req, &cs, ncg);
                        break;

                case OP_CB_RECALL:

                        cb_recall(argop, resop, req, &cs, ncg);
                        break;

                case OP_CB_ILLEGAL:

                        /* fall through */

                default:
                        /*
                         * Handle OP_CB_ILLEGAL and any undefined opcode.
                         * Currently, the XDR code will return BADXDR
                         * if cb op doesn't decode to legal value, so
                         * it really only handles OP_CB_ILLEGAL.
                         */
                        op = OP_CB_ILLEGAL;
                        cb_illegal(argop, resop, req, &cs, ncg);
                }

                if (*cs.statusp != NFS4_OK)
                        cs.cont = FALSE;

                /*
                 * If not at last op, and if we are to stop, then
                 * compact the results array.
                 */
                if ((i + 1) < args->array_len && !cs.cont) {

                        new_res = kmem_alloc(
                            (i+1) * sizeof (nfs_cb_resop4), KM_SLEEP);
                        bcopy(resp->array,
                            new_res, (i+1) * sizeof (nfs_cb_resop4));
                        kmem_free(resp->array,
                            args->array_len * sizeof (nfs_cb_resop4));

                        resp->array_len = i + 1;
                        resp->array = new_res;
                }
        }
}

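/*
 * cb_compound_free - Free the reply tag, the per-op results, and the
 * results array allocated by cb_compound.
 */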
static void
cb_compound_free(CB_COMPOUND4res *resp)
{
        uint_t i, op;
        nfs_cb_resop4 *resop;

        if (resp->tag.utf8string_val) {
                UTF8STRING_FREE(resp->tag)
        }

        for (i = 0; i < resp->array_len; i++) {

                resop = &resp->array[i];
                op = (uint_t)resop->resop;

                switch (op) {

                case OP_CB_GETATTR:

                        cb_getattr_free(resop);
                        break;

                case OP_CB_RECALL:

                        cb_recall_free(resop);
                        break;

                default:
                        break;
                }
        }

        if (resp->array != NULL) {
                kmem_free(resp->array,
                    resp->array_len * sizeof (nfs_cb_resop4));
        }
}

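/*
 * cb_dispatch - RPC dispatch routine for the callback program: decode
 * the arguments, route CB_NULL and CB_COMPOUND to their handlers, send
 * the reply, and free the results and the decoded arguments.
 */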
static void
cb_dispatch(struct svc_req *req, SVCXPRT *xprt)
{
        CB_COMPOUND4args args;
        CB_COMPOUND4res res;
        struct nfs4_callback_globals *ncg;

        bool_t (*xdr_args)(), (*xdr_res)();
        void (*proc)(CB_COMPOUND4args *, CB_COMPOUND4res *, struct svc_req *,
            struct nfs4_callback_globals *);
        void (*freeproc)(CB_COMPOUND4res *);

        ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
        ASSERT(ncg != NULL);

        ncg->nfs4_callback_stats.cb_dispatch.value.ui64++;

        switch (req->rq_proc) {
        case CB_NULL:
                xdr_args = xdr_void;
                xdr_res = xdr_void;
                proc = cb_null;
                freeproc = NULL;
                break;

        case CB_COMPOUND:
                xdr_args = xdr_CB_COMPOUND4args_clnt;
                xdr_res = xdr_CB_COMPOUND4res;
                proc = cb_compound;
                freeproc = cb_compound_free;
                break;

        default:
                CB_WARN("cb_dispatch: no proc\n");
                svcerr_noproc(xprt);
                return;
        }

        args.tag.utf8string_val = NULL;
        args.array = NULL;

        if (!SVC_GETARGS(xprt, xdr_args, (caddr_t)&args)) {

                CB_WARN("cb_dispatch: cannot getargs\n");
                svcerr_decode(xprt);
                return;
        }

        (*proc)(&args, &res, req, ncg);

        if (svc_sendreply(xprt, xdr_res, (caddr_t)&res) == FALSE) {

                CB_WARN("cb_dispatch: bad sendreply\n");
                svcerr_systemerr(xprt);
        }

        if (freeproc)
                (*freeproc)(&res);

        if (!SVC_FREEARGS(xprt, xdr_args, (caddr_t)&args)) {

                CB_WARN("cb_dispatch: bad freeargs\n");
        }
}

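/*
 * nfs4_getnextprogram - Find an unused callback program number by
 * scanning the zone's nfs4prog2server table, starting at the hint.
 * Returns 0 if all nfs4_num_prognums slots are in use.
 */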
static rpcprog_t
nfs4_getnextprogram(struct nfs4_callback_globals *ncg)
{
        int i, j;

        j = ncg->nfs4_program_hint;
        for (i = 0; i < nfs4_num_prognums; i++, j++) {

                if (j >= nfs4_num_prognums)
                        j = 0;

                if (ncg->nfs4prog2server[j] == NULL) {
                        ncg->nfs4_program_hint = j+1;
                        return (j+NFS4_CALLBACK);
                }
        }

        return (0);
}

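/*
 * nfs4callback_destroy - Release the callback program number assigned
 * to this nfs4_server_t and make its table slot available for reuse.
 */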
void
nfs4callback_destroy(nfs4_server_t *np)
{
        struct nfs4_callback_globals *ncg;
        int i;

        if (np->s_program == 0)
                return;

        ncg = np->zone_globals;
        i = np->s_program - NFS4_CALLBACK;

        mutex_enter(&ncg->nfs4_cb_lock);

        ASSERT(ncg->nfs4prog2server[i] == np);

        ncg->nfs4prog2server[i] = NULL;

        if (i < ncg->nfs4_program_hint)
                ncg->nfs4_program_hint = i;

        mutex_exit(&ncg->nfs4_cb_lock);
}

/*
 * nfs4_setport - This function saves a netid and universal address for
 * the callback program.  These values will be used during setclientid.
 */
static void
nfs4_setport(char *netid, char *uaddr, char *protofmly, char *proto,
        struct nfs4_callback_globals *ncg)
{
        struct nfs4_cb_port *p;
        bool_t found = FALSE;

        ASSERT(MUTEX_HELD(&ncg->nfs4_cb_lock));

        p = list_head(&ncg->nfs4_cb_ports);
        for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) {
                if (strcmp(p->netid, netid) == 0) {
                        found = TRUE;
                        break;
                }
        }
        if (found == TRUE)
                (void) strcpy(p->uaddr, uaddr);
        else {
                p = kmem_alloc(sizeof (*p), KM_SLEEP);

                (void) strcpy(p->uaddr, uaddr);
                (void) strcpy(p->netid, netid);
                (void) strcpy(p->protofmly, protofmly);
                (void) strcpy(p->proto, proto);
                list_insert_head(&ncg->nfs4_cb_ports, p);
        }
}

/*
 * nfs4_cb_args - This function is used to construct the callback
 * portion of the arguments needed for setclientid.
 */

void
nfs4_cb_args(nfs4_server_t *np, struct knetconfig *knc, SETCLIENTID4args *args)
{
        struct nfs4_cb_port *p;
        bool_t found = FALSE;
        rpcprog_t pgm;
        struct nfs4_callback_globals *ncg = np->zone_globals;

        /*
         * This server structure may already have a program number
         * assigned to it.  This happens when the client has to
         * re-issue SETCLIENTID.  Just re-use the information.
         */
        if (np->s_program >= NFS4_CALLBACK &&
            np->s_program < NFS4_CALLBACK + nfs4_num_prognums)
                nfs4callback_destroy(np);

        mutex_enter(&ncg->nfs4_cb_lock);

        p = list_head(&ncg->nfs4_cb_ports);
        for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) {
                if (strcmp(p->protofmly, knc->knc_protofmly) == 0 &&
                    strcmp(p->proto, knc->knc_proto) == 0) {
                        found = TRUE;
                        break;
                }
        }

        if (found == FALSE) {

                NFS4_DEBUG(nfs4_callback_debug,
                    (CE_WARN, "nfs4_cb_args: could not find netid for %s/%s\n",
                    knc->knc_protofmly, knc->knc_proto));

                args->callback.cb_program = 0;
                args->callback.cb_location.r_netid = NULL;
                args->callback.cb_location.r_addr = NULL;
                args->callback_ident = 0;
                mutex_exit(&ncg->nfs4_cb_lock);
                return;
        }

        if ((pgm = nfs4_getnextprogram(ncg)) == 0) {
                CB_WARN("nfs4_cb_args: out of program numbers\n");

                args->callback.cb_program = 0;
                args->callback.cb_location.r_netid = NULL;
                args->callback.cb_location.r_addr = NULL;
                args->callback_ident = 0;
                mutex_exit(&ncg->nfs4_cb_lock);
                return;
        }

        ncg->nfs4prog2server[pgm-NFS4_CALLBACK] = np;
        args->callback.cb_program = pgm;
        args->callback.cb_location.r_netid = p->netid;
        args->callback.cb_location.r_addr = p->uaddr;
        args->callback_ident = pgm;

        np->s_program = pgm;

        mutex_exit(&ncg->nfs4_cb_lock);
}

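/*
 * nfs4_dquery - Service the NFS4_DQUERY command: copy out the type of
 * delegation (if any) currently held for the file referenced by the
 * passed-in file descriptor.
 */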
static int
nfs4_dquery(struct nfs4_svc_args *arg, model_t model)
{
        file_t *fp;
        vnode_t *vp;
        rnode4_t *rp;
        int error;
        STRUCT_HANDLE(nfs4_svc_args, uap);

        STRUCT_SET_HANDLE(uap, model, arg);

        if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
                return (EBADF);

        vp = fp->f_vnode;

        if (vp == NULL || vp->v_type != VREG ||
            !vn_matchops(vp, nfs4_vnodeops)) {
                releasef(STRUCT_FGET(uap, fd));
                return (EBADF);
        }

        rp = VTOR4(vp);

        /*
         * I can't convince myself that we need locking here.  The
         * rnode cannot disappear and the value returned is instantly
         * stale anyway, so why bother?
         */

        error = suword32(STRUCT_FGETP(uap, netid), rp->r_deleg_type);
        releasef(STRUCT_FGET(uap, fd));
        return (error);
}


/*
 * NFS4 client system call.  This service does the
 * necessary initialization for the callback program.
 * This is fashioned after the server side interaction
 * between nfsd and the kernel.  On the client, the
 * mount command forks and the child process does the
 * necessary interaction with the kernel.
 *
 * uap->fd is the fd of an open transport provider
 */
int
nfs4_svc(struct nfs4_svc_args *arg, model_t model)
{
        file_t *fp;
        int error;
        int readsize;
        char buf[KNC_STRSIZE], uaddr[KNC_STRSIZE];
        char protofmly[KNC_STRSIZE], proto[KNC_STRSIZE];
        size_t len;
        STRUCT_HANDLE(nfs4_svc_args, uap);
        struct netbuf addrmask;
        int cmd;
        SVCMASTERXPRT *cb_xprt;
        struct nfs4_callback_globals *ncg;

#ifdef lint
        model = model;          /* STRUCT macros don't always refer to it */
#endif

        STRUCT_SET_HANDLE(uap, model, arg);

        if (STRUCT_FGET(uap, cmd) == NFS4_DQUERY)
                return (nfs4_dquery(arg, model));

        if (secpolicy_nfs(CRED()) != 0)
                return (EPERM);

        if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
                return (EBADF);

        /*
         * Set read buffer size to rsize
         * and add room for RPC headers.
         */
        readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA);
        if (readsize < RPC_MAXDATASIZE)
                readsize = RPC_MAXDATASIZE;

        error = copyinstr((const char *)STRUCT_FGETP(uap, netid), buf,
            KNC_STRSIZE, &len);
        if (error) {
                releasef(STRUCT_FGET(uap, fd));
                return (error);
        }

        cmd = STRUCT_FGET(uap, cmd);

        if (cmd & NFS4_KRPC_START) {
                addrmask.len = STRUCT_FGET(uap, addrmask.len);
                addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen);
                addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP);
                error = copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf,
                    addrmask.len);
                if (error) {
                        releasef(STRUCT_FGET(uap, fd));
                        kmem_free(addrmask.buf, addrmask.maxlen);
                        return (error);
                }
        } else
                addrmask.buf = NULL;

        error = copyinstr((const char *)STRUCT_FGETP(uap, addr), uaddr,
            sizeof (uaddr), &len);
        if (error) {
                releasef(STRUCT_FGET(uap, fd));
                if (addrmask.buf)
                        kmem_free(addrmask.buf, addrmask.maxlen);
                return (error);
        }

        error = copyinstr((const char *)STRUCT_FGETP(uap, protofmly), protofmly,
            sizeof (protofmly), &len);
        if (error) {
                releasef(STRUCT_FGET(uap, fd));
                if (addrmask.buf)
                        kmem_free(addrmask.buf, addrmask.maxlen);
                return (error);
        }

        error = copyinstr((const char *)STRUCT_FGETP(uap, proto), proto,
            sizeof (proto), &len);
        if (error) {
                releasef(STRUCT_FGET(uap, fd));
                if (addrmask.buf)
                        kmem_free(addrmask.buf, addrmask.maxlen);
                return (error);
        }

        ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
        ASSERT(ncg != NULL);

        mutex_enter(&ncg->nfs4_cb_lock);
        if (cmd & NFS4_SETPORT)
                nfs4_setport(buf, uaddr, protofmly, proto, ncg);

        if (cmd & NFS4_KRPC_START) {
                error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &cb_xprt,
                    &nfs4_cb_sct, NULL, NFS_CB_SVCPOOL_ID, FALSE);
                if (error) {
                        CB_WARN1("nfs4_svc: svc_tli_kcreate failed %d\n",
                            error);
                        kmem_free(addrmask.buf, addrmask.maxlen);
                }
        }

        mutex_exit(&ncg->nfs4_cb_lock);
        releasef(STRUCT_FGET(uap, fd));
        return (error);
}

struct nfs4_callback_globals *
nfs4_get_callback_globals(void)
{
        return (zone_getspecific(nfs4_callback_zone_key, nfs_zone()));
}

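/*
 * nfs4_callback_init_zone - Zone-create callback: allocate the per-zone
 * callback globals (prognum-to-server table, delegation return list,
 * callback port list) and register the per-zone "nfs4_callback_stats"
 * kstat.
 */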
static void *
nfs4_callback_init_zone(zoneid_t zoneid)
{
        kstat_t *nfs4_callback_kstat;
        struct nfs4_callback_globals *ncg;

        ncg = kmem_zalloc(sizeof (*ncg), KM_SLEEP);

        ncg->nfs4prog2server = kmem_zalloc(nfs4_num_prognums *
            sizeof (struct nfs4_server *), KM_SLEEP);

        /* initialize the dlist */
        mutex_init(&ncg->nfs4_dlist_lock, NULL, MUTEX_DEFAULT, NULL);
        list_create(&ncg->nfs4_dlist, sizeof (struct nfs4_dnode),
            offsetof(struct nfs4_dnode, linkage));

        /* initialize cb_port list */
        mutex_init(&ncg->nfs4_cb_lock, NULL, MUTEX_DEFAULT, NULL);
        list_create(&ncg->nfs4_cb_ports, sizeof (struct nfs4_cb_port),
            offsetof(struct nfs4_cb_port, linkage));

        /* get our own copy of the kstats */
        bcopy(&nfs4_callback_stats_tmpl, &ncg->nfs4_callback_stats,
            sizeof (nfs4_callback_stats_tmpl));
        /* register "nfs:0:nfs4_callback_stats" for this zone */
        if ((nfs4_callback_kstat =
            kstat_create_zone("nfs", 0, "nfs4_callback_stats", "misc",
            KSTAT_TYPE_NAMED,
            sizeof (ncg->nfs4_callback_stats) / sizeof (kstat_named_t),
            KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE,
            zoneid)) != NULL) {
                nfs4_callback_kstat->ks_data = &ncg->nfs4_callback_stats;
                kstat_install(nfs4_callback_kstat);
        }
        return (ncg);
}

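/*
 * nfs4_discard_delegations - Walk every nfs4_server_t registered in
 * this zone's prognum table and discard all delegations on its list
 * without going over the wire.  Called at zone shutdown and destroy.
 */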
static void
nfs4_discard_delegations(struct nfs4_callback_globals *ncg)
{
        nfs4_server_t *sp;
        int i, num_removed;

        /*
         * It's OK here to just run through the registered "programs", as
         * servers without programs won't have any delegations to handle.
         */
        for (i = 0; i < nfs4_num_prognums; i++) {
                rnode4_t *rp;

                mutex_enter(&ncg->nfs4_cb_lock);
                sp = ncg->nfs4prog2server[i];
                mutex_exit(&ncg->nfs4_cb_lock);

                if (nfs4_server_vlock(sp, 1) == FALSE)
                        continue;
                num_removed = 0;
                while ((rp = list_head(&sp->s_deleg_list)) != NULL) {
                        mutex_enter(&rp->r_statev4_lock);
                        if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
                                /*
                                 * We need to take matters into our own hands,
                                 * as nfs4delegreturn_cleanup_impl() won't
                                 * remove this from the list.
                                 */
                                list_remove(&sp->s_deleg_list, rp);
                                mutex_exit(&rp->r_statev4_lock);
                                nfs4_dec_state_ref_count_nolock(sp,
                                    VTOMI4(RTOV4(rp)));
                                num_removed++;
                                continue;
                        }
                        mutex_exit(&rp->r_statev4_lock);
                        VN_HOLD(RTOV4(rp));
                        mutex_exit(&sp->s_lock);
                        /*
                         * The following will remove the node from the list.
                         */
                        nfs4delegreturn_cleanup_impl(rp, sp, ncg);
                        VN_RELE(RTOV4(rp));
                        mutex_enter(&sp->s_lock);
                }
                mutex_exit(&sp->s_lock);
                /* each removed list node reles a reference */
                while (num_removed-- > 0)
                        nfs4_server_rele(sp);
                /* remove our reference for nfs4_server_vlock */
                nfs4_server_rele(sp);
        }
}

/* ARGSUSED */
static void
nfs4_callback_shutdown_zone(zoneid_t zoneid, void *data)
{
        struct nfs4_callback_globals *ncg = data;

        /*
         * Clean pending delegation return list.
         */
        nfs4_dlistclean_impl(ncg, NFS4_DR_DISCARD);

        /*
         * Discard all delegations.
         */
        nfs4_discard_delegations(ncg);
}

static void
nfs4_callback_fini_zone(zoneid_t zoneid, void *data)
{
        struct nfs4_callback_globals *ncg = data;
        struct nfs4_cb_port *p;
        nfs4_server_t *sp, *next;
        nfs4_server_t freelist;
        int i;

        kstat_delete_byname_zone("nfs", 0, "nfs4_callback_stats", zoneid);

        /*
         * Discard all delegations that may have crept in since we did the
         * _shutdown.
         */
        nfs4_discard_delegations(ncg);
        /*
         * We're completely done with this zone and all associated
         * nfs4_server_t's.  Any remaining nfs4_server_ts should only have one
         * more reference outstanding -- the reference we didn't release in
         * nfs4_renew_lease_thread().
         *
         * Here we need to run through the global nfs4_server_lst as we need to
         * deal with nfs4_server_ts without programs, as they also have threads
         * created for them, and so have outstanding references that we need to
         * release.
         */
        freelist.forw = &freelist;
        freelist.back = &freelist;
        mutex_enter(&nfs4_server_lst_lock);
        sp = nfs4_server_lst.forw;
        while (sp != &nfs4_server_lst) {
                next = sp->forw;
                if (sp->zoneid == zoneid) {
                        remque(sp);
                        insque(sp, &freelist);
                }
                sp = next;
        }
        mutex_exit(&nfs4_server_lst_lock);

        sp = freelist.forw;
        while (sp != &freelist) {
                next = sp->forw;
                nfs4_server_rele(sp);   /* free the list's reference */
                sp = next;
        }

#ifdef DEBUG
        for (i = 0; i < nfs4_num_prognums; i++) {
                ASSERT(ncg->nfs4prog2server[i] == NULL);
        }
#endif
        kmem_free(ncg->nfs4prog2server, nfs4_num_prognums *
            sizeof (struct nfs4_server *));

        mutex_enter(&ncg->nfs4_cb_lock);
        while ((p = list_head(&ncg->nfs4_cb_ports)) != NULL) {
                list_remove(&ncg->nfs4_cb_ports, p);
                kmem_free(p, sizeof (*p));
        }
        list_destroy(&ncg->nfs4_cb_ports);
        mutex_destroy(&ncg->nfs4_cb_lock);
        list_destroy(&ncg->nfs4_dlist);
        mutex_destroy(&ncg->nfs4_dlist_lock);
        kmem_free(ncg, sizeof (*ncg));
}

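/*
 * nfs4_callback_init - Global initialization: build the RPC callout
 * table covering all callback program numbers and register the zone
 * key with its init/shutdown/fini callbacks.
 */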
void
nfs4_callback_init(void)
{
        int i;
        SVC_CALLOUT *nfs4_cb_sc;

        /* initialize the callback table */
        nfs4_cb_sc = kmem_alloc(nfs4_num_prognums *
            sizeof (SVC_CALLOUT), KM_SLEEP);

        for (i = 0; i < nfs4_num_prognums; i++) {
                nfs4_cb_sc[i].sc_prog = NFS4_CALLBACK+i;
                nfs4_cb_sc[i].sc_versmin = NFS_CB;
                nfs4_cb_sc[i].sc_versmax = NFS_CB;
                nfs4_cb_sc[i].sc_dispatch = cb_dispatch;
        }

        nfs4_cb_sct.sct_size = nfs4_num_prognums;
        nfs4_cb_sct.sct_free = FALSE;
        nfs4_cb_sct.sct_sc = nfs4_cb_sc;

        /*
         * Compute the max bytes required for the dynamically allocated
         * parts of the cb_getattr reply.  Only size and change are
         * supported now.  If CB_GETATTR is changed to reply with
         * additional attrs, additional sizes must be added below.
         *
         * fattr4_change + fattr4_size == uint64_t + uint64_t
         */
        cb_getattr_bytes = 2 * BYTES_PER_XDR_UNIT + 2 * BYTES_PER_XDR_UNIT;

        zone_key_create(&nfs4_callback_zone_key, nfs4_callback_init_zone,
            nfs4_callback_shutdown_zone, nfs4_callback_fini_zone);
}

void
nfs4_callback_fini(void)
{
}

/*
 * NB: This function can be called from the *wrong* zone (ie, the zone that
 * 'rp' belongs to and the caller's zone may not be the same).  This can happen
 * if the zone is going away and we get called from nfs4_async_inactive().  In
 * this case the globals will be NULL and we won't update the counters, which
 * doesn't matter as the zone is going away anyhow.
 */
static void
nfs4delegreturn_cleanup_impl(rnode4_t *rp, nfs4_server_t *np,
        struct nfs4_callback_globals *ncg)
{
        mntinfo4_t *mi = VTOMI4(RTOV4(rp));
        boolean_t need_rele = B_FALSE;

        /*
         * Caller must be holding mi_recovlock in read mode
         * to call here.  This is provided by start_op.
         * Delegation management requires grabbing s_lock
         * first and then r_statev4_lock.
         */

        if (np == NULL) {
                np = find_nfs4_server_all(mi, 1);
                if (np == NULL)
                        return;
                need_rele = B_TRUE;
        } else {
                mutex_enter(&np->s_lock);
        }

        mutex_enter(&rp->r_statev4_lock);

        if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
                mutex_exit(&rp->r_statev4_lock);
                mutex_exit(&np->s_lock);
                if (need_rele)
                        nfs4_server_rele(np);
                return;
        }

        /*
         * Free the cred originally held when
         * the delegation was granted.  Caller must
         * hold this cred if it wants to use it after
         * this call.
         */
        crfree(rp->r_deleg_cred);
        rp->r_deleg_cred = NULL;
        rp->r_deleg_type = OPEN_DELEGATE_NONE;
        rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
        rp->r_deleg_needs_recall = FALSE;
        rp->r_deleg_return_pending = FALSE;

        /*
         * Remove the rnode from the server's list and
         * update the ref counts.
         */
        list_remove(&np->s_deleg_list, rp);
        mutex_exit(&rp->r_statev4_lock);
        nfs4_dec_state_ref_count_nolock(np, mi);
        mutex_exit(&np->s_lock);
        /* removed list node removes a reference */
        nfs4_server_rele(np);
        if (need_rele)
                nfs4_server_rele(np);
        if (ncg != NULL)
                ncg->nfs4_callback_stats.delegations.value.ui64--;
}

void
nfs4delegreturn_cleanup(rnode4_t *rp, nfs4_server_t *np)
{
        struct nfs4_callback_globals *ncg;

        if (np != NULL) {
                ncg = np->zone_globals;
        } else if (nfs_zone() == VTOMI4(RTOV4(rp))->mi_zone) {
                ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
                ASSERT(ncg != NULL);
        } else {
                /*
                 * Request coming from the wrong zone.
                 */
                ASSERT(getzoneid() == GLOBAL_ZONEID);
                ncg = NULL;
        }

        nfs4delegreturn_cleanup_impl(rp, np, ncg);
}

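/*
 * nfs4delegreturn_save_lost_rqst - If the error indicates an
 * interrupted or timed-out request (or a forced unmount), fill in a
 * lost-request record so recovery can re-drive the DELEGRETURN;
 * otherwise clear lr_op so the request is not treated as lost.
 */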
static void
nfs4delegreturn_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp,
        cred_t *cr, vnode_t *vp)
{
        if (error != ETIMEDOUT && error != EINTR &&
            !NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) {
                lost_rqstp->lr_op = 0;
                return;
        }

        NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
            "nfs4delegreturn_save_lost_rqst: error %d", error));

        lost_rqstp->lr_op = OP_DELEGRETURN;
        /*
         * The vp is held and rele'd via the recovery code.
         * See nfs4_save_lost_rqst.
         */
        lost_rqstp->lr_vp = vp;
        lost_rqstp->lr_dvp = NULL;
        lost_rqstp->lr_oop = NULL;
        lost_rqstp->lr_osp = NULL;
        lost_rqstp->lr_lop = NULL;
        lost_rqstp->lr_cr = cr;
        lost_rqstp->lr_flk = NULL;
        lost_rqstp->lr_putfirst = FALSE;
}

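/*
 * nfs4delegreturn_otw - The over-the-wire half of a delegation return:
 * send a PUTFH/GETATTR/DELEGRETURN compound and, on success, cache the
 * attributes returned by the GETATTR.
 */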
static void
nfs4delegreturn_otw(rnode4_t *rp, cred_t *cr, nfs4_error_t *ep)
{
        COMPOUND4args_clnt args;
        COMPOUND4res_clnt res;
        nfs_argop4 argops[3];
        nfs4_ga_res_t *garp = NULL;
        hrtime_t t;
        int numops;
        int doqueue = 1;

        args.ctag = TAG_DELEGRETURN;

        numops = 3;             /* PUTFH, GETATTR, DELEGRETURN */

        args.array = argops;
        args.array_len = numops;

        argops[0].argop = OP_CPUTFH;
        argops[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh;

        argops[1].argop = OP_GETATTR;
        argops[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
        argops[1].nfs_argop4_u.opgetattr.mi = VTOMI4(RTOV4(rp));

        argops[2].argop = OP_DELEGRETURN;
        argops[2].nfs_argop4_u.opdelegreturn.deleg_stateid =
            rp->r_deleg_stateid;

        t = gethrtime();
        rfs4call(VTOMI4(RTOV4(rp)), &args, &res, cr, &doqueue, 0, ep);

        if (ep->error)
                return;

        if (res.status == NFS4_OK) {
                garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res;
                nfs4_attr_cache(RTOV4(rp), garp, t, cr, TRUE, NULL);
        }
        (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
}

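/*
 * nfs4_do_delegreturn - Return a delegation to the server, looping
 * until the return succeeds, is handed off to the recovery framework,
 * or start_fop fails (in which case NFS4_DR_FORCE discards the
 * delegation locally).
 */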
int
nfs4_do_delegreturn(rnode4_t *rp, int flags, cred_t *cr,
        struct nfs4_callback_globals *ncg)
{
        vnode_t *vp = RTOV4(rp);
        mntinfo4_t *mi = VTOMI4(vp);
        nfs4_lost_rqst_t lost_rqst;
        nfs4_recov_state_t recov_state;
        bool_t needrecov = FALSE, recovonly, done = FALSE;
        nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };

        ncg->nfs4_callback_stats.delegreturn.value.ui64++;

        while (!done) {
                e.error = nfs4_start_fop(mi, vp, NULL, OH_DELEGRETURN,
                    &recov_state, &recovonly);

                if (e.error) {
                        if (flags & NFS4_DR_FORCE) {
                                (void) nfs_rw_enter_sig(&mi->mi_recovlock,
                                    RW_READER, 0);
                                nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
                                nfs_rw_exit(&mi->mi_recovlock);
                        }
                        break;
                }

                /*
                 * Check to see if the delegation has already been
                 * returned by the recovery thread.  The state of
                 * the delegation cannot change at this point due
                 * to start_fop and the r_deleg_recall_lock.
                 */
                if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
                        e.error = 0;
                        nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
                        break;
                }

                if (recovonly) {
                        /*
                         * Delegation will be returned via the
                         * recovery framework.  Build a lost request
                         * structure, start recovery and get out.
                         */
                        nfs4_error_init(&e, EINTR);
                        nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst,
                            cr, vp);
                        (void) nfs4_start_recovery(&e, mi, vp,
                            NULL, &rp->r_deleg_stateid,
                            lost_rqst.lr_op == OP_DELEGRETURN ?
                            &lost_rqst : NULL, OP_DELEGRETURN, NULL,
                            NULL, NULL);
                        nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
                        break;
                }

                nfs4delegreturn_otw(rp, cr, &e);

                /*
                 * Ignore some errors on delegreturn; no point in marking
                 * the file dead on a state destroying operation.
                 */
                if (e.error == 0 && (nfs4_recov_marks_dead(e.stat) ||
                    e.stat == NFS4ERR_BADHANDLE ||
                    e.stat == NFS4ERR_STALE))
                        needrecov = FALSE;
                else
                        needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp);

                if (needrecov) {
                        nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst,
                            cr, vp);
                        (void) nfs4_start_recovery(&e, mi, vp,
                            NULL, &rp->r_deleg_stateid,
                            lost_rqst.lr_op == OP_DELEGRETURN ?
                            &lost_rqst : NULL, OP_DELEGRETURN, NULL,
                            NULL, NULL);
                } else {
                        nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
                        done = TRUE;
                }

                nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
        }
        return (e.error);
}

/*
 * nfs4_resend_delegreturn - used to drive the delegreturn
 * operation via the recovery thread.
 */
void
nfs4_resend_delegreturn(nfs4_lost_rqst_t *lorp, nfs4_error_t *ep,
        nfs4_server_t *np)
{
        rnode4_t *rp = VTOR4(lorp->lr_vp);

        /* If the file failed recovery, just quit. */
        mutex_enter(&rp->r_statelock);
        if (rp->r_flags & R4RECOVERR) {
                ep->error = EIO;
        }
        mutex_exit(&rp->r_statelock);

        if (!ep->error)
                nfs4delegreturn_otw(rp, lorp->lr_cr, ep);

        /*
         * If recovery is now needed, then return the error
         * and status and let the recovery thread handle it,
         * including re-driving another delegreturn.  Otherwise,
         * just give up and clean up the delegation.
         */
        if (nfs4_needs_recovery(ep, TRUE, lorp->lr_vp->v_vfsp))
                return;

        if (rp->r_deleg_type != OPEN_DELEGATE_NONE)
                nfs4delegreturn_cleanup(rp, np);

        nfs4_error_zinit(ep);
}

1568 /*
1569  * nfs4delegreturn - general function to return a delegation.
1570  *
1571  * NFS4_DR_FORCE - return the delegation even if start_op fails
1572  * NFS4_DR_PUSH - push modified data back to the server via VOP_PUTPAGE
1573  * NFS4_DR_DISCARD - discard the delegation w/o delegreturn
1574  * NFS4_DR_DID_OP - calling function already did nfs4_start_op
 * NFS4_DR_RECALL - delegreturn initiated via CB_RECALL
1576  * NFS4_DR_REOPEN - do file reopens, if applicable
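 *
 * For illustration only, a caller that wants to flush dirty pages,
 * reopen any delegation open streams, and then return the delegation
 * over the wire would typically do:
 *
 *      (void) nfs4delegreturn(rp, NFS4_DR_PUSH|NFS4_DR_REOPEN);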
1577  */
1578 static int
1579 nfs4delegreturn_impl(rnode4_t *rp, int flags, struct nfs4_callback_globals *ncg)
1580 {
1581         int error = 0;
1582         cred_t *cr = NULL;
1583         vnode_t *vp;
1584         bool_t needrecov = FALSE;
1585         bool_t rw_entered = FALSE;
1586         bool_t do_reopen;
1587 
1588         vp = RTOV4(rp);
1589 
1590         /*
1591          * If NFS4_DR_DISCARD is set by itself, take a short-cut and
1592          * discard without doing an otw DELEGRETURN.  This may only be used
1593          * by the recovery thread because it bypasses the synchronization
1594          * with r_deleg_recall_lock and mi->mi_recovlock.
1595          */
1596         if (flags == NFS4_DR_DISCARD) {
1597                 nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1598                 return (0);
1599         }
1600 
1601         if (flags & NFS4_DR_DID_OP) {
1602                 /*
1603                  * Caller had already done start_op, which means the
1604                  * r_deleg_recall_lock is already held in READ mode
1605                  * so we cannot take it in write mode.  Return the
1606                  * delegation asynchronously.
1607                  *
1608                  * Remove the NFS4_DR_DID_OP flag so we don't
1609                  * get stuck looping through here.
1610                  */
1611                 VN_HOLD(vp);
1612                 nfs4delegreturn_async(rp, (flags & ~NFS4_DR_DID_OP), FALSE);
1613                 return (0);
1614         }
1615 
1616         /*
1617          * Verify we still have a delegation and crhold the credential.
1618          */
1619         mutex_enter(&rp->r_statev4_lock);
1620         if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
1621                 mutex_exit(&rp->r_statev4_lock);
1622                 goto out;
1623         }
1624         cr = rp->r_deleg_cred;
1625         ASSERT(cr != NULL);
1626         crhold(cr);
1627         mutex_exit(&rp->r_statev4_lock);
1628 
1629         /*
1630          * Push the modified data back to the server synchronously
1631          * before doing DELEGRETURN.
1632          */
1633         if (flags & NFS4_DR_PUSH)
1634                 (void) VOP_PUTPAGE(vp, 0, 0, 0, cr, NULL);
1635 
1636         /*
1637          * Take r_deleg_recall_lock in WRITE mode, this will prevent
1638          * nfs4_is_otw_open_necessary from trying to use the delegation
1639          * while the DELEGRETURN is in progress.
1640          */
1641         (void) nfs_rw_enter_sig(&rp->r_deleg_recall_lock, RW_WRITER, FALSE);
1642 
1643         rw_entered = TRUE;
1644 
1645         if (rp->r_deleg_type == OPEN_DELEGATE_NONE)
1646                 goto out;
1647 
1648         if (flags & NFS4_DR_REOPEN) {
1649                 /*
1650                  * If R4RECOVERRP is already set, then skip re-opening
1651                  * the delegation open streams and go straight to doing
1652                  * delegreturn.  (XXX if the file has failed recovery, then the
1653                  * delegreturn attempt is likely to be futile.)
1654                  */
1655                 mutex_enter(&rp->r_statelock);
1656                 do_reopen = !(rp->r_flags & R4RECOVERRP);
1657                 mutex_exit(&rp->r_statelock);
1658 
1659                 if (do_reopen) {
1660                         error = deleg_reopen(vp, &needrecov, ncg, flags);
1661                         if (error != 0) {
1662                                 if ((flags & (NFS4_DR_FORCE | NFS4_DR_RECALL))
1663                                     == 0)
1664                                         goto out;
1665                         } else if (needrecov) {
1666                                 if ((flags & NFS4_DR_FORCE) == 0)
1667                                         goto out;
1668                         }
1669                 }
1670         }
1671 
1672         if (flags & NFS4_DR_DISCARD) {
1673                 mntinfo4_t *mi = VTOMI4(RTOV4(rp));
1674 
1675                 mutex_enter(&rp->r_statelock);
1676                 /*
1677                  * deleg_return_pending is cleared inside of delegation_accept
                 * when a delegation is accepted.  If this flag has been
1679                  * cleared, then a new delegation has overwritten the one we
1680                  * were about to throw away.
1681                  */
1682                 if (!rp->r_deleg_return_pending) {
1683                         mutex_exit(&rp->r_statelock);
1684                         goto out;
1685                 }
1686                 mutex_exit(&rp->r_statelock);
1687                 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE);
1688                 nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1689                 nfs_rw_exit(&mi->mi_recovlock);
1690         } else {
1691                 error = nfs4_do_delegreturn(rp, flags, cr, ncg);
1692         }
1693 
1694 out:
1695         if (cr)
1696                 crfree(cr);
1697         if (rw_entered)
1698                 nfs_rw_exit(&rp->r_deleg_recall_lock);
1699         return (error);
1700 }
1701 
1702 int
1703 nfs4delegreturn(rnode4_t *rp, int flags)
1704 {
1705         struct nfs4_callback_globals *ncg;
1706 
1707         ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
1708         ASSERT(ncg != NULL);
1709 
1710         return (nfs4delegreturn_impl(rp, flags, ncg));
1711 }
1712 
1713 void
1714 nfs4delegreturn_async(rnode4_t *rp, int flags, bool_t trunc)
1715 {
1716         struct cb_recall_pass *pp;
1717 
1718         pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP);
1719         pp->rp = rp;
1720         pp->flags = flags;
1721         pp->truncate = trunc;
1722 
1723         /*
         * Fire up a thread to do the actual delegreturn.
1725          * Caller must guarantee that the rnode doesn't
1726          * vanish (by calling VN_HOLD).
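         *
         * A sketch of the usual call sequence (mirroring the
         * NFS4_DR_DID_OP path in nfs4delegreturn_impl):
         *
         *      VN_HOLD(vp);
         *      nfs4delegreturn_async(rp, NFS4_DR_PUSH|NFS4_DR_REOPEN,
         *          FALSE);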
1727          */
1728 
1729         (void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0,
1730             minclsyspri);
1731 }
1732 
1733 static void
1734 delegreturn_all_thread(rpcprog_t *pp)
1735 {
1736         nfs4_server_t *np;
1737         bool_t found = FALSE;
1738         rpcprog_t prog;
1739         rnode4_t *rp;
1740         vnode_t *vp;
1741         zoneid_t zoneid = getzoneid();
1742         struct nfs4_callback_globals *ncg;
1743 
1744         NFS4_DEBUG(nfs4_drat_debug,
            (CE_NOTE, "delegreturn_all_thread: prog %d\n", *pp));
1746 
1747         prog = *pp;
1748         kmem_free(pp, sizeof (*pp));
1749         pp = NULL;
1750 
1751         mutex_enter(&nfs4_server_lst_lock);
1752         for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
1753                 if (np->zoneid == zoneid && np->s_program == prog) {
1754                         mutex_enter(&np->s_lock);
1755                         found = TRUE;
1756                         break;
1757                 }
1758         }
1759         mutex_exit(&nfs4_server_lst_lock);
1760 
1761         /*
1762          * It's possible that the nfs4_server which was using this
1763          * program number has vanished since this thread is async.
1764          * If so, just return.  Your work here is finished, my friend.
1765          */
1766         if (!found)
1767                 goto out;
1768 
1769         ncg = np->zone_globals;
1770         while ((rp = list_head(&np->s_deleg_list)) != NULL) {
1771                 vp = RTOV4(rp);
1772                 VN_HOLD(vp);
1773                 mutex_exit(&np->s_lock);
1774                 (void) nfs4delegreturn_impl(rp, NFS4_DR_PUSH|NFS4_DR_REOPEN,
1775                     ncg);
1776                 VN_RELE(vp);
1777 
1778                 /* retake the s_lock for next trip through the loop */
1779                 mutex_enter(&np->s_lock);
1780         }
1781         mutex_exit(&np->s_lock);
1782 out:
1783         NFS4_DEBUG(nfs4_drat_debug,
            (CE_NOTE, "delegreturn_all_thread: complete\n"));
1785         zthread_exit();
1786 }
1787 
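/*
 * nfs4_delegreturn_all - return all delegations granted by the given
 * server.  The actual work is handed off to delegreturn_all_thread,
 * which locates the nfs4_server again by its callback program number.
 */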
1788 void
1789 nfs4_delegreturn_all(nfs4_server_t *sp)
1790 {
1791         rpcprog_t pro, *pp;
1792 
1793         mutex_enter(&sp->s_lock);
1794 
1795         /* Check to see if the delegation list is empty */
1796 
1797         if (list_head(&sp->s_deleg_list) == NULL) {
1798                 mutex_exit(&sp->s_lock);
1799                 return;
1800         }
1801         /*
1802          * Grab the program number; the async thread will use this
1803          * to find the nfs4_server.
1804          */
1805         pro = sp->s_program;
1806         mutex_exit(&sp->s_lock);
1807         pp = kmem_alloc(sizeof (rpcprog_t), KM_SLEEP);
1808         *pp = pro;
1809         (void) zthread_create(NULL, 0, delegreturn_all_thread, pp, 0,
1810             minclsyspri);
1811 }
1812 
1813 
1814 /*
1815  * Discard any delegations
1816  *
 * Iterate over the server's s_deleg_list and
1818  * for matching mount-point rnodes discard
1819  * the delegation.
1820  */
1821 void
1822 nfs4_deleg_discard(mntinfo4_t *mi, nfs4_server_t *sp)
1823 {
1824         rnode4_t *rp, *next;
1825         mntinfo4_t *r_mi;
1826         struct nfs4_callback_globals *ncg;
1827 
1828         ASSERT(mutex_owned(&sp->s_lock));
1829         ncg = sp->zone_globals;
1830 
1831         for (rp = list_head(&sp->s_deleg_list); rp != NULL; rp = next) {
1832                 r_mi = VTOMI4(RTOV4(rp));
1833                 next = list_next(&sp->s_deleg_list, rp);
1834 
1835                 if (r_mi != mi) {
1836                         /*
                         * Skip if this rnode is not on the
                         * same mount-point.
1839                          */
1840                         continue;
1841                 }
1842 
1843                 ASSERT(rp->r_deleg_type == OPEN_DELEGATE_READ);
1844 
1845 #ifdef DEBUG
1846                 if (nfs4_client_recov_debug) {
                        zprintf(getzoneid(),
                            "nfs4_deleg_discard: matched rnode %p "
                            "-- discarding delegation\n", (void *)rp);
1850                 }
1851 #endif
1852                 mutex_enter(&rp->r_statev4_lock);
1853                 /*
1854                  * Free the cred originally held when the delegation
1855                  * was granted. Also need to decrement the refcnt
                 * on this server for each delegation we discard.
1857                  */
1858                 if (rp->r_deleg_cred)
1859                         crfree(rp->r_deleg_cred);
1860                 rp->r_deleg_cred = NULL;
1861                 rp->r_deleg_type = OPEN_DELEGATE_NONE;
1862                 rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
1863                 rp->r_deleg_needs_recall = FALSE;
1864                 ASSERT(sp->s_refcnt > 1);
1865                 sp->s_refcnt--;
1866                 list_remove(&sp->s_deleg_list, rp);
1867                 mutex_exit(&rp->r_statev4_lock);
1868                 nfs4_dec_state_ref_count_nolock(sp, mi);
1869                 ncg->nfs4_callback_stats.delegations.value.ui64--;
1870         }
1871 }
1872 
1873 /*
1874  * Reopen any open streams that were covered by the given file's
1875  * delegation.
1876  * Returns zero or an errno value.  If there was no error, *recovp
1877  * indicates whether recovery was initiated.
1878  */
1879 
1880 static int
1881 deleg_reopen(vnode_t *vp, bool_t *recovp, struct nfs4_callback_globals *ncg,
1882         int flags)
1883 {
1884         nfs4_open_stream_t *osp;
1885         nfs4_recov_state_t recov_state;
1886         bool_t needrecov = FALSE;
1887         mntinfo4_t *mi;
1888         rnode4_t *rp;
1889         nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
1890         int claimnull;
1891 
1892         mi = VTOMI4(vp);
1893         rp = VTOR4(vp);
1894 
1895         recov_state.rs_flags = 0;
1896         recov_state.rs_num_retry_despite_err = 0;
1897 
1898 retry:
1899         if ((e.error = nfs4_start_op(mi, vp, NULL, &recov_state)) != 0) {
1900                 return (e.error);
1901         }
1902 
1903         /*
1904          * if we mean to discard the delegation, it must be BAD, so don't
         * If we mean to discard the delegation, it must be BAD, so don't
1906          */
1907         claimnull = (flags & NFS4_DR_DISCARD);
1908         /*
1909          * Loop through the open streams for this rnode to find
1910          * all of the ones created using the delegation state ID.
1911          * Each of these needs to be re-opened.
1912          */
1913 
1914         while ((osp = get_next_deleg_stream(rp, claimnull)) != NULL) {
1915 
1916                 if (claimnull) {
1917                         nfs4_reopen(vp, osp, &e, CLAIM_NULL, FALSE, FALSE);
1918                 } else {
1919                         ncg->nfs4_callback_stats.claim_cur.value.ui64++;
1920 
1921                         nfs4_reopen(vp, osp, &e, CLAIM_DELEGATE_CUR, FALSE,
1922                             FALSE);
1923                         if (e.error == 0 && e.stat == NFS4_OK)
1924                                 ncg->nfs4_callback_stats.
1925                                     claim_cur_ok.value.ui64++;
1926                 }
1927 
1928                 if (e.error == EAGAIN) {
1929                         nfs4_end_op(mi, vp, NULL, &recov_state, TRUE);
1930                         goto retry;
1931                 }
1932 
1933                 /*
         * If error is EINTR, ETIMEDOUT, or NFS4_FRC_UNMT_ERR, then
1935                  * recovery has already been started inside of nfs4_reopen.
1936                  */
1937                 if (e.error == EINTR || e.error == ETIMEDOUT ||
1938                     NFS4_FRC_UNMT_ERR(e.error, vp->v_vfsp)) {
1939                         open_stream_rele(osp, rp);
1940                         break;
1941                 }
1942 
1943                 needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp);
1944 
1945                 if (e.error != 0 && !needrecov) {
1946                         /*
1947                          * Recovery is not possible, but don't give up yet;
1948                          * we'd still like to do delegreturn after
1949                          * reopening as many streams as possible.
1950                          * Continue processing the open streams.
1951                          */
1952 
1953                         ncg->nfs4_callback_stats.recall_failed.value.ui64++;
1954 
1955                 } else if (needrecov) {
1956                         /*
1957                          * Start recovery and bail out.  The recovery
1958                          * thread will take it from here.
1959                          */
1960                         (void) nfs4_start_recovery(&e, mi, vp, NULL, NULL,
1961                             NULL, OP_OPEN, NULL, NULL, NULL);
1962                         open_stream_rele(osp, rp);
1963                         *recovp = TRUE;
1964                         break;
1965                 }
1966 
1967                 open_stream_rele(osp, rp);
1968         }
1969 
1970         nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
1971 
1972         return (e.error);
1973 }
1974 
1975 /*
1976  * get_next_deleg_stream - returns the next open stream which
1977  * represents a delegation for this rnode.  In order to assure
1978  * forward progress, the caller must guarantee that each open
1979  * stream returned is changed so that a future call won't return
1980  * it again.
1981  *
1982  * There are several ways for the open stream to change.  If the open
1983  * stream is !os_delegation, then we aren't interested in it.  Also, if
1984  * either os_failed_reopen or !os_valid, then don't return the osp.
1985  *
1986  * If claimnull is false (doing reopen CLAIM_DELEGATE_CUR) then return
1987  * the osp if it is an os_delegation open stream.  Also, if the rnode still
1988  * has r_deleg_return_pending, then return the os_delegation osp.  Lastly,
1989  * if the rnode's r_deleg_stateid is different from the osp's open_stateid,
1990  * then return the osp.
1991  *
1992  * We have already taken the 'r_deleg_recall_lock' as WRITER, which
1993  * prevents new OPENs from going OTW (as start_fop takes this
1994  * lock in READ mode); thus, no new open streams can be created
1995  * (which inherently means no new delegation open streams are
1996  * being created).
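 *
 * The expected calling pattern (a sketch of what deleg_reopen does)
 * is to consume each returned stream and then release it:
 *
 *      while ((osp = get_next_deleg_stream(rp, claimnull)) != NULL) {
 *              ... reopen using osp ...
 *              open_stream_rele(osp, rp);
 *      }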
1997  */
1998 
1999 static nfs4_open_stream_t *
2000 get_next_deleg_stream(rnode4_t *rp, int claimnull)
2001 {
2002         nfs4_open_stream_t      *osp;
2003 
2004         ASSERT(nfs_rw_lock_held(&rp->r_deleg_recall_lock, RW_WRITER));
2005 
2006         /*
2007          * Search through the list of open streams looking for
2008          * one that was created while holding the delegation.
2009          */
2010         mutex_enter(&rp->r_os_lock);
2011         for (osp = list_head(&rp->r_open_streams); osp != NULL;
2012             osp = list_next(&rp->r_open_streams, osp)) {
2013                 mutex_enter(&osp->os_sync_lock);
2014                 if (!osp->os_delegation || osp->os_failed_reopen ||
2015                     !osp->os_valid) {
2016                         mutex_exit(&osp->os_sync_lock);
2017                         continue;
2018                 }
2019                 if (!claimnull || rp->r_deleg_return_pending ||
2020                     !stateid4_cmp(&osp->open_stateid, &rp->r_deleg_stateid)) {
2021                         osp->os_ref_count++;
2022                         mutex_exit(&osp->os_sync_lock);
2023                         mutex_exit(&rp->r_os_lock);
2024                         return (osp);
2025                 }
2026                 mutex_exit(&osp->os_sync_lock);
2027         }
2028         mutex_exit(&rp->r_os_lock);
2029 
2030         return (NULL);
2031 }
2032 
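/*
 * nfs4delegreturn_thread - asynchronous worker that flushes or
 * invalidates cached pages as appropriate and then returns the
 * delegation via nfs4delegreturn_impl.  Frees the cb_recall_pass
 * args and releases the vnode hold taken by the caller.
 */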
2033 static void
2034 nfs4delegreturn_thread(struct cb_recall_pass *args)
2035 {
2036         rnode4_t *rp;
2037         vnode_t *vp;
2038         cred_t *cr;
2039         int dtype, error, flags;
2040         bool_t rdirty, rip;
2041         kmutex_t cpr_lock;
2042         callb_cpr_t cpr_info;
2043         struct nfs4_callback_globals *ncg;
2044 
2045         ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
2046         ASSERT(ncg != NULL);
2047 
2048         mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
2049 
2050         CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,
2051             "nfsv4delegRtn");
2052 
2053         rp = args->rp;
2054         vp = RTOV4(rp);
2055 
2056         mutex_enter(&rp->r_statev4_lock);
2057         if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
2058                 mutex_exit(&rp->r_statev4_lock);
2059                 goto out;
2060         }
2061         mutex_exit(&rp->r_statev4_lock);
2062 
2063         /*
2064          * Take the read-write lock in read mode to prevent other
2065          * threads from modifying the data during the recall.  This
2066          * doesn't affect mmappers.
2067          */
2068         (void) nfs_rw_enter_sig(&rp->r_rwlock, RW_READER, FALSE);
2069 
2070         /* Proceed with delegreturn */
2071 
2072         mutex_enter(&rp->r_statev4_lock);
2073         if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
2074                 mutex_exit(&rp->r_statev4_lock);
2075                 nfs_rw_exit(&rp->r_rwlock);
2076                 goto out;
2077         }
2078         dtype = rp->r_deleg_type;
2079         cr = rp->r_deleg_cred;
2080         ASSERT(cr != NULL);
2081         crhold(cr);
2082         mutex_exit(&rp->r_statev4_lock);
2083 
2084         flags = args->flags;
2085 
2086         /*
2087          * If the file is being truncated at the server, then throw
         * away all of the pages; it doesn't matter what flavor of
2089          * delegation we have.
2090          */
2091 
2092         if (args->truncate) {
2093                 ncg->nfs4_callback_stats.recall_trunc.value.ui64++;
2094                 nfs4_invalidate_pages(vp, 0, cr);
2095         } else if (dtype == OPEN_DELEGATE_WRITE) {
2096 
2097                 mutex_enter(&rp->r_statelock);
2098                 rdirty = rp->r_flags & R4DIRTY;
2099                 mutex_exit(&rp->r_statelock);
2100 
2101                 if (rdirty) {
2102                         error = VOP_PUTPAGE(vp, 0, 0, 0, cr, NULL);
2103 
2104                         if (error)
                                CB_WARN1("nfs4delegreturn_thread:"
                                    " VOP_PUTPAGE: %d\n", error);
2107                 }
2108                 /* turn off NFS4_DR_PUSH because we just did that above. */
2109                 flags &= ~NFS4_DR_PUSH;
2110         }
2111 
2112         mutex_enter(&rp->r_statelock);
        rip = rp->r_flags & R4RECOVERRP;
2114         mutex_exit(&rp->r_statelock);
2115 
2116         /* If a failed recovery is indicated, discard the pages */
2117 
2118         if (rip) {
2119 
2120                 error = VOP_PUTPAGE(vp, 0, 0, B_INVAL, cr, NULL);
2121 
2122                 if (error)
2123                         CB_WARN1("nfs4delegreturn_thread: VOP_PUTPAGE: %d\n",
2124                             error);
2125         }
2126 
2127         /*
2128          * Pass the flags to nfs4delegreturn_impl, but be sure not to pass
2129          * NFS4_DR_DID_OP, which just calls nfs4delegreturn_async again.
2130          */
2131         flags &= ~NFS4_DR_DID_OP;
2132 
2133         (void) nfs4delegreturn_impl(rp, flags, ncg);
2134 
2135         nfs_rw_exit(&rp->r_rwlock);
2136         crfree(cr);
2137 out:
2138         kmem_free(args, sizeof (struct cb_recall_pass));
2139         VN_RELE(vp);
2140         mutex_enter(&cpr_lock);
2141         CALLB_CPR_EXIT(&cpr_info);
2142         mutex_destroy(&cpr_lock);
2143         zthread_exit();
2144 }
2145 
2146 /*
2147  * This function has one assumption that the caller of this function is
2148  * either doing recovery (therefore cannot call nfs4_start_op) or has
2149  * already called nfs4_start_op().
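 *
 * A sketch of the expected (non-recovery) caller context:
 *
 *      if (nfs4_start_op(mi, vp, NULL, &recov_state) == 0) {
 *              ... OTW OPEN whose result granted the delegation ...
 *              nfs4_delegation_accept(rp, claim, res, garp, cr);
 *              nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
 *      }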
2150  */
2151 void
2152 nfs4_delegation_accept(rnode4_t *rp, open_claim_type4 claim, OPEN4res *res,
2153         nfs4_ga_res_t *garp, cred_t *cr)
2154 {
2155         open_read_delegation4 *orp;
2156         open_write_delegation4 *owp;
2157         nfs4_server_t *np;
2158         bool_t already = FALSE;
2159         bool_t recall = FALSE;
2160         bool_t valid_garp = TRUE;
2161         bool_t delegation_granted = FALSE;
2162         bool_t dr_needed = FALSE;
2163         bool_t recov;
2164         int dr_flags = 0;
2165         long mapcnt;
2166         uint_t rflag;
2167         mntinfo4_t *mi;
2168         struct nfs4_callback_globals *ncg;
2169         open_delegation_type4 odt;
2170 
2171         ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
2172         ASSERT(ncg != NULL);
2173 
2174         mi = VTOMI4(RTOV4(rp));
2175 
2176         /*
2177          * Accept a delegation granted to the client via an OPEN.
2178          * Set the delegation fields in the rnode and insert the
2179          * rnode onto the list anchored in the nfs4_server_t.  The
2180          * proper locking order requires the nfs4_server_t first,
2181          * even though it may not be needed in all cases.
2182          *
2183          * NB: find_nfs4_server returns with s_lock held.
2184          */
2185 
2186         if ((np = find_nfs4_server(mi)) == NULL)
2187                 return;
2188 
2189         /* grab the statelock too, for examining r_mapcnt */
2190         mutex_enter(&rp->r_statelock);
2191         mutex_enter(&rp->r_statev4_lock);
2192 
2193         if (rp->r_deleg_type == OPEN_DELEGATE_READ ||
2194             rp->r_deleg_type == OPEN_DELEGATE_WRITE)
2195                 already = TRUE;
2196 
2197         odt = res->delegation.delegation_type;
2198 
2199         if (odt == OPEN_DELEGATE_READ) {
2200 
2201                 rp->r_deleg_type = res->delegation.delegation_type;
2202                 orp = &res->delegation.open_delegation4_u.read;
2203                 rp->r_deleg_stateid = orp->stateid;
2204                 rp->r_deleg_perms = orp->permissions;
2205                 if (claim == CLAIM_PREVIOUS)
2206                         if ((recall = orp->recall) != 0)
2207                                 dr_needed = TRUE;
2208 
2209                 delegation_granted = TRUE;
2210 
2211                 ncg->nfs4_callback_stats.delegations.value.ui64++;
2212                 ncg->nfs4_callback_stats.delegaccept_r.value.ui64++;
2213 
2214         } else if (odt == OPEN_DELEGATE_WRITE) {
2215 
2216                 rp->r_deleg_type = res->delegation.delegation_type;
2217                 owp = &res->delegation.open_delegation4_u.write;
2218                 rp->r_deleg_stateid = owp->stateid;
2219                 rp->r_deleg_perms = owp->permissions;
2220                 rp->r_deleg_limit = owp->space_limit;
2221                 if (claim == CLAIM_PREVIOUS)
2222                         if ((recall = owp->recall) != 0)
2223                                 dr_needed = TRUE;
2224 
2225                 delegation_granted = TRUE;
2226 
2227                 if (garp == NULL || !garp->n4g_change_valid) {
2228                         valid_garp = FALSE;
2229                         rp->r_deleg_change = 0;
2230                         rp->r_deleg_change_grant = 0;
2231                 } else {
2232                         rp->r_deleg_change = garp->n4g_change;
2233                         rp->r_deleg_change_grant = garp->n4g_change;
2234                 }
2235                 mapcnt = rp->r_mapcnt;
2236                 rflag = rp->r_flags;
2237 
2238                 /*
2239                  * Update the delegation change attribute if
                 * there are mappers or the file is dirty.  This
2241                  * might be the case during recovery after server
2242                  * reboot.
2243                  */
2244                 if (mapcnt > 0 || rflag & R4DIRTY)
2245                         rp->r_deleg_change++;
2246 
2247                 NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE,
2248                     "nfs4_delegation_accept: r_deleg_change: 0x%x\n",
2249                     (int)(rp->r_deleg_change >> 32)));
                NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE,
                    "nfs4_delegation_accept: r_deleg_change_grant: 0x%x\n",
                    (int)(rp->r_deleg_change_grant >> 32)));

2255                 ncg->nfs4_callback_stats.delegations.value.ui64++;
2256                 ncg->nfs4_callback_stats.delegaccept_rw.value.ui64++;
2257         } else if (already) {
2258                 /*
2259                  * No delegation granted.  If the rnode currently has
                 * one, then consider it tainted and return it.
2261                  */
2262                 dr_needed = TRUE;
2263         }
2264 
2265         if (delegation_granted) {
2266                 /* Add the rnode to the list. */
2267                 if (!already) {
2268                         crhold(cr);
2269                         rp->r_deleg_cred = cr;
2270 
2271                         ASSERT(mutex_owned(&np->s_lock));
2272                         list_insert_head(&np->s_deleg_list, rp);
2273                         /* added list node gets a reference */
2274                         np->s_refcnt++;
2275                         nfs4_inc_state_ref_count_nolock(np, mi);
2276                 }
2277                 rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
2278         }
2279 
2280         /*
2281          * We've now safely accepted the delegation, if any.  Drop the
2282          * locks and figure out what post-processing is needed.  We'd
2283          * like to retain r_statev4_lock, but nfs4_server_rele takes
2284          * s_lock which would be a lock ordering violation.
2285          */
2286         mutex_exit(&rp->r_statev4_lock);
2287         mutex_exit(&rp->r_statelock);
2288         mutex_exit(&np->s_lock);
2289         nfs4_server_rele(np);
2290 
2291         /*
2292          * Check to see if we are in recovery.  Remember that
2293          * this function is protected by start_op, so a recovery
2294          * cannot begin until we are out of here.
2295          */
2296         mutex_enter(&mi->mi_lock);
2297         recov = mi->mi_recovflags & MI4_RECOV_ACTIV;
2298         mutex_exit(&mi->mi_lock);
2299 
2300         mutex_enter(&rp->r_statev4_lock);
2301 
2302         if (nfs4_delegreturn_policy == IMMEDIATE || !valid_garp)
2303                 dr_needed = TRUE;
2304 
2305         if (dr_needed && rp->r_deleg_return_pending == FALSE) {
2306                 if (recov) {
2307                         /*
2308                          * We cannot call delegreturn from inside
2309                          * of recovery or VOP_PUTPAGE will hang
2310                          * due to nfs4_start_fop call in
2311                          * nfs4write.  Use dlistadd to add the
2312                          * rnode to the list of rnodes needing
2313                          * cleaning.  We do not need to do reopen
2314                          * here because recov_openfiles will do it.
2315                          * In the non-recall case, just discard the
2316                          * delegation as it is no longer valid.
2317                          */
2318                         if (recall)
2319                                 dr_flags = NFS4_DR_PUSH;
2320                         else
2321                                 dr_flags = NFS4_DR_PUSH|NFS4_DR_DISCARD;
2322 
2323                         nfs4_dlistadd(rp, ncg, dr_flags);
2324                         dr_flags = 0;
2325                 } else {
2326                         /*
2327                          * Push the modified data back to the server,
2328                          * reopen any delegation open streams, and return
2329                          * the delegation.  Drop the statev4_lock first!
2330                          */
                        dr_flags = NFS4_DR_PUSH|NFS4_DR_DID_OP|NFS4_DR_REOPEN;
2332                 }
2333         }
2334         mutex_exit(&rp->r_statev4_lock);
2335         if (dr_flags)
2336                 (void) nfs4delegreturn_impl(rp, dr_flags, ncg);
2337 }
2338 
2339 /*
2340  * nfs4delegabandon - Abandon the delegation on an rnode4.  This code
2341  * is called when the client receives EXPIRED, BAD_STATEID, OLD_STATEID
2342  * or BADSEQID and the recovery code is unable to recover.  Push any
2343  * dirty data back to the server and return the delegation (if any).
2344  */
2345 
2346 void
2347 nfs4delegabandon(rnode4_t *rp)
2348 {
2349         vnode_t *vp;
2350         struct cb_recall_pass *pp;
2351         open_delegation_type4 dt;
2352 
2353         mutex_enter(&rp->r_statev4_lock);
2354         dt = rp->r_deleg_type;
2355         mutex_exit(&rp->r_statev4_lock);
2356 
2357         if (dt == OPEN_DELEGATE_NONE)
2358                 return;
2359 
2360         vp = RTOV4(rp);
2361         VN_HOLD(vp);
2362 
2363         pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP);
2364         pp->rp = rp;
2365         /*
2366          * Recovery on the file has failed and we want to return
         * the delegation.  We don't want to reopen files;
         * nfs4delegreturn_thread() figures out what to do about
2369          * the data.  The only thing to do is attempt to return
2370          * the delegation.
2371          */
2372         pp->flags = 0;
2373         pp->truncate = FALSE;
2374 
2375         /*
2376          * Fire up a thread to do the delegreturn; this is
2377          * necessary because we could be inside a GETPAGE or
2378          * PUTPAGE and we cannot do another one.
2379          */
2380 
2381         (void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0,
2382             minclsyspri);
2383 }
2384 
2385 static int
2386 wait_for_recall1(vnode_t *vp, nfs4_op_hint_t op, nfs4_recov_state_t *rsp,
2387         int flg)
2388 {
2389         rnode4_t *rp;
2390         int error = 0;
2391 
2392 #ifdef lint
2393         op = op;
2394 #endif
2395 
2396         if (vp && vp->v_type == VREG) {
2397                 rp = VTOR4(vp);
2398 
2399                 /*
2400                  * Take r_deleg_recall_lock in read mode to synchronize
2401                  * with delegreturn.
2402                  */
2403                 error = nfs_rw_enter_sig(&rp->r_deleg_recall_lock,
2404                     RW_READER, INTR4(vp));
2405 
2406                 if (error == 0)
2407                         rsp->rs_flags |= flg;
2408 
2409         }
2410         return (error);
2411 }
2412 
2413 void
2414 nfs4_end_op_recall(vnode_t *vp1, vnode_t *vp2, nfs4_recov_state_t *rsp)
2415 {
2416         NFS4_DEBUG(nfs4_recall_debug,
2417             (CE_NOTE, "nfs4_end_op_recall: 0x%p, 0x%p\n",
2418             (void *)vp1, (void *)vp2));
2419 
2420         if (vp2 && rsp->rs_flags & NFS4_RS_RECALL_HELD2)
2421                 nfs_rw_exit(&VTOR4(vp2)->r_deleg_recall_lock);
2422         if (vp1 && rsp->rs_flags & NFS4_RS_RECALL_HELD1)
2423                 nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock);
2424 }
2425 
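/*
 * wait_for_recall - take the r_deleg_recall_lock of up to two vnodes
 * in read mode, recording in rsp->rs_flags which locks were taken so
 * that nfs4_end_op_recall can drop them later.  A sketch of the
 * expected pairing:
 *
 *      if (wait_for_recall(vp1, vp2, op, &recov_state) == 0) {
 *              ... operation that must not race with delegreturn ...
 *              nfs4_end_op_recall(vp1, vp2, &recov_state);
 *      }
 */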
2426 int
2427 wait_for_recall(vnode_t *vp1, vnode_t *vp2, nfs4_op_hint_t op,
2428         nfs4_recov_state_t *rsp)
2429 {
2430         int error;
2431 
2432         NFS4_DEBUG(nfs4_recall_debug,
2433             (CE_NOTE, "wait_for_recall:    0x%p, 0x%p\n",
            (void *)vp1, (void *)vp2));
2435 
2436         rsp->rs_flags &= ~(NFS4_RS_RECALL_HELD1|NFS4_RS_RECALL_HELD2);
2437 
2438         if ((error = wait_for_recall1(vp1, op, rsp, NFS4_RS_RECALL_HELD1)) != 0)
2439                 return (error);
2440 
2441         if ((error = wait_for_recall1(vp2, op, rsp, NFS4_RS_RECALL_HELD2))
2442             != 0) {
2443                 if (rsp->rs_flags & NFS4_RS_RECALL_HELD1) {
2444                         nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock);
2445                         rsp->rs_flags &= ~NFS4_RS_RECALL_HELD1;
2446                 }
2447 
2448                 return (error);
2449         }
2450 
2451         return (0);
2452 }
2453 
2454 /*
2455  * nfs4_dlistadd - Add this rnode to a list of rnodes to be
2456  * DELEGRETURN'd at the end of recovery.
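 *
 * The caller must hold r_statev4_lock (see the ASSERT below); a
 * typical use while in recovery is (sketch):
 *
 *      mutex_enter(&rp->r_statev4_lock);
 *      nfs4_dlistadd(rp, ncg, NFS4_DR_PUSH|NFS4_DR_DISCARD);
 *      mutex_exit(&rp->r_statev4_lock);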
2457  */
2458 
2459 static void
2460 nfs4_dlistadd(rnode4_t *rp, struct nfs4_callback_globals *ncg, int flags)
2461 {
2462         struct nfs4_dnode *dp;
2463 
2464         ASSERT(mutex_owned(&rp->r_statev4_lock));
2465         /*
2466          * Mark the delegation as having a return pending.
2467          * This will prevent the use of the delegation stateID
2468          * by read, write, setattr and open.
2469          */
2470         rp->r_deleg_return_pending = TRUE;
2471         dp = kmem_alloc(sizeof (*dp), KM_SLEEP);
2472         VN_HOLD(RTOV4(rp));
2473         dp->rnodep = rp;
2474         dp->flags = flags;
2475         mutex_enter(&ncg->nfs4_dlist_lock);
2476         list_insert_head(&ncg->nfs4_dlist, dp);
2477 #ifdef  DEBUG
2478         ncg->nfs4_dlistadd_c++;
2479 #endif
2480         mutex_exit(&ncg->nfs4_dlist_lock);
2481 }
2482 
2483 /*
 * nfs4_dlistclean_impl - Do DELEGRETURN for each rnode on the list
 * of files awaiting cleaning.  If the override_flags are non-zero
2486  * then use them rather than the flags that were set when the rnode
2487  * was added to the dlist.
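 *
 * For example (sketch), a caller that needs the delegations thrown
 * away rather than returned over the wire could use:
 *
 *      nfs4_dlistclean_impl(ncg, NFS4_DR_DISCARD);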
2488  */
2489 static void
2490 nfs4_dlistclean_impl(struct nfs4_callback_globals *ncg, int override_flags)
2491 {
2492         rnode4_t *rp;
2493         struct nfs4_dnode *dp;
2494         int flags;
2495 
2496         ASSERT(override_flags == 0 || override_flags == NFS4_DR_DISCARD);
2497 
2498         mutex_enter(&ncg->nfs4_dlist_lock);
2499         while ((dp = list_head(&ncg->nfs4_dlist)) != NULL) {
2500 #ifdef  DEBUG
2501                 ncg->nfs4_dlistclean_c++;
2502 #endif
2503                 list_remove(&ncg->nfs4_dlist, dp);
2504                 mutex_exit(&ncg->nfs4_dlist_lock);
2505                 rp = dp->rnodep;
2506                 flags = (override_flags != 0) ? override_flags : dp->flags;
2507                 kmem_free(dp, sizeof (*dp));
2508                 (void) nfs4delegreturn_impl(rp, flags, ncg);
2509                 VN_RELE(RTOV4(rp));
2510                 mutex_enter(&ncg->nfs4_dlist_lock);
2511         }
2512         mutex_exit(&ncg->nfs4_dlist_lock);
2513 }
2514 
2515 void
2516 nfs4_dlistclean(void)
2517 {
2518         struct nfs4_callback_globals *ncg;
2519 
2520         ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
2521         ASSERT(ncg != NULL);
2522 
2523         nfs4_dlistclean_impl(ncg, 0);
2524 }