/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/kstat.h>
#include <sys/mkdev.h>
#include <sys/mount.h>
#include <sys/statvfs.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/utsname.h>
#include <sys/bootconf.h>
#include <sys/modctl.h>
#include <sys/acl.h>
#include <sys/flock.h>
#include <sys/kstr.h>
#include <sys/stropts.h>
#include <sys/strsubr.h>
#include <sys/atomic.h>
#include <sys/disp.h>
#include <sys/policy.h>
#include <sys/list.h>
#include <sys/zone.h>

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/rpcsec_gss.h>
#include <rpc/clnt.h>
#include <rpc/xdr.h>

#include <nfs/nfs.h>
#include <nfs/nfs_clnt.h>
#include <nfs/mount.h>
#include <nfs/nfs_acl.h>

#include <fs/fs_subr.h>

#include <nfs/nfs4.h>
#include <nfs/rnode4.h>
#include <nfs/nfs4_clnt.h>
#include <nfs/nfssys.h>
#ifdef  DEBUG
/*
 * These are "special" state IDs and file handles that
 * match any delegation state ID or file handle.  This
 * is for testing purposes only.
 */

stateid4 nfs4_deleg_any = { 0x7FFFFFF0 };
char nfs4_deleg_fh[] = "\0377\0376\0375\0374";
nfs_fh4 nfs4_deleg_anyfh = { sizeof (nfs4_deleg_fh)-1, nfs4_deleg_fh };
nfsstat4 cb4_getattr_fail = NFS4_OK;
nfsstat4 cb4_recall_fail = NFS4_OK;

int nfs4_callback_debug;
int nfs4_recall_debug;
int nfs4_drat_debug;

#endif

#define CB_NOTE(x)      NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE, x))
#define CB_WARN(x)      NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x))
#define CB_WARN1(x, y)  NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x, y))

enum nfs4_delegreturn_policy nfs4_delegreturn_policy = INACTIVE;

static zone_key_t nfs4_callback_zone_key;

/*
 * NFS4_MAPSIZE is the number of bytes we are willing to consume
 * for the block allocation map when the server grants an NFS_LIMIT_BLOCK
 * style delegation.
 */

#define NFS4_MAPSIZE    8192
#define NFS4_MAPWORDS   (NFS4_MAPSIZE/sizeof (uint_t))
#define NbPW            (NBBY*sizeof (uint_t))
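/*
 * Illustrative sketch only (not code used below): NFS4_MAPSIZE bytes
 * give NFS4_MAPSIZE * NBBY map bits.  A hypothetical bit number 'b'
 * would be tested with the usual bitmap arithmetic that these
 * constants support, e.g.
 *
 *	map[b / NbPW] & (1 << (b % NbPW))
 */
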
static int nfs4_num_prognums = 1024;
static SVC_CALLOUT_TABLE nfs4_cb_sct;

struct nfs4_dnode {
        list_node_t     linkage;
        rnode4_t        *rnodep;
        int             flags;          /* Flags for nfs4delegreturn_impl() */
};

static const struct nfs4_callback_stats nfs4_callback_stats_tmpl = {
        { "delegations",        KSTAT_DATA_UINT64 },
        { "cb_getattr",         KSTAT_DATA_UINT64 },
        { "cb_recall",          KSTAT_DATA_UINT64 },
        { "cb_null",            KSTAT_DATA_UINT64 },
        { "cb_dispatch",        KSTAT_DATA_UINT64 },
        { "delegaccept_r",      KSTAT_DATA_UINT64 },
        { "delegaccept_rw",     KSTAT_DATA_UINT64 },
        { "delegreturn",        KSTAT_DATA_UINT64 },
        { "callbacks",          KSTAT_DATA_UINT64 },
        { "claim_cur",          KSTAT_DATA_UINT64 },
        { "claim_cur_ok",       KSTAT_DATA_UINT64 },
        { "recall_trunc",       KSTAT_DATA_UINT64 },
        { "recall_failed",      KSTAT_DATA_UINT64 },
        { "return_limit_write", KSTAT_DATA_UINT64 },
        { "return_limit_addmap", KSTAT_DATA_UINT64 },
        { "deleg_recover",      KSTAT_DATA_UINT64 },
        { "cb_illegal",         KSTAT_DATA_UINT64 }
};

struct nfs4_cb_port {
        list_node_t             linkage; /* linkage into per-zone port list */
        char                    netid[KNC_STRSIZE];
        char                    uaddr[KNC_STRSIZE];
        char                    protofmly[KNC_STRSIZE];
        char                    proto[KNC_STRSIZE];
};

static int cb_getattr_bytes;

struct cb_recall_pass {
        rnode4_t        *rp;
        int             flags;          /* Flags for nfs4delegreturn_impl() */
        bool_t          truncate;
};

static nfs4_open_stream_t *get_next_deleg_stream(rnode4_t *, int);
static void nfs4delegreturn_thread(struct cb_recall_pass *);
static int deleg_reopen(vnode_t *, bool_t *, struct nfs4_callback_globals *,
    int);
static void nfs4_dlistadd(rnode4_t *, struct nfs4_callback_globals *, int);
static void nfs4_dlistclean_impl(struct nfs4_callback_globals *, int);
static int nfs4delegreturn_impl(rnode4_t *, int,
    struct nfs4_callback_globals *);
static void nfs4delegreturn_cleanup_impl(rnode4_t *, nfs4_server_t *,
    struct nfs4_callback_globals *);

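/*
 * cb_getattr - handle an OP_CB_GETATTR callback.  The server uses this
 * to ask the client, as the holder of a delegation, for its current
 * notion of the file's change attribute and size.  We look the file
 * handle up on the per-server delegation list and encode just those two
 * attributes in the reply; any other requested attributes are ignored.
 */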
static void
cb_getattr(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
        struct compound_state *cs, struct nfs4_callback_globals *ncg)
{
        CB_GETATTR4args *args = &argop->nfs_cb_argop4_u.opcbgetattr;
        CB_GETATTR4res *resp = &resop->nfs_cb_resop4_u.opcbgetattr;
        rnode4_t *rp;
        vnode_t *vp;
        bool_t found = FALSE;
        struct nfs4_server *sp;
        struct fattr4 *fap;
        rpc_inline_t *fdata;
        long mapcnt;
        fattr4_change change;
        fattr4_size size;
        uint_t rflag;

        ncg->nfs4_callback_stats.cb_getattr.value.ui64++;

#ifdef DEBUG
        /*
         * Error injection hook: set the cb4_getattr_fail global to an
         * NFS4 protocol error to be returned.
         */
        if (cb4_getattr_fail != NFS4_OK) {
                *cs->statusp = resp->status = cb4_getattr_fail;
                return;
        }
#endif

        resp->obj_attributes.attrmask = 0;

        mutex_enter(&ncg->nfs4_cb_lock);
        sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
        mutex_exit(&ncg->nfs4_cb_lock);

        if (nfs4_server_vlock(sp, 0) == FALSE) {

                CB_WARN("cb_getattr: cannot find server\n");

                *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
                return;
        }

        /*
         * In cb_compound, callback_ident was validated against rq_prog,
         * but we couldn't verify that it was set to the value we provided
         * at setclientid time (because we didn't have the server struct
         * yet).  Now we have the server struct, but don't have
         * callback_ident handy.  So, validate the server struct's program
         * number against the request's RPC prog number.  At this point, we
         * know the RPC prog num is valid (else we wouldn't be here);
         * however, we don't know that it was the prog number we supplied
         * to this server at setclientid time.  If the prog numbers aren't
         * equivalent, then log the problem and fail the request because
         * either the server and/or the client is confused.  This will
         * probably never happen.
         */
        if (sp->s_program != req->rq_prog) {
#ifdef DEBUG
                zcmn_err(getzoneid(), CE_WARN,
                    "cb_getattr: wrong server program number srv=%d req=%d\n",
                    sp->s_program, req->rq_prog);
#else
                zcmn_err(getzoneid(), CE_WARN,
                    "cb_getattr: wrong server program number\n");
#endif
                mutex_exit(&sp->s_lock);
                nfs4_server_rele(sp);
                *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
                return;
        }

        /*
         * Search the delegation list for a matching file handle;
         * mutex on sp prevents the list from changing.
         */

        rp = list_head(&sp->s_deleg_list);
        for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) {
                nfs4_fhandle_t fhandle;

                sfh4_copyval(rp->r_fh, &fhandle);

                if ((fhandle.fh_len == args->fh.nfs_fh4_len &&
                    bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val,
                    fhandle.fh_len) == 0)) {

                        found = TRUE;
                        break;
                }
#ifdef  DEBUG
                if (nfs4_deleg_anyfh.nfs_fh4_len == args->fh.nfs_fh4_len &&
                    bcmp(nfs4_deleg_anyfh.nfs_fh4_val, args->fh.nfs_fh4_val,
                    args->fh.nfs_fh4_len) == 0) {

                        found = TRUE;
                        break;
                }
#endif
        }

        /*
         * VN_HOLD the vnode before releasing s_lock to guarantee
         * we have a valid vnode reference.
         */
        if (found == TRUE) {
                vp = RTOV4(rp);
                VN_HOLD(vp);
        }

        mutex_exit(&sp->s_lock);
        nfs4_server_rele(sp);

        if (found == FALSE) {

                CB_WARN("cb_getattr: bad fhandle\n");

                *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
                return;
        }

        /*
         * Figure out which attributes the server wants.  We only
         * offer FATTR4_CHANGE & FATTR4_SIZE; ignore the rest.
         */
        fdata = kmem_alloc(cb_getattr_bytes, KM_SLEEP);

        /*
         * We don't actually need to create an XDR stream to encode these
         * simple data structures:
         * xdrmem_create(&xdr, fdata, cb_getattr_bytes, XDR_ENCODE);
         */
        fap = &resp->obj_attributes;

        fap->attrmask = 0;
        /* attrlist4_len starts at 0 and increases as attrs are processed */
        fap->attrlist4 = (char *)fdata;
        fap->attrlist4_len = 0;

        /* don't supply attrs if request was zero */
        if (args->attr_request != 0) {
                if (args->attr_request & FATTR4_CHANGE_MASK) {
                        /*
                         * If the file is mmapped, then increment the change
                         * attribute and return it.  This will guarantee that
                         * the server will perceive that the file has changed
                         * if there is any chance that the client application
                         * has changed it.  Otherwise, just return the change
                         * attribute as it has been updated by nfs4write_deleg.
                         */

                        mutex_enter(&rp->r_statelock);
                        mapcnt = rp->r_mapcnt;
                        rflag = rp->r_flags;
                        mutex_exit(&rp->r_statelock);

                        mutex_enter(&rp->r_statev4_lock);
                        /*
                         * If the object is mapped, always return a new
                         * change attribute.  Otherwise, if the object has
                         * dirty pages, return the current change attribute;
                         * if it has no dirty pages, all changes have been
                         * pushed to the server, so reset the change
                         * attribute to the value granted with the
                         * delegation.
                         */
                        if (mapcnt)
                                rp->r_deleg_change++;
                        else if (! (rflag & R4DIRTY))
                                rp->r_deleg_change = rp->r_deleg_change_grant;
                        change = rp->r_deleg_change;
                        mutex_exit(&rp->r_statev4_lock);

                        /*
                         * Use inline XDR code directly; we know that we
                         * are going to a memory buffer and it has enough
                         * space, so it cannot fail.
                         */
                        IXDR_PUT_U_HYPER(fdata, change);
                        fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT;
                        fap->attrmask |= FATTR4_CHANGE_MASK;
                }

                if (args->attr_request & FATTR4_SIZE_MASK) {
                        /*
                         * Use an atomic add of 0 to fetch a consistent view
                         * of r_size; this avoids having to take rw_lock
                         * which could cause a deadlock.
                         */
                        size = atomic_add_64_nv((uint64_t *)&rp->r_size, 0);

                        /*
                         * Use inline XDR code directly; we know that we
                         * are going to a memory buffer and it has enough
                         * space, so it cannot fail.
                         */
                        IXDR_PUT_U_HYPER(fdata, size);
                        fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT;
                        fap->attrmask |= FATTR4_SIZE_MASK;
                }
        }

        VN_RELE(vp);

        *cs->statusp = resp->status = NFS4_OK;
}

static void
cb_getattr_free(nfs_cb_resop4 *resop)
{
        if (resop->nfs_cb_resop4_u.opcbgetattr.obj_attributes.attrlist4)
                kmem_free(resop->nfs_cb_resop4_u.opcbgetattr.
                    obj_attributes.attrlist4, cb_getattr_bytes);
}

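/*
 * cb_recall - handle an OP_CB_RECALL callback.  The server is recalling
 * a delegation.  Match it against the delegation list by stateid and
 * file handle, then hand the actual DELEGRETURN work to an async thread
 * so the callback reply itself is not delayed.
 */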
static void
cb_recall(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
        struct compound_state *cs, struct nfs4_callback_globals *ncg)
{
        CB_RECALL4args *args = &argop->nfs_cb_argop4_u.opcbrecall;
        CB_RECALL4res *resp = &resop->nfs_cb_resop4_u.opcbrecall;
        rnode4_t *rp;
        vnode_t *vp;
        struct nfs4_server *sp;
        bool_t found = FALSE;

        ncg->nfs4_callback_stats.cb_recall.value.ui64++;

        ASSERT(req->rq_prog >= NFS4_CALLBACK);
        ASSERT(req->rq_prog < NFS4_CALLBACK+nfs4_num_prognums);

#ifdef DEBUG
        /*
         * Error injection hook: set the cb4_recall_fail global to an
         * NFS4 protocol error to be returned.
         */
        if (cb4_recall_fail != NFS4_OK) {
                *cs->statusp = resp->status = cb4_recall_fail;
                return;
        }
#endif

        mutex_enter(&ncg->nfs4_cb_lock);
        sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
        mutex_exit(&ncg->nfs4_cb_lock);

        if (nfs4_server_vlock(sp, 0) == FALSE) {

                CB_WARN("cb_recall: cannot find server\n");

                *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
                return;
        }

        /*
         * Search the delegation list for a matching file handle
         * AND stateid; mutex on sp prevents the list from changing.
         */

        rp = list_head(&sp->s_deleg_list);
        for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) {
                mutex_enter(&rp->r_statev4_lock);

                /* check both state id and file handle! */

                if ((bcmp(&rp->r_deleg_stateid, &args->stateid,
                    sizeof (stateid4)) == 0)) {
                        nfs4_fhandle_t fhandle;

                        sfh4_copyval(rp->r_fh, &fhandle);
                        if ((fhandle.fh_len == args->fh.nfs_fh4_len &&
                            bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val,
                            fhandle.fh_len) == 0)) {

                                found = TRUE;
                                break;
                        } else {
#ifdef  DEBUG
                                CB_WARN("cb_recall: stateid OK, bad fh\n");
#endif
                        }
                }
#ifdef  DEBUG
                if (bcmp(&args->stateid, &nfs4_deleg_any,
                    sizeof (stateid4)) == 0) {

                        found = TRUE;
                        break;
                }
#endif
                mutex_exit(&rp->r_statev4_lock);
        }

        /*
         * VN_HOLD the vnode before releasing s_lock to guarantee
         * we have a valid vnode reference.  The async thread will
         * release the hold when it's done.
         */
        if (found == TRUE) {
                mutex_exit(&rp->r_statev4_lock);
                vp = RTOV4(rp);
                VN_HOLD(vp);
        }
        mutex_exit(&sp->s_lock);
        nfs4_server_rele(sp);

        if (found == FALSE) {

                CB_WARN("cb_recall: bad stateid\n");

                *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
                return;
        }

        /* Fire up a thread to do the delegreturn */
        nfs4delegreturn_async(rp, NFS4_DR_RECALL|NFS4_DR_REOPEN,
            args->truncate);

        *cs->statusp = resp->status = 0;
}

/* ARGSUSED */
static void
cb_recall_free(nfs_cb_resop4 *resop)
{
        /* nothing to do here, cb_recall doesn't kmem_alloc */
}

/*
 * This function handles the CB_NULL proc call from an NFSv4 Server.
 *
 * We take note that the server has sent a CB_NULL for later processing
 * in the recovery logic.  It is noted so we may pause slightly after the
 * setclientid and before reopening files.  The pause is to allow the
 * NFSv4 Server time to receive the CB_NULL reply and adjust any of
 * its internal structures such that it has the opportunity to grant
 * delegations to reopened files.
 */

/* ARGSUSED */
static void
cb_null(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req,
    struct nfs4_callback_globals *ncg)
{
        struct nfs4_server *sp;

        ncg->nfs4_callback_stats.cb_null.value.ui64++;

        ASSERT(req->rq_prog >= NFS4_CALLBACK);
        ASSERT(req->rq_prog < NFS4_CALLBACK+nfs4_num_prognums);

        mutex_enter(&ncg->nfs4_cb_lock);
        sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
        mutex_exit(&ncg->nfs4_cb_lock);

        if (nfs4_server_vlock(sp, 0) != FALSE) {
                sp->s_flags |= N4S_CB_PINGED;
                cv_broadcast(&sp->wait_cb_null);
                mutex_exit(&sp->s_lock);
                nfs4_server_rele(sp);
        }
}

/*
 * cb_illegal   args: void
 *              res : status (NFS4ERR_OP_CB_ILLEGAL)
 */
/* ARGSUSED */
static void
cb_illegal(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
        struct compound_state *cs, struct nfs4_callback_globals *ncg)
{
        CB_ILLEGAL4res *resp = &resop->nfs_cb_resop4_u.opcbillegal;

        ncg->nfs4_callback_stats.cb_illegal.value.ui64++;
        resop->resop = OP_CB_ILLEGAL;
        *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
}

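/*
 * cb_compound - process a CB_COMPOUND request: echo the request tag,
 * check the minor version, then run each operation in the args array in
 * order.  Processing stops at the first op that fails, in which case the
 * results array is compacted to just the ops actually executed.
 */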
static void
cb_compound(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req,
        struct nfs4_callback_globals *ncg)
{
        uint_t i;
        struct compound_state cs;
        nfs_cb_argop4 *argop;
        nfs_cb_resop4 *resop, *new_res;
        uint_t op;

        bzero(&cs, sizeof (cs));
        cs.statusp = &resp->status;
        cs.cont = TRUE;

        /*
         * Form a reply tag by copying over the request tag.
         */
        resp->tag.utf8string_len = args->tag.utf8string_len;
        resp->tag.utf8string_val = kmem_alloc(resp->tag.utf8string_len,
            KM_SLEEP);
        bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
            args->tag.utf8string_len);

        /*
         * XXX for now, minorversion should be zero
         */
        if (args->minorversion != CB4_MINORVERSION) {
                resp->array_len = 0;
                resp->array = NULL;
                resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
                return;
        }

#ifdef DEBUG
        /*
         * Verify callback_ident.  It doesn't really matter if it's wrong
         * because we don't really use callback_ident -- we use the prog
         * number of the RPC request instead.  In this case, just print a
         * DEBUG console message to reveal brokenness of cbclient (at
         * bkoff/cthon).
         */
        if (args->callback_ident != req->rq_prog)
                zcmn_err(getzoneid(), CE_WARN,
                    "cb_compound: cb_client using wrong "
                    "callback_ident(%d), should be %d",
                    args->callback_ident, req->rq_prog);
#endif

        resp->array_len = args->array_len;
        resp->array = kmem_zalloc(args->array_len * sizeof (nfs_cb_resop4),
            KM_SLEEP);

        for (i = 0; i < args->array_len && cs.cont; i++) {

                argop = &args->array[i];
                resop = &resp->array[i];
                resop->resop = argop->argop;
                op = (uint_t)resop->resop;

                switch (op) {

                case OP_CB_GETATTR:

                        cb_getattr(argop, resop, req, &cs, ncg);
                        break;

                case OP_CB_RECALL:

                        cb_recall(argop, resop, req, &cs, ncg);
                        break;

                case OP_CB_ILLEGAL:

                        /* fall through */

                default:
                        /*
                         * Handle OP_CB_ILLEGAL and any undefined opcode.
                         * Currently, the XDR code will return BADXDR
                         * if the cb op doesn't decode to a legal value,
                         * so it really only handles OP_CB_ILLEGAL.
                         */
                        op = OP_CB_ILLEGAL;
                        cb_illegal(argop, resop, req, &cs, ncg);
                }

                if (*cs.statusp != NFS4_OK)
                        cs.cont = FALSE;

                /*
                 * If not at the last op, and if we are to stop, then
                 * compact the results array.
                 */
                if ((i + 1) < args->array_len && !cs.cont) {

                        new_res = kmem_alloc(
                            (i+1) * sizeof (nfs_cb_resop4), KM_SLEEP);
                        bcopy(resp->array,
                            new_res, (i+1) * sizeof (nfs_cb_resop4));
                        kmem_free(resp->array,
                            args->array_len * sizeof (nfs_cb_resop4));

                        resp->array_len = i + 1;
                        resp->array = new_res;
                }
        }
}

static void
cb_compound_free(CB_COMPOUND4res *resp)
{
        uint_t i, op;
        nfs_cb_resop4 *resop;

        if (resp->tag.utf8string_val) {
                UTF8STRING_FREE(resp->tag)
        }

        for (i = 0; i < resp->array_len; i++) {

                resop = &resp->array[i];
                op = (uint_t)resop->resop;

                switch (op) {

                case OP_CB_GETATTR:

                        cb_getattr_free(resop);
                        break;

                case OP_CB_RECALL:

                        cb_recall_free(resop);
                        break;

                default:
                        break;
                }
        }

        if (resp->array != NULL) {
                kmem_free(resp->array,
                    resp->array_len * sizeof (nfs_cb_resop4));
        }
}

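/*
 * cb_dispatch - RPC dispatch routine for the callback program: decode
 * the arguments, run CB_NULL or CB_COMPOUND, send the reply, then free
 * both the results and the decoded arguments.
 */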
static void
cb_dispatch(struct svc_req *req, SVCXPRT *xprt)
{
        CB_COMPOUND4args args;
        CB_COMPOUND4res res;
        struct nfs4_callback_globals *ncg;

        bool_t (*xdr_args)(), (*xdr_res)();
        void (*proc)(CB_COMPOUND4args *, CB_COMPOUND4res *, struct svc_req *,
            struct nfs4_callback_globals *);
        void (*freeproc)(CB_COMPOUND4res *);

        ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
        ASSERT(ncg != NULL);

        ncg->nfs4_callback_stats.cb_dispatch.value.ui64++;

        switch (req->rq_proc) {
        case CB_NULL:
                xdr_args = xdr_void;
                xdr_res = xdr_void;
                proc = cb_null;
                freeproc = NULL;
                break;

        case CB_COMPOUND:
                xdr_args = xdr_CB_COMPOUND4args_clnt;
                xdr_res = xdr_CB_COMPOUND4res;
                proc = cb_compound;
                freeproc = cb_compound_free;
                break;

        default:
                CB_WARN("cb_dispatch: no proc\n");
                svcerr_noproc(xprt);
                return;
        }

        args.tag.utf8string_val = NULL;
        args.array = NULL;

        if (!SVC_GETARGS(xprt, xdr_args, (caddr_t)&args)) {

                CB_WARN("cb_dispatch: cannot getargs\n");
                svcerr_decode(xprt);
                return;
        }

        (*proc)(&args, &res, req, ncg);

        if (svc_sendreply(xprt, xdr_res, (caddr_t)&res) == FALSE) {

                CB_WARN("cb_dispatch: bad sendreply\n");
                svcerr_systemerr(xprt);
        }

        if (freeproc)
                (*freeproc)(&res);

        if (!SVC_FREEARGS(xprt, xdr_args, (caddr_t)&args)) {

                CB_WARN("cb_dispatch: bad freeargs\n");
        }
}

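/*
 * nfs4_getnextprogram - pick an unused callback program number, scanning
 * round-robin from nfs4_program_hint.  Returns 0 if all nfs4_num_prognums
 * slots are in use.
 */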
static rpcprog_t
nfs4_getnextprogram(struct nfs4_callback_globals *ncg)
{
        int i, j;

        j = ncg->nfs4_program_hint;
        for (i = 0; i < nfs4_num_prognums; i++, j++) {

                if (j >= nfs4_num_prognums)
                        j = 0;

                if (ncg->nfs4prog2server[j] == NULL) {
                        ncg->nfs4_program_hint = j+1;
                        return (j+NFS4_CALLBACK);
                }
        }

        return (0);
}

void
nfs4callback_destroy(nfs4_server_t *np)
{
        struct nfs4_callback_globals *ncg;
        int i;

        if (np->s_program == 0)
                return;

        ncg = np->zone_globals;
        i = np->s_program - NFS4_CALLBACK;

        mutex_enter(&ncg->nfs4_cb_lock);

        ASSERT(ncg->nfs4prog2server[i] == np);

        ncg->nfs4prog2server[i] = NULL;

        if (i < ncg->nfs4_program_hint)
                ncg->nfs4_program_hint = i;

        mutex_exit(&ncg->nfs4_cb_lock);
}

/*
 * nfs4_setport - This function saves a netid and universal address for
 * the callback program.  These values will be used during setclientid.
 */
static void
nfs4_setport(char *netid, char *uaddr, char *protofmly, char *proto,
        struct nfs4_callback_globals *ncg)
{
        struct nfs4_cb_port *p;
        bool_t found = FALSE;

        ASSERT(MUTEX_HELD(&ncg->nfs4_cb_lock));

        p = list_head(&ncg->nfs4_cb_ports);
        for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) {
                if (strcmp(p->netid, netid) == 0) {
                        found = TRUE;
                        break;
                }
        }
        if (found == TRUE)
                (void) strcpy(p->uaddr, uaddr);
        else {
                p = kmem_alloc(sizeof (*p), KM_SLEEP);

                (void) strcpy(p->uaddr, uaddr);
                (void) strcpy(p->netid, netid);
                (void) strcpy(p->protofmly, protofmly);
                (void) strcpy(p->proto, proto);
                list_insert_head(&ncg->nfs4_cb_ports, p);
        }
}

/*
 * nfs4_cb_args - This function is used to construct the callback
 * portion of the arguments needed for setclientid.
 */

void
nfs4_cb_args(nfs4_server_t *np, struct knetconfig *knc, SETCLIENTID4args *args)
{
        struct nfs4_cb_port *p;
        bool_t found = FALSE;
        rpcprog_t pgm;
        struct nfs4_callback_globals *ncg = np->zone_globals;

        /*
         * This server structure may already have a program number
         * assigned to it.  This happens when the client has to
         * re-issue SETCLIENTID.  Just re-use the information.
         */
        if (np->s_program >= NFS4_CALLBACK &&
            np->s_program < NFS4_CALLBACK + nfs4_num_prognums)
                nfs4callback_destroy(np);

        mutex_enter(&ncg->nfs4_cb_lock);

        p = list_head(&ncg->nfs4_cb_ports);
        for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) {
                if (strcmp(p->protofmly, knc->knc_protofmly) == 0 &&
                    strcmp(p->proto, knc->knc_proto) == 0) {
                        found = TRUE;
                        break;
                }
        }

        if (found == FALSE) {

                NFS4_DEBUG(nfs4_callback_debug,
                    (CE_WARN, "nfs4_cb_args: could not find netid for %s/%s\n",
                    knc->knc_protofmly, knc->knc_proto));

                args->callback.cb_program = 0;
                args->callback.cb_location.r_netid = NULL;
                args->callback.cb_location.r_addr = NULL;
                args->callback_ident = 0;
                mutex_exit(&ncg->nfs4_cb_lock);
                return;
        }

        if ((pgm = nfs4_getnextprogram(ncg)) == 0) {
                CB_WARN("nfs4_cb_args: out of program numbers\n");

                args->callback.cb_program = 0;
                args->callback.cb_location.r_netid = NULL;
                args->callback.cb_location.r_addr = NULL;
                args->callback_ident = 0;
                mutex_exit(&ncg->nfs4_cb_lock);
                return;
        }

        ncg->nfs4prog2server[pgm-NFS4_CALLBACK] = np;
        args->callback.cb_program = pgm;
        args->callback.cb_location.r_netid = p->netid;
        args->callback.cb_location.r_addr = p->uaddr;
        args->callback_ident = pgm;

        np->s_program = pgm;

        mutex_exit(&ncg->nfs4_cb_lock);
}

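/*
 * nfs4_dquery - handle the NFS4_DQUERY subcommand of nfs4_svc: copy the
 * delegation type currently held on the file underlying the caller's fd
 * out to the user-supplied address.
 */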
static int
nfs4_dquery(struct nfs4_svc_args *arg, model_t model)
{
        file_t *fp;
        vnode_t *vp;
        rnode4_t *rp;
        int error;
        STRUCT_HANDLE(nfs4_svc_args, uap);

        STRUCT_SET_HANDLE(uap, model, arg);

        if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
                return (EBADF);

        vp = fp->f_vnode;

        if (vp == NULL || vp->v_type != VREG ||
            !vn_matchops(vp, nfs4_vnodeops)) {
                releasef(STRUCT_FGET(uap, fd));
                return (EBADF);
        }

        rp = VTOR4(vp);

        /*
         * I can't convince myself that we need locking here.  The
         * rnode cannot disappear and the value returned is instantly
         * stale anyway, so why bother?
         */

        error = suword32(STRUCT_FGETP(uap, netid), rp->r_deleg_type);
        releasef(STRUCT_FGET(uap, fd));
        return (error);
}

/*
 * NFS4 client system call.  This service does the
 * necessary initialization for the callback program.
 * This is fashioned after the server side interaction
 * between nfsd and the kernel.  On the client, the
 * mount command forks and the child process does the
 * necessary interaction with the kernel.
 *
 * uap->fd is the fd of an open transport provider
 */
int
nfs4_svc(struct nfs4_svc_args *arg, model_t model)
{
        file_t *fp;
        int error;
        int readsize;
        char buf[KNC_STRSIZE], uaddr[KNC_STRSIZE];
        char protofmly[KNC_STRSIZE], proto[KNC_STRSIZE];
        size_t len;
        STRUCT_HANDLE(nfs4_svc_args, uap);
        struct netbuf addrmask;
        int cmd;
        SVCMASTERXPRT *cb_xprt;
        struct nfs4_callback_globals *ncg;

#ifdef lint
        model = model;          /* STRUCT macros don't always refer to it */
#endif

        STRUCT_SET_HANDLE(uap, model, arg);

        if (STRUCT_FGET(uap, cmd) == NFS4_DQUERY)
                return (nfs4_dquery(arg, model));

        if (secpolicy_nfs(CRED()) != 0)
                return (EPERM);

        if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
                return (EBADF);

        /*
         * Set read buffer size to rsize
         * and add room for RPC headers.
         */
        readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA);
        if (readsize < RPC_MAXDATASIZE)
                readsize = RPC_MAXDATASIZE;

        error = copyinstr((const char *)STRUCT_FGETP(uap, netid), buf,
            KNC_STRSIZE, &len);
        if (error) {
                releasef(STRUCT_FGET(uap, fd));
                return (error);
        }

        cmd = STRUCT_FGET(uap, cmd);

        if (cmd & NFS4_KRPC_START) {
                addrmask.len = STRUCT_FGET(uap, addrmask.len);
                addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen);
                addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP);
                error = copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf,
                    addrmask.len);
                if (error) {
                        releasef(STRUCT_FGET(uap, fd));
                        kmem_free(addrmask.buf, addrmask.maxlen);
                        return (error);
                }
        } else
                addrmask.buf = NULL;

        error = copyinstr((const char *)STRUCT_FGETP(uap, addr), uaddr,
            sizeof (uaddr), &len);
        if (error) {
                releasef(STRUCT_FGET(uap, fd));
                if (addrmask.buf)
                        kmem_free(addrmask.buf, addrmask.maxlen);
                return (error);
        }

        error = copyinstr((const char *)STRUCT_FGETP(uap, protofmly), protofmly,
            sizeof (protofmly), &len);
        if (error) {
                releasef(STRUCT_FGET(uap, fd));
                if (addrmask.buf)
                        kmem_free(addrmask.buf, addrmask.maxlen);
                return (error);
        }

        error = copyinstr((const char *)STRUCT_FGETP(uap, proto), proto,
            sizeof (proto), &len);
        if (error) {
                releasef(STRUCT_FGET(uap, fd));
                if (addrmask.buf)
                        kmem_free(addrmask.buf, addrmask.maxlen);
                return (error);
        }

        ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
        ASSERT(ncg != NULL);

        mutex_enter(&ncg->nfs4_cb_lock);
        if (cmd & NFS4_SETPORT)
                nfs4_setport(buf, uaddr, protofmly, proto, ncg);

        if (cmd & NFS4_KRPC_START) {
                error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &cb_xprt,
                    &nfs4_cb_sct, NULL, NFS_CB_SVCPOOL_ID, FALSE);
                if (error) {
                        CB_WARN1("nfs4_svc: svc_tli_kcreate failed %d\n",
                            error);
                        kmem_free(addrmask.buf, addrmask.maxlen);
                }
        }

        mutex_exit(&ncg->nfs4_cb_lock);
        releasef(STRUCT_FGET(uap, fd));
        return (error);
}

struct nfs4_callback_globals *
nfs4_get_callback_globals(void)
{
        return (zone_getspecific(nfs4_callback_zone_key, nfs_zone()));
}

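/*
 * nfs4_callback_init_zone - zone_key_create() create callback: allocate
 * and initialize the per-zone callback globals (program-to-server map,
 * delegation return list, callback port list) and register the
 * "nfs:0:nfs4_callback_stats" kstat for the zone.
 */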
static void *
nfs4_callback_init_zone(zoneid_t zoneid)
{
        kstat_t *nfs4_callback_kstat;
        struct nfs4_callback_globals *ncg;

        ncg = kmem_zalloc(sizeof (*ncg), KM_SLEEP);

        ncg->nfs4prog2server = kmem_zalloc(nfs4_num_prognums *
            sizeof (struct nfs4_server *), KM_SLEEP);

        /* initialize the dlist */
        mutex_init(&ncg->nfs4_dlist_lock, NULL, MUTEX_DEFAULT, NULL);
        list_create(&ncg->nfs4_dlist, sizeof (struct nfs4_dnode),
            offsetof(struct nfs4_dnode, linkage));

        /* initialize cb_port list */
        mutex_init(&ncg->nfs4_cb_lock, NULL, MUTEX_DEFAULT, NULL);
        list_create(&ncg->nfs4_cb_ports, sizeof (struct nfs4_cb_port),
            offsetof(struct nfs4_cb_port, linkage));

        /* get our own copy of the kstats */
        bcopy(&nfs4_callback_stats_tmpl, &ncg->nfs4_callback_stats,
            sizeof (nfs4_callback_stats_tmpl));
        /* register "nfs:0:nfs4_callback_stats" for this zone */
        if ((nfs4_callback_kstat =
            kstat_create_zone("nfs", 0, "nfs4_callback_stats", "misc",
            KSTAT_TYPE_NAMED,
            sizeof (ncg->nfs4_callback_stats) / sizeof (kstat_named_t),
            KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE,
            zoneid)) != NULL) {
                nfs4_callback_kstat->ks_data = &ncg->nfs4_callback_stats;
                kstat_install(nfs4_callback_kstat);
        }
        return (ncg);
}

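/*
 * nfs4_discard_delegations - walk every registered callback program's
 * server and throw away all delegations on its list, either by simply
 * unlinking entries that have already been returned or by calling
 * nfs4delegreturn_cleanup_impl().  Used during zone shutdown/teardown.
 */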
static void
nfs4_discard_delegations(struct nfs4_callback_globals *ncg)
{
        nfs4_server_t *sp;
        int i, num_removed;

        /*
         * It's OK here to just run through the registered "programs", as
         * servers without programs won't have any delegations to handle.
         */
        for (i = 0; i < nfs4_num_prognums; i++) {
                rnode4_t *rp;

                mutex_enter(&ncg->nfs4_cb_lock);
                sp = ncg->nfs4prog2server[i];
                mutex_exit(&ncg->nfs4_cb_lock);

                if (nfs4_server_vlock(sp, 1) == FALSE)
                        continue;
                num_removed = 0;
                while ((rp = list_head(&sp->s_deleg_list)) != NULL) {
                        mutex_enter(&rp->r_statev4_lock);
                        if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
                                /*
                                 * We need to take matters into our own hands,
                                 * as nfs4delegreturn_cleanup_impl() won't
                                 * remove this from the list.
                                 */
                                list_remove(&sp->s_deleg_list, rp);
                                mutex_exit(&rp->r_statev4_lock);
                                nfs4_dec_state_ref_count_nolock(sp,
                                    VTOMI4(RTOV4(rp)));
                                num_removed++;
                                continue;
                        }
                        mutex_exit(&rp->r_statev4_lock);
                        VN_HOLD(RTOV4(rp));
                        mutex_exit(&sp->s_lock);
                        /*
                         * The following will remove the node from the list.
                         */
                        nfs4delegreturn_cleanup_impl(rp, sp, ncg);
                        VN_RELE(RTOV4(rp));
                        mutex_enter(&sp->s_lock);
                }
                mutex_exit(&sp->s_lock);
                /* each removed list node releases a reference */
                while (num_removed-- > 0)
                        nfs4_server_rele(sp);
                /* remove our reference for nfs4_server_vlock */
                nfs4_server_rele(sp);
        }
}

/* ARGSUSED */
static void
nfs4_callback_shutdown_zone(zoneid_t zoneid, void *data)
{
        struct nfs4_callback_globals *ncg = data;

        /*
         * Clean pending delegation return list.
         */
        nfs4_dlistclean_impl(ncg, NFS4_DR_DISCARD);

        /*
         * Discard all delegations.
         */
        nfs4_discard_delegations(ncg);
}

static void
nfs4_callback_fini_zone(zoneid_t zoneid, void *data)
{
        struct nfs4_callback_globals *ncg = data;
        struct nfs4_cb_port *p;
        nfs4_server_t *sp, *next;
        nfs4_server_t freelist;
        int i;

        kstat_delete_byname_zone("nfs", 0, "nfs4_callback_stats", zoneid);

        /*
         * Discard all delegations that may have crept in since we did the
         * _shutdown.
         */
        nfs4_discard_delegations(ncg);
        /*
         * We're completely done with this zone and all associated
         * nfs4_server_t's.  Any remaining nfs4_server_ts should only have one
         * more reference outstanding -- the reference we didn't release in
         * nfs4_renew_lease_thread().
         *
         * Here we need to run through the global nfs4_server_lst as we need to
         * deal with nfs4_server_ts without programs, as they also have threads
         * created for them, and so have outstanding references that we need to
         * release.
         */
        freelist.forw = &freelist;
        freelist.back = &freelist;
        mutex_enter(&nfs4_server_lst_lock);
        sp = nfs4_server_lst.forw;
        while (sp != &nfs4_server_lst) {
                next = sp->forw;
                if (sp->zoneid == zoneid) {
                        remque(sp);
                        insque(sp, &freelist);
                }
                sp = next;
        }
        mutex_exit(&nfs4_server_lst_lock);

        sp = freelist.forw;
        while (sp != &freelist) {
                next = sp->forw;
                nfs4_server_rele(sp);   /* free the list's reference */
                sp = next;
        }

#ifdef DEBUG
        for (i = 0; i < nfs4_num_prognums; i++) {
                ASSERT(ncg->nfs4prog2server[i] == NULL);
        }
#endif
        kmem_free(ncg->nfs4prog2server, nfs4_num_prognums *
            sizeof (struct nfs4_server *));

        mutex_enter(&ncg->nfs4_cb_lock);
        while ((p = list_head(&ncg->nfs4_cb_ports)) != NULL) {
                list_remove(&ncg->nfs4_cb_ports, p);
                kmem_free(p, sizeof (*p));
        }
        list_destroy(&ncg->nfs4_cb_ports);
        mutex_destroy(&ncg->nfs4_cb_lock);
        list_destroy(&ncg->nfs4_dlist);
        mutex_destroy(&ncg->nfs4_dlist_lock);
        kmem_free(ncg, sizeof (*ncg));
}

void
nfs4_callback_init(void)
{
        int i;
        SVC_CALLOUT *nfs4_cb_sc;

        /* initialize the callback table */
        nfs4_cb_sc = kmem_alloc(nfs4_num_prognums *
            sizeof (SVC_CALLOUT), KM_SLEEP);

        for (i = 0; i < nfs4_num_prognums; i++) {
                nfs4_cb_sc[i].sc_prog = NFS4_CALLBACK+i;
                nfs4_cb_sc[i].sc_versmin = NFS_CB;
                nfs4_cb_sc[i].sc_versmax = NFS_CB;
                nfs4_cb_sc[i].sc_dispatch = cb_dispatch;
        }

        nfs4_cb_sct.sct_size = nfs4_num_prognums;
        nfs4_cb_sct.sct_free = FALSE;
        nfs4_cb_sct.sct_sc = nfs4_cb_sc;

        /*
         * Compute max bytes required for dynamically allocated parts
         * of cb_getattr reply.  Only size and change are supported now.
         * If CB_GETATTR is changed to reply with additional attrs,
         * additional sizes must be added below.
         *
         * fattr4_change + fattr4_size == uint64_t + uint64_t
         */
        cb_getattr_bytes = 2 * BYTES_PER_XDR_UNIT + 2 * BYTES_PER_XDR_UNIT;
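        /*
         * Worked example (assumes BYTES_PER_XDR_UNIT == 4, per the XDR
         * spec): each 64-bit attribute encodes as two XDR units, so
         * cb_getattr_bytes is 8 + 8 = 16 bytes.
         */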

        zone_key_create(&nfs4_callback_zone_key, nfs4_callback_init_zone,
            nfs4_callback_shutdown_zone, nfs4_callback_fini_zone);
}

void
nfs4_callback_fini(void)
{
}

/*
 * NB: This function can be called from the *wrong* zone (i.e., the zone
 * that 'rp' belongs to and the caller's zone may not be the same).  This
 * can happen if the zone is going away and we get called from
 * nfs4_async_inactive().  In this case the globals will be NULL and we
 * won't update the counters, which doesn't matter as the zone is going
 * away anyhow.
 */
static void
nfs4delegreturn_cleanup_impl(rnode4_t *rp, nfs4_server_t *np,
        struct nfs4_callback_globals *ncg)
{
        mntinfo4_t *mi = VTOMI4(RTOV4(rp));
        boolean_t need_rele = B_FALSE;

        /*
         * The caller must be holding mi_recovlock in read mode to call
         * here; this is provided by start_op.  Delegation management
         * requires grabbing s_lock first and then r_statev4_lock.
         */

        if (np == NULL) {
                np = find_nfs4_server_all(mi, 1);
                if (np == NULL)
                        return;
                need_rele = B_TRUE;
        } else {
                mutex_enter(&np->s_lock);
        }

        mutex_enter(&rp->r_statev4_lock);

        if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
                mutex_exit(&rp->r_statev4_lock);
                mutex_exit(&np->s_lock);
                if (need_rele)
                        nfs4_server_rele(np);
                return;
        }

        /*
         * Free the cred originally held when
         * the delegation was granted.  Caller must
         * hold this cred if it wants to use it after
         * this call.
         */
        crfree(rp->r_deleg_cred);
        rp->r_deleg_cred = NULL;
        rp->r_deleg_type = OPEN_DELEGATE_NONE;
        rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
        rp->r_deleg_needs_recall = FALSE;
        rp->r_deleg_return_pending = FALSE;

        /*
         * Remove the rnode from the server's list and
         * update the ref counts.
         */
        list_remove(&np->s_deleg_list, rp);
        mutex_exit(&rp->r_statev4_lock);
        nfs4_dec_state_ref_count_nolock(np, mi);
        mutex_exit(&np->s_lock);
        /* removed list node removes a reference */
        nfs4_server_rele(np);
        if (need_rele)
                nfs4_server_rele(np);
        if (ncg != NULL)
                ncg->nfs4_callback_stats.delegations.value.ui64--;
}

void
nfs4delegreturn_cleanup(rnode4_t *rp, nfs4_server_t *np)
{
        struct nfs4_callback_globals *ncg;

        if (np != NULL) {
                ncg = np->zone_globals;
        } else if (nfs_zone() == VTOMI4(RTOV4(rp))->mi_zone) {
                ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
                ASSERT(ncg != NULL);
        } else {
                /*
                 * Request coming from the wrong zone.
                 */
                ASSERT(getzoneid() == GLOBAL_ZONEID);
                ncg = NULL;
        }

        nfs4delegreturn_cleanup_impl(rp, np, ncg);
}

static void
nfs4delegreturn_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp,
        cred_t *cr, vnode_t *vp)
{
        if (error != ETIMEDOUT && error != EINTR &&
            !NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) {
                lost_rqstp->lr_op = 0;
                return;
        }

        NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
            "nfs4delegreturn_save_lost_rqst: error %d", error));

        lost_rqstp->lr_op = OP_DELEGRETURN;
        /*
         * The vp is held and rele'd via the recovery code.
         * See nfs4_save_lost_rqst.
         */
        lost_rqstp->lr_vp = vp;
        lost_rqstp->lr_dvp = NULL;
        lost_rqstp->lr_oop = NULL;
        lost_rqstp->lr_osp = NULL;
        lost_rqstp->lr_lop = NULL;
        lost_rqstp->lr_cr = cr;
        lost_rqstp->lr_flk = NULL;
        lost_rqstp->lr_putfirst = FALSE;
}

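/*
 * nfs4delegreturn_otw - do the over-the-wire DELEGRETURN as a compound
 * of PUTFH, GETATTR and DELEGRETURN.  The GETATTR result is used to
 * freshen the attribute cache before the delegation state is torn down.
 */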
static void
nfs4delegreturn_otw(rnode4_t *rp, cred_t *cr, nfs4_error_t *ep)
{
        COMPOUND4args_clnt args;
        COMPOUND4res_clnt res;
        nfs_argop4 argops[3];
        nfs4_ga_res_t *garp = NULL;
        hrtime_t t;
        int numops;
        int doqueue = 1;

        args.ctag = TAG_DELEGRETURN;

        numops = 3;             /* PUTFH, GETATTR, DELEGRETURN */

        args.array = argops;
        args.array_len = numops;

        argops[0].argop = OP_CPUTFH;
        argops[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh;

        argops[1].argop = OP_GETATTR;
        argops[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
        argops[1].nfs_argop4_u.opgetattr.mi = VTOMI4(RTOV4(rp));

        argops[2].argop = OP_DELEGRETURN;
        argops[2].nfs_argop4_u.opdelegreturn.deleg_stateid =
            rp->r_deleg_stateid;

        t = gethrtime();
        rfs4call(VTOMI4(RTOV4(rp)), &args, &res, cr, &doqueue, 0, ep);

        if (ep->error)
                return;

        if (res.status == NFS4_OK) {
                garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res;
                nfs4_attr_cache(RTOV4(rp), garp, t, cr, TRUE, NULL);
        }
        (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
}

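/*
 * nfs4_do_delegreturn - drive a delegation return under the
 * start_fop/end_op recovery protocol.  Loops until the delegation is
 * gone, the return has been handed off to the recovery framework, or
 * start_fop fails (in which case NFS4_DR_FORCE discards the delegation
 * locally).
 */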
1445 int
1446 nfs4_do_delegreturn(rnode4_t *rp, int flags, cred_t *cr,
1447         struct nfs4_callback_globals *ncg)
1448 {
1449         vnode_t *vp = RTOV4(rp);
1450         mntinfo4_t *mi = VTOMI4(vp);
1451         nfs4_lost_rqst_t lost_rqst;
1452         nfs4_recov_state_t recov_state;
1453         bool_t needrecov = FALSE, recovonly, done = FALSE;
1454         nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
1455 
1456         ncg->nfs4_callback_stats.delegreturn.value.ui64++;
1457 
1458         while (!done) {
1459                 e.error = nfs4_start_fop(mi, vp, NULL, OH_DELEGRETURN,
1460                     &recov_state, &recovonly);
1461 
1462                 if (e.error) {
1463                         if (flags & NFS4_DR_FORCE) {
1464                                 (void) nfs_rw_enter_sig(&mi->mi_recovlock,
1465                                     RW_READER, 0);
1466                                 nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1467                                 nfs_rw_exit(&mi->mi_recovlock);
1468                         }
1469                         break;
1470                 }
1471 
1472                 /*
1473                  * Check to see if the delegation has already been
1474                  * returned by the recovery thread.   The state of
1475                  * the delegation cannot change at this point due
1476                  * to start_fop and the r_deleg_recall_lock.
1477                  */
1478                 if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
1479                         e.error = 0;
1480                         nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
1481                         break;
1482                 }
1483 
1484                 if (recovonly) {
1485                         /*
1486                          * Delegation will be returned via the
1487                          * recovery framework.  Build a lost request
1488                          * structure, start recovery and get out.
1489                          */
1490                         nfs4_error_init(&e, EINTR);
1491                         nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst,
1492                             cr, vp);
1493                         (void) nfs4_start_recovery(&e, mi, vp,
1494                             NULL, &rp->r_deleg_stateid,
1495                             lost_rqst.lr_op == OP_DELEGRETURN ?
1496                             &lost_rqst : NULL, OP_DELEGRETURN, NULL,
1497                             NULL, NULL);
1498                         nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
1499                         break;
1500                 }
1501 
1502                 nfs4delegreturn_otw(rp, cr, &e);
1503 
1504                 /*
1505          * Ignore some errors on delegreturn; there is no point in
1506          * marking the file dead on a state-destroying operation.
1507                  */
1508                 if (e.error == 0 && (nfs4_recov_marks_dead(e.stat) ||
1509                     e.stat == NFS4ERR_BADHANDLE ||
1510                     e.stat == NFS4ERR_STALE ||
1511                     (e.stat == NFS4ERR_STALE_STATEID &&
1512                      !(rp->r_flags & R4HASHED))))
1513                         needrecov = FALSE;
1514                 else
1515                         needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp);
1516 
1517                 if (needrecov) {
1518                         nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst,
1519                             cr, vp);
1520                         (void) nfs4_start_recovery(&e, mi, vp,
1521                             NULL, &rp->r_deleg_stateid,
1522                             lost_rqst.lr_op == OP_DELEGRETURN ?
1523                             &lost_rqst : NULL, OP_DELEGRETURN, NULL,
1524                             NULL, NULL);
1525                 } else {
1526                         nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1527                         done = TRUE;
1528                 }
1529 
1530                 nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
1531         }
1532         return (e.error);
1533 }
1534 
1535 /*
1536  * nfs4_resend_delegreturn - used to drive the delegreturn
1537  * operation via the recovery thread.
1538  */
1539 void
1540 nfs4_resend_delegreturn(nfs4_lost_rqst_t *lorp, nfs4_error_t *ep,
1541         nfs4_server_t *np)
1542 {
1543         rnode4_t *rp = VTOR4(lorp->lr_vp);
1544 
1545         /* If the file failed recovery, just quit. */
1546         mutex_enter(&rp->r_statelock);
1547         if (rp->r_flags & R4RECOVERR) {
1548                 ep->error = EIO;
1549         }
1550         mutex_exit(&rp->r_statelock);
1551 
1552         if (!ep->error)
1553                 nfs4delegreturn_otw(rp, lorp->lr_cr, ep);
1554 
1555         /*
1556          * If recovery is now needed, then return the error
1557          * and status and let the recovery thread handle it,
1558          * including re-driving another delegreturn.  Otherwise,
1559          * just give up and clean up the delegation.
1560          */
1561         if (nfs4_needs_recovery(ep, TRUE, lorp->lr_vp->v_vfsp))
1562                 return;
1563 
1564         if (rp->r_deleg_type != OPEN_DELEGATE_NONE)
1565                 nfs4delegreturn_cleanup(rp, np);
1566 
1567         nfs4_error_zinit(ep);
1568 }
1569 
1570 /*
1571  * nfs4delegreturn - general function to return a delegation.
1572  *
1573  * NFS4_DR_FORCE - return the delegation even if start_op fails
1574  * NFS4_DR_PUSH - push modified data back to the server via VOP_PUTPAGE
1575  * NFS4_DR_DISCARD - discard the delegation w/o delegreturn
1576  * NFS4_DR_DID_OP - calling function already did nfs4_start_op
1577  * NFS4_DR_RECALL - delegreturn initiated via CB_RECALL
1578  * NFS4_DR_REOPEN - do file reopens, if applicable
1579  */
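     /*
      * For example, a caller that wants dirty pages flushed and the
      * delegation open streams reopened before the return would pass
      * NFS4_DR_PUSH|NFS4_DR_REOPEN, as delegreturn_all_thread() does.
      */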
1580 static int
1581 nfs4delegreturn_impl(rnode4_t *rp, int flags, struct nfs4_callback_globals *ncg)
1582 {
1583         int error = 0;
1584         cred_t *cr = NULL;
1585         vnode_t *vp;
1586         bool_t needrecov = FALSE;
1587         bool_t rw_entered = FALSE;
1588         bool_t do_reopen;
1589 
1590         vp = RTOV4(rp);
1591 
1592         /*
1593          * If NFS4_DR_DISCARD is set by itself, take a short-cut and
1594          * discard without doing an otw DELEGRETURN.  This may only be used
1595          * by the recovery thread because it bypasses the synchronization
1596          * with r_deleg_recall_lock and mi->mi_recovlock.
1597          */
1598         if (flags == NFS4_DR_DISCARD) {
1599                 nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1600                 return (0);
1601         }
1602 
1603         if (flags & NFS4_DR_DID_OP) {
1604                 /*
1605          * Caller has already done start_op, which means the
1606                  * r_deleg_recall_lock is already held in READ mode
1607                  * so we cannot take it in write mode.  Return the
1608                  * delegation asynchronously.
1609                  *
1610                  * Remove the NFS4_DR_DID_OP flag so we don't
1611                  * get stuck looping through here.
1612                  */
1613                 VN_HOLD(vp);
1614                 nfs4delegreturn_async(rp, (flags & ~NFS4_DR_DID_OP), FALSE);
1615                 return (0);
1616         }
1617 
1618         /*
1619          * Verify we still have a delegation and crhold the credential.
1620          */
1621         mutex_enter(&rp->r_statev4_lock);
1622         if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
1623                 mutex_exit(&rp->r_statev4_lock);
1624                 goto out;
1625         }
1626         cr = rp->r_deleg_cred;
1627         ASSERT(cr != NULL);
1628         crhold(cr);
1629         mutex_exit(&rp->r_statev4_lock);
1630 
1631         /*
1632          * Push the modified data back to the server synchronously
1633          * before doing DELEGRETURN.
1634          */
1635         if (flags & NFS4_DR_PUSH)
1636                 (void) VOP_PUTPAGE(vp, 0, 0, 0, cr, NULL);
1637 
1638         /*
1639          * Take r_deleg_recall_lock in WRITE mode, this will prevent
1640          * nfs4_is_otw_open_necessary from trying to use the delegation
1641          * while the DELEGRETURN is in progress.
1642          */
1643         (void) nfs_rw_enter_sig(&rp->r_deleg_recall_lock, RW_WRITER, FALSE);
1644 
1645         rw_entered = TRUE;
1646 
1647         if (rp->r_deleg_type == OPEN_DELEGATE_NONE)
1648                 goto out;
1649 
1650         if (flags & NFS4_DR_REOPEN) {
1651                 /*
1652                  * If R4RECOVERRP is already set, then skip re-opening
1653                  * the delegation open streams and go straight to doing
1654                  * delegreturn.  (XXX if the file has failed recovery, then the
1655                  * delegreturn attempt is likely to be futile.)
1656                  */
1657                 mutex_enter(&rp->r_statelock);
1658                 do_reopen = !(rp->r_flags & R4RECOVERRP);
1659                 mutex_exit(&rp->r_statelock);
1660 
1661                 if (do_reopen) {
1662                         error = deleg_reopen(vp, &needrecov, ncg, flags);
1663                         if (error != 0) {
1664                                 if ((flags & (NFS4_DR_FORCE | NFS4_DR_RECALL))
1665                                     == 0)
1666                                         goto out;
1667                         } else if (needrecov) {
1668                                 if ((flags & NFS4_DR_FORCE) == 0)
1669                                         goto out;
1670                         }
1671                 }
1672         }
1673 
1674         if (flags & NFS4_DR_DISCARD) {
1675                 mntinfo4_t *mi = VTOMI4(RTOV4(rp));
1676 
1677                 mutex_enter(&rp->r_statelock);
1678                 /*
1679                  * r_deleg_return_pending is cleared inside of
1680                  * nfs4_delegation_accept when a delegation is accepted.
1681                  * If this flag has been cleared, then a new delegation
1682                  * has overwritten the one we were about to throw away.
1683                  */
1684                 if (!rp->r_deleg_return_pending) {
1685                         mutex_exit(&rp->r_statelock);
1686                         goto out;
1687                 }
1688                 mutex_exit(&rp->r_statelock);
1689                 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE);
1690                 nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1691                 nfs_rw_exit(&mi->mi_recovlock);
1692         } else {
1693                 error = nfs4_do_delegreturn(rp, flags, cr, ncg);
1694         }
1695 
1696 out:
1697         if (cr)
1698                 crfree(cr);
1699         if (rw_entered)
1700                 nfs_rw_exit(&rp->r_deleg_recall_lock);
1701         return (error);
1702 }
1703 
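     /*
      * nfs4delegreturn - external interface to nfs4delegreturn_impl;
      * looks up the per-zone callback globals and passes them along.
      */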
1704 int
1705 nfs4delegreturn(rnode4_t *rp, int flags)
1706 {
1707         struct nfs4_callback_globals *ncg;
1708 
1709         ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
1710         ASSERT(ncg != NULL);
1711 
1712         return (nfs4delegreturn_impl(rp, flags, ncg));
1713 }
1714 
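     /*
      * nfs4delegreturn_async - return the delegation from a separate
      * thread so that the caller does not block for the duration of
      * the delegreturn.
      */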
1715 void
1716 nfs4delegreturn_async(rnode4_t *rp, int flags, bool_t trunc)
1717 {
1718         struct cb_recall_pass *pp;
1719 
1720         pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP);
1721         pp->rp = rp;
1722         pp->flags = flags;
1723         pp->truncate = trunc;
1724 
1725         /*
1726          * Fire up a thread to do the actual delegreturn.  The
1727          * caller must guarantee that the rnode doesn't vanish
1728          * (by calling VN_HOLD).
1729          */
1730 
1731         (void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0,
1732             minclsyspri);
1733 }
1734 
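     /*
      * delegreturn_all_thread - async thread which returns every
      * delegation granted by the nfs4_server that is using the given
      * callback program number.
      */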
1735 static void
1736 delegreturn_all_thread(rpcprog_t *pp)
1737 {
1738         nfs4_server_t *np;
1739         bool_t found = FALSE;
1740         rpcprog_t prog;
1741         rnode4_t *rp;
1742         vnode_t *vp;
1743         zoneid_t zoneid = getzoneid();
1744         struct nfs4_callback_globals *ncg;
1745 
1746         NFS4_DEBUG(nfs4_drat_debug,
1747             (CE_NOTE, "delegreturn_all_thread: prog %d\n", *pp));
1748 
1749         prog = *pp;
1750         kmem_free(pp, sizeof (*pp));
1751         pp = NULL;
1752 
1753         mutex_enter(&nfs4_server_lst_lock);
1754         for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
1755                 if (np->zoneid == zoneid && np->s_program == prog) {
1756                         mutex_enter(&np->s_lock);
1757                         found = TRUE;
1758                         break;
1759                 }
1760         }
1761         mutex_exit(&nfs4_server_lst_lock);
1762 
1763         /*
1764          * It's possible that the nfs4_server which was using this
1765          * program number has vanished since this thread is async.
1766          * If so, just return.  Your work here is finished, my friend.
1767          */
1768         if (!found)
1769                 goto out;
1770 
1771         ncg = np->zone_globals;
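             /*
              * Return each delegation on the server's list.  s_lock is
              * dropped across the return itself; the VN_HOLD keeps the
              * rnode from disappearing in the meantime.
              */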
1772         while ((rp = list_head(&np->s_deleg_list)) != NULL) {
1773                 vp = RTOV4(rp);
1774                 VN_HOLD(vp);
1775                 mutex_exit(&np->s_lock);
1776                 (void) nfs4delegreturn_impl(rp, NFS4_DR_PUSH|NFS4_DR_REOPEN,
1777                     ncg);
1778                 VN_RELE(vp);
1779 
1780                 /* retake the s_lock for next trip through the loop */
1781                 mutex_enter(&np->s_lock);
1782         }
1783         mutex_exit(&np->s_lock);
1784 out:
1785         NFS4_DEBUG(nfs4_drat_debug,
1786             (CE_NOTE, "delegreturn_all_thread: complete\n"));
1787         zthread_exit();
1788 }
1789 
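     /*
      * nfs4_delegreturn_all - kick off an async thread to return all
      * of the delegations granted by the server sp.
      */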
1790 void
1791 nfs4_delegreturn_all(nfs4_server_t *sp)
1792 {
1793         rpcprog_t pro, *pp;
1794 
1795         mutex_enter(&sp->s_lock);
1796 
1797         /* Check to see if the delegation list is empty */
1798 
1799         if (list_head(&sp->s_deleg_list) == NULL) {
1800                 mutex_exit(&sp->s_lock);
1801                 return;
1802         }
1803         /*
1804          * Grab the program number; the async thread will use this
1805          * to find the nfs4_server.
1806          */
1807         pro = sp->s_program;
1808         mutex_exit(&sp->s_lock);
1809         pp = kmem_alloc(sizeof (rpcprog_t), KM_SLEEP);
1810         *pp = pro;
1811         (void) zthread_create(NULL, 0, delegreturn_all_thread, pp, 0,
1812             minclsyspri);
1813 }
1814 
1815 
1816 /*
1817  * Discard any delegations
1818  *
1819  * Iterate over the server's s_deleg_list and
1820  * discard the delegation for each rnode on
1821  * the given mount-point.
1822  */
1823 void
1824 nfs4_deleg_discard(mntinfo4_t *mi, nfs4_server_t *sp)
1825 {
1826         rnode4_t *rp, *next;
1827         mntinfo4_t *r_mi;
1828         struct nfs4_callback_globals *ncg;
1829 
1830         ASSERT(mutex_owned(&sp->s_lock));
1831         ncg = sp->zone_globals;
1832 
1833         for (rp = list_head(&sp->s_deleg_list); rp != NULL; rp = next) {
1834                 r_mi = VTOMI4(RTOV4(rp));
1835                 next = list_next(&sp->s_deleg_list, rp);
1836 
1837                 if (r_mi != mi) {
1838                         /*
1839                          * Skip this rnode if it is not on the
1840                          * same mount-point.
1841                          */
1842                         continue;
1843                 }
1844 
1845                 ASSERT(rp->r_deleg_type == OPEN_DELEGATE_READ);
1846 
1847 #ifdef DEBUG
1848                 if (nfs4_client_recov_debug) {
1849                         zprintf(getzoneid(),
1850                             "nfs4_deleg_discard: matched rnode %p "
1851                             "-- discarding delegation\n", (void *)rp);
1852                 }
1853 #endif
1854                 mutex_enter(&rp->r_statev4_lock);
1855                 /*
1856                  * Free the cred originally held when the delegation
1857                  * was granted.  We also need to decrement the refcnt
1858                  * on this server for each delegation we discard.
1859                  */
1860                 if (rp->r_deleg_cred)
1861                         crfree(rp->r_deleg_cred);
1862                 rp->r_deleg_cred = NULL;
1863                 rp->r_deleg_type = OPEN_DELEGATE_NONE;
1864                 rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
1865                 rp->r_deleg_needs_recall = FALSE;
1866                 ASSERT(sp->s_refcnt > 1);
1867                 sp->s_refcnt--;
1868                 list_remove(&sp->s_deleg_list, rp);
1869                 mutex_exit(&rp->r_statev4_lock);
1870                 nfs4_dec_state_ref_count_nolock(sp, mi);
1871                 ncg->nfs4_callback_stats.delegations.value.ui64--;
1872         }
1873 }
1874 
1875 /*
1876  * Reopen any open streams that were covered by the given file's
1877  * delegation.
1878  * Returns zero or an errno value.  If there was no error, *recovp
1879  * indicates whether recovery was initiated.
1880  */
1881 
1882 static int
1883 deleg_reopen(vnode_t *vp, bool_t *recovp, struct nfs4_callback_globals *ncg,
1884         int flags)
1885 {
1886         nfs4_open_stream_t *osp;
1887         nfs4_recov_state_t recov_state;
1888         bool_t needrecov = FALSE;
1889         mntinfo4_t *mi;
1890         rnode4_t *rp;
1891         nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
1892         int claimnull;
1893 
1894         mi = VTOMI4(vp);
1895         rp = VTOR4(vp);
1896 
1897         recov_state.rs_flags = 0;
1898         recov_state.rs_num_retry_despite_err = 0;
1899 
1900 retry:
1901         if ((e.error = nfs4_start_op(mi, vp, NULL, &recov_state)) != 0) {
1902                 return (e.error);
1903         }
1904 
1905         /*
1906          * If we mean to discard the delegation, it must be BAD, so don't
1907          * use it when doing the reopen or it will fail too.
1908          */
1909         claimnull = (flags & NFS4_DR_DISCARD);
1910         /*
1911          * Loop through the open streams for this rnode to find
1912          * all of the ones created using the delegation state ID.
1913          * Each of these needs to be re-opened.
1914          */
1915 
1916         while ((osp = get_next_deleg_stream(rp, claimnull)) != NULL) {
1917 
1918                 if (claimnull) {
1919                         nfs4_reopen(vp, osp, &e, CLAIM_NULL, FALSE, FALSE);
1920                 } else {
1921                         ncg->nfs4_callback_stats.claim_cur.value.ui64++;
1922 
1923                         nfs4_reopen(vp, osp, &e, CLAIM_DELEGATE_CUR, FALSE,
1924                             FALSE);
1925                         if (e.error == 0 && e.stat == NFS4_OK)
1926                                 ncg->nfs4_callback_stats.
1927                                     claim_cur_ok.value.ui64++;
1928                 }
1929 
1930                 if (e.error == EAGAIN) {
1931                         nfs4_end_op(mi, vp, NULL, &recov_state, TRUE);
1932                         goto retry;
1933                 }
1934 
1935                 /*
1936                  * if error is EINTR, ETIMEDOUT, or NFS4_FRC_UNMT_ERR, then
1937                  * recovery has already been started inside of nfs4_reopen.
1938                  */
1939                 if (e.error == EINTR || e.error == ETIMEDOUT ||
1940                     NFS4_FRC_UNMT_ERR(e.error, vp->v_vfsp)) {
1941                         open_stream_rele(osp, rp);
1942                         break;
1943                 }
1944 
1945                 needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp);
1946 
1947                 if (e.error != 0 && !needrecov) {
1948                         /*
1949                          * Recovery is not possible, but don't give up yet;
1950                          * we'd still like to do delegreturn after
1951                          * reopening as many streams as possible.
1952                          * Continue processing the open streams.
1953                          */
1954 
1955                         ncg->nfs4_callback_stats.recall_failed.value.ui64++;
1956 
1957                 } else if (needrecov) {
1958                         /*
1959                          * Start recovery and bail out.  The recovery
1960                          * thread will take it from here.
1961                          */
1962                         (void) nfs4_start_recovery(&e, mi, vp, NULL, NULL,
1963                             NULL, OP_OPEN, NULL, NULL, NULL);
1964                         open_stream_rele(osp, rp);
1965                         *recovp = TRUE;
1966                         break;
1967                 }
1968 
1969                 open_stream_rele(osp, rp);
1970         }
1971 
1972         nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
1973 
1974         return (e.error);
1975 }
1976 
1977 /*
1978  * get_next_deleg_stream - returns the next open stream which
1979  * represents a delegation for this rnode.  To ensure forward
1980  * progress, the caller must guarantee that each open
1981  * stream returned is changed so that a future call won't return
1982  * it again.
1983  *
1984  * There are several ways for the open stream to change.  If the open
1985  * stream is !os_delegation, then we aren't interested in it.  Also, if
1986  * either os_failed_reopen or !os_valid, then don't return the osp.
1987  *
1988  * If claimnull is false (doing reopen CLAIM_DELEGATE_CUR) then return
1989  * the osp if it is an os_delegation open stream.  Also, if the rnode still
1990  * has r_deleg_return_pending, then return the os_delegation osp.  Lastly,
1991  * if the rnode's r_deleg_stateid is different from the osp's open_stateid,
1992  * then return the osp.
1993  *
1994  * We have already taken the 'r_deleg_recall_lock' as WRITER, which
1995  * prevents new OPENs from going OTW (as start_fop takes this
1996  * lock in READ mode); thus, no new open streams can be created
1997  * (which inherently means no new delegation open streams are
1998  * being created).
1999  */
2000 
2001 static nfs4_open_stream_t *
2002 get_next_deleg_stream(rnode4_t *rp, int claimnull)
2003 {
2004         nfs4_open_stream_t      *osp;
2005 
2006         ASSERT(nfs_rw_lock_held(&rp->r_deleg_recall_lock, RW_WRITER));
2007 
2008         /*
2009          * Search through the list of open streams looking for
2010          * one that was created while holding the delegation.
2011          */
2012         mutex_enter(&rp->r_os_lock);
2013         for (osp = list_head(&rp->r_open_streams); osp != NULL;
2014             osp = list_next(&rp->r_open_streams, osp)) {
2015                 mutex_enter(&osp->os_sync_lock);
2016                 if (!osp->os_delegation || osp->os_failed_reopen ||
2017                     !osp->os_valid) {
2018                         mutex_exit(&osp->os_sync_lock);
2019                         continue;
2020                 }
2021                 if (!claimnull || rp->r_deleg_return_pending ||
2022                     !stateid4_cmp(&osp->open_stateid, &rp->r_deleg_stateid)) {
2023                         osp->os_ref_count++;
2024                         mutex_exit(&osp->os_sync_lock);
2025                         mutex_exit(&rp->r_os_lock);
2026                         return (osp);
2027                 }
2028                 mutex_exit(&osp->os_sync_lock);
2029         }
2030         mutex_exit(&rp->r_os_lock);
2031 
2032         return (NULL);
2033 }
2034 
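     /*
      * nfs4delegreturn_thread - async worker for nfs4delegreturn_async()
      * and nfs4delegabandon().  Flushes or invalidates pages as needed,
      * does the return via nfs4delegreturn_impl, then frees the argument
      * structure and releases the caller's hold on the vnode.
      */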
2035 static void
2036 nfs4delegreturn_thread(struct cb_recall_pass *args)
2037 {
2038         rnode4_t *rp;
2039         vnode_t *vp;
2040         cred_t *cr;
2041         int dtype, error, flags;
2042         bool_t rdirty, rip;
2043         kmutex_t cpr_lock;
2044         callb_cpr_t cpr_info;
2045         struct nfs4_callback_globals *ncg;
2046 
2047         ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
2048         ASSERT(ncg != NULL);
2049 
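             /*
              * Register with the CPR (checkpoint/resume) framework so
              * that this kernel thread can be safely suspended.
              */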
2050         mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
2051 
2052         CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,
2053             "nfsv4delegRtn");
2054 
2055         rp = args->rp;
2056         vp = RTOV4(rp);
2057 
2058         mutex_enter(&rp->r_statev4_lock);
2059         if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
2060                 mutex_exit(&rp->r_statev4_lock);
2061                 goto out;
2062         }
2063         mutex_exit(&rp->r_statev4_lock);
2064 
2065         /*
2066          * Take the read-write lock in read mode to prevent other
2067          * threads from modifying the data during the recall.  This
2068          * doesn't affect mmappers.
2069          */
2070         (void) nfs_rw_enter_sig(&rp->r_rwlock, RW_READER, FALSE);
2071 
2072         /* Proceed with delegreturn */
2073 
2074         mutex_enter(&rp->r_statev4_lock);
2075         if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
2076                 mutex_exit(&rp->r_statev4_lock);
2077                 nfs_rw_exit(&rp->r_rwlock);
2078                 goto out;
2079         }
2080         dtype = rp->r_deleg_type;
2081         cr = rp->r_deleg_cred;
2082         ASSERT(cr != NULL);
2083         crhold(cr);
2084         mutex_exit(&rp->r_statev4_lock);
2085 
2086         flags = args->flags;
2087 
2088         /*
2089          * If the file is being truncated at the server, then throw
2090          * away all of the pages; it doesn't matter what flavor of
2091          * delegation we have.
2092          */
2093 
2094         if (args->truncate) {
2095                 ncg->nfs4_callback_stats.recall_trunc.value.ui64++;
2096                 nfs4_invalidate_pages(vp, 0, cr);
2097         } else if (dtype == OPEN_DELEGATE_WRITE) {
2098 
2099                 mutex_enter(&rp->r_statelock);
2100                 rdirty = rp->r_flags & R4DIRTY;
2101                 mutex_exit(&rp->r_statelock);
2102 
2103                 if (rdirty) {
2104                         error = VOP_PUTPAGE(vp, 0, 0, 0, cr, NULL);
2105 
2106                         if (error)
2107                                 CB_WARN1("nfs4delegreturn_thread:"
2108                                     " VOP_PUTPAGE: %d\n", error);
2109                 }
2110                 /* turn off NFS4_DR_PUSH because we just did that above. */
2111                 flags &= ~NFS4_DR_PUSH;
2112         }
2113 
2114         mutex_enter(&rp->r_statelock);
2115         rip = rp->r_flags & R4RECOVERRP;
2116         mutex_exit(&rp->r_statelock);
2117 
2118         /* If a failed recovery is indicated, discard the pages */
2119 
2120         if (rip) {
2121 
2122                 error = VOP_PUTPAGE(vp, 0, 0, B_INVAL, cr, NULL);
2123 
2124                 if (error)
2125                         CB_WARN1("nfs4delegreturn_thread: VOP_PUTPAGE: %d\n",
2126                             error);
2127         }
2128 
2129         /*
2130          * Pass the flags to nfs4delegreturn_impl, but be sure not to pass
2131          * NFS4_DR_DID_OP, which just calls nfs4delegreturn_async again.
2132          */
2133         flags &= ~NFS4_DR_DID_OP;
2134 
2135         (void) nfs4delegreturn_impl(rp, flags, ncg);
2136 
2137         nfs_rw_exit(&rp->r_rwlock);
2138         crfree(cr);
2139 out:
2140         kmem_free(args, sizeof (struct cb_recall_pass));
2141         VN_RELE(vp);
2142         mutex_enter(&cpr_lock);
2143         CALLB_CPR_EXIT(&cpr_info);
2144         mutex_destroy(&cpr_lock);
2145         zthread_exit();
2146 }
2147 
2148 /*
2149  * This function assumes that its caller is either doing recovery
2150  * (and therefore cannot call nfs4_start_op) or has already called
2151  * nfs4_start_op().
2152  */
2153 void
2154 nfs4_delegation_accept(rnode4_t *rp, open_claim_type4 claim, OPEN4res *res,
2155         nfs4_ga_res_t *garp, cred_t *cr)
2156 {
2157         open_read_delegation4 *orp;
2158         open_write_delegation4 *owp;
2159         nfs4_server_t *np;
2160         bool_t already = FALSE;
2161         bool_t recall = FALSE;
2162         bool_t valid_garp = TRUE;
2163         bool_t delegation_granted = FALSE;
2164         bool_t dr_needed = FALSE;
2165         bool_t recov;
2166         int dr_flags = 0;
2167         long mapcnt;
2168         uint_t rflag;
2169         mntinfo4_t *mi;
2170         struct nfs4_callback_globals *ncg;
2171         open_delegation_type4 odt;
2172 
2173         ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
2174         ASSERT(ncg != NULL);
2175 
2176         mi = VTOMI4(RTOV4(rp));
2177 
2178         /*
2179          * Accept a delegation granted to the client via an OPEN.
2180          * Set the delegation fields in the rnode and insert the
2181          * rnode onto the list anchored in the nfs4_server_t.  The
2182          * proper locking order requires the nfs4_server_t first,
2183          * even though it may not be needed in all cases.
2184          *
2185          * NB: find_nfs4_server returns with s_lock held.
2186          */
2187 
2188         if ((np = find_nfs4_server(mi)) == NULL)
2189                 return;
2190 
2191         /* grab the statelock too, for examining r_mapcnt */
2192         mutex_enter(&rp->r_statelock);
2193         mutex_enter(&rp->r_statev4_lock);
2194 
2195         if (rp->r_deleg_type == OPEN_DELEGATE_READ ||
2196             rp->r_deleg_type == OPEN_DELEGATE_WRITE)
2197                 already = TRUE;
2198 
2199         odt = res->delegation.delegation_type;
2200 
2201         if (odt == OPEN_DELEGATE_READ) {
2202 
2203                 rp->r_deleg_type = res->delegation.delegation_type;
2204                 orp = &res->delegation.open_delegation4_u.read;
2205                 rp->r_deleg_stateid = orp->stateid;
2206                 rp->r_deleg_perms = orp->permissions;
2207                 if (claim == CLAIM_PREVIOUS)
2208                         if ((recall = orp->recall) != 0)
2209                                 dr_needed = TRUE;
2210 
2211                 delegation_granted = TRUE;
2212 
2213                 ncg->nfs4_callback_stats.delegations.value.ui64++;
2214                 ncg->nfs4_callback_stats.delegaccept_r.value.ui64++;
2215 
2216         } else if (odt == OPEN_DELEGATE_WRITE) {
2217 
2218                 rp->r_deleg_type = res->delegation.delegation_type;
2219                 owp = &res->delegation.open_delegation4_u.write;
2220                 rp->r_deleg_stateid = owp->stateid;
2221                 rp->r_deleg_perms = owp->permissions;
2222                 rp->r_deleg_limit = owp->space_limit;
2223                 if (claim == CLAIM_PREVIOUS)
2224                         if ((recall = owp->recall) != 0)
2225                                 dr_needed = TRUE;
2226 
2227                 delegation_granted = TRUE;
2228 
2229                 if (garp == NULL || !garp->n4g_change_valid) {
2230                         valid_garp = FALSE;
2231                         rp->r_deleg_change = 0;
2232                         rp->r_deleg_change_grant = 0;
2233                 } else {
2234                         rp->r_deleg_change = garp->n4g_change;
2235                         rp->r_deleg_change_grant = garp->n4g_change;
2236                 }
2237                 mapcnt = rp->r_mapcnt;
2238                 rflag = rp->r_flags;
2239 
2240                 /*
2241                  * Update the delegation change attribute if
2242                  * there are mappers for the file or the file is
2243                  * dirty.  This might be the case during recovery
2244                  * after server reboot.
2245                  */
2246                 if (mapcnt > 0 || rflag & R4DIRTY)
2247                         rp->r_deleg_change++;
2248 
2249                 NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE,
2250                     "nfs4_delegation_accept: r_deleg_change: 0x%x\n",
2251                     (int)(rp->r_deleg_change >> 32)));
2252                 NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE,
2253             "nfs4_delegation_accept: r_deleg_change_grant: 0x%x\n",
2254                     (int)(rp->r_deleg_change_grant >> 32)));
2255 
2256 
2257                 ncg->nfs4_callback_stats.delegations.value.ui64++;
2258                 ncg->nfs4_callback_stats.delegaccept_rw.value.ui64++;
2259         } else if (already) {
2260                 /*
2261                  * No delegation granted.  If the rnode currently
2262                  * has one, then consider it tainted and return it.
2263                  */
2264                 dr_needed = TRUE;
2265         }
2266 
2267         if (delegation_granted) {
2268                 /* Add the rnode to the list. */
2269                 if (!already) {
2270                         crhold(cr);
2271                         rp->r_deleg_cred = cr;
2272 
2273                         ASSERT(mutex_owned(&np->s_lock));
2274                         list_insert_head(&np->s_deleg_list, rp);
2275                         /* added list node gets a reference */
2276                         np->s_refcnt++;
2277                         nfs4_inc_state_ref_count_nolock(np, mi);
2278                 }
2279                 rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
2280         }
2281 
2282         /*
2283          * We've now safely accepted the delegation, if any.  Drop the
2284          * locks and figure out what post-processing is needed.  We'd
2285          * like to retain r_statev4_lock, but nfs4_server_rele takes
2286          * s_lock which would be a lock ordering violation.
2287          */
2288         mutex_exit(&rp->r_statev4_lock);
2289         mutex_exit(&rp->r_statelock);
2290         mutex_exit(&np->s_lock);
2291         nfs4_server_rele(np);
2292 
2293         /*
2294          * Check to see if we are in recovery.  Remember that
2295          * this function is protected by start_op, so a recovery
2296          * cannot begin until we are out of here.
2297          */
2298         mutex_enter(&mi->mi_lock);
2299         recov = mi->mi_recovflags & MI4_RECOV_ACTIV;
2300         mutex_exit(&mi->mi_lock);
2301 
2302         mutex_enter(&rp->r_statev4_lock);
2303 
2304         if (nfs4_delegreturn_policy == IMMEDIATE || !valid_garp)
2305                 dr_needed = TRUE;
2306 
2307         if (dr_needed && rp->r_deleg_return_pending == FALSE) {
2308                 if (recov) {
2309                         /*
2310                          * We cannot call delegreturn from inside
2311                          * of recovery or VOP_PUTPAGE will hang
2312                          * due to nfs4_start_fop call in
2313                          * nfs4write.  Use dlistadd to add the
2314                          * rnode to the list of rnodes needing
2315                          * cleaning.  We do not need to do reopen
2316                          * here because recov_openfiles will do it.
2317                          * In the non-recall case, just discard the
2318                          * delegation as it is no longer valid.
2319                          */
2320                         if (recall)
2321                                 dr_flags = NFS4_DR_PUSH;
2322                         else
2323                                 dr_flags = NFS4_DR_PUSH|NFS4_DR_DISCARD;
2324 
2325                         nfs4_dlistadd(rp, ncg, dr_flags);
2326                         dr_flags = 0;
2327                 } else {
2328                         /*
2329                          * Push the modified data back to the server,
2330                          * reopen any delegation open streams, and return
2331                          * the delegation.  Drop the statev4_lock first!
2332                          */
2333                         dr_flags = NFS4_DR_PUSH|NFS4_DR_DID_OP|NFS4_DR_REOPEN;
2334                 }
2335         }
2336         mutex_exit(&rp->r_statev4_lock);
2337         if (dr_flags)
2338                 (void) nfs4delegreturn_impl(rp, dr_flags, ncg);
2339 }
2340 
2341 /*
2342  * nfs4delegabandon - Abandon the delegation on an rnode4.  This code
2343  * is called when the client receives EXPIRED, BAD_STATEID, OLD_STATEID
2344  * or BADSEQID and the recovery code is unable to recover.  Push any
2345  * dirty data back to the server and return the delegation (if any).
2346  */
2347 
2348 void
2349 nfs4delegabandon(rnode4_t *rp)
2350 {
2351         vnode_t *vp;
2352         struct cb_recall_pass *pp;
2353         open_delegation_type4 dt;
2354 
2355         mutex_enter(&rp->r_statev4_lock);
2356         dt = rp->r_deleg_type;
2357         mutex_exit(&rp->r_statev4_lock);
2358 
2359         if (dt == OPEN_DELEGATE_NONE)
2360                 return;
2361 
2362         vp = RTOV4(rp);
2363         VN_HOLD(vp);
2364 
2365         pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP);
2366         pp->rp = rp;
2367         /*
2368          * Recovery on the file has failed and we want to return
2369          * the delegation.  We don't want to reopen files, and
2370          * nfs4delegreturn_thread() figures out what to do about
2371          * the data.  The only thing left to do is attempt to
2372          * return the delegation.
2373          */
2374         pp->flags = 0;
2375         pp->truncate = FALSE;
2376 
2377         /*
2378          * Fire up a thread to do the delegreturn; this is
2379          * necessary because we could be inside a GETPAGE or
2380          * PUTPAGE and we cannot do another one.
2381          */
2382 
2383         (void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0,
2384             minclsyspri);
2385 }
2386 
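     /*
      * wait_for_recall1 - helper for wait_for_recall().  For a regular
      * file, take the rnode's r_deleg_recall_lock as READER and record
      * flg in rsp->rs_flags so the lock can be released later.
      */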
2387 static int
2388 wait_for_recall1(vnode_t *vp, nfs4_op_hint_t op, nfs4_recov_state_t *rsp,
2389         int flg)
2390 {
2391         rnode4_t *rp;
2392         int error = 0;
2393 
2394 #ifdef lint
2395         op = op;
2396 #endif
2397 
2398         if (vp && vp->v_type == VREG) {
2399                 rp = VTOR4(vp);
2400 
2401                 /*
2402                  * Take r_deleg_recall_lock in read mode to synchronize
2403                  * with delegreturn.
2404                  */
2405                 error = nfs_rw_enter_sig(&rp->r_deleg_recall_lock,
2406                     RW_READER, INTR4(vp));
2407 
2408                 if (error == 0)
2409                         rsp->rs_flags |= flg;
2410 
2411         }
2412         return (error);
2413 }
2414 
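     /*
      * nfs4_end_op_recall - release any r_deleg_recall_locks acquired
      * by wait_for_recall().
      */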
2415 void
2416 nfs4_end_op_recall(vnode_t *vp1, vnode_t *vp2, nfs4_recov_state_t *rsp)
2417 {
2418         NFS4_DEBUG(nfs4_recall_debug,
2419             (CE_NOTE, "nfs4_end_op_recall: 0x%p, 0x%p\n",
2420             (void *)vp1, (void *)vp2));
2421 
2422         if (vp2 && rsp->rs_flags & NFS4_RS_RECALL_HELD2)
2423                 nfs_rw_exit(&VTOR4(vp2)->r_deleg_recall_lock);
2424         if (vp1 && rsp->rs_flags & NFS4_RS_RECALL_HELD1)
2425                 nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock);
2426 }
2427 
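     /*
      * wait_for_recall - synchronize with any delegation recall in
      * progress on vp1 and/or vp2 by taking each regular file's
      * r_deleg_recall_lock as READER.  Returns zero with the
      * NFS4_RS_RECALL_HELD flags set in rsp, or an errno with
      * no locks held.
      */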
2428 int
2429 wait_for_recall(vnode_t *vp1, vnode_t *vp2, nfs4_op_hint_t op,
2430         nfs4_recov_state_t *rsp)
2431 {
2432         int error;
2433 
2434         NFS4_DEBUG(nfs4_recall_debug,
2435             (CE_NOTE, "wait_for_recall:    0x%p, 0x%p\n",
2436             (void *)vp1, (void *)vp2));
2437 
2438         rsp->rs_flags &= ~(NFS4_RS_RECALL_HELD1|NFS4_RS_RECALL_HELD2);
2439 
2440         if ((error = wait_for_recall1(vp1, op, rsp, NFS4_RS_RECALL_HELD1)) != 0)
2441                 return (error);
2442 
2443         if ((error = wait_for_recall1(vp2, op, rsp, NFS4_RS_RECALL_HELD2))
2444             != 0) {
2445                 if (rsp->rs_flags & NFS4_RS_RECALL_HELD1) {
2446                         nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock);
2447                         rsp->rs_flags &= ~NFS4_RS_RECALL_HELD1;
2448                 }
2449 
2450                 return (error);
2451         }
2452 
2453         return (0);
2454 }
2455 
2456 /*
2457  * nfs4_dlistadd - Add this rnode to a list of rnodes to be
2458  * DELEGRETURN'd at the end of recovery.
2459  */
2460 
2461 static void
2462 nfs4_dlistadd(rnode4_t *rp, struct nfs4_callback_globals *ncg, int flags)
2463 {
2464         struct nfs4_dnode *dp;
2465 
2466         ASSERT(mutex_owned(&rp->r_statev4_lock));
2467         /*
2468          * Mark the delegation as having a return pending.
2469          * This will prevent the use of the delegation stateID
2470          * by read, write, setattr and open.
2471          */
2472         rp->r_deleg_return_pending = TRUE;
2473         dp = kmem_alloc(sizeof (*dp), KM_SLEEP);
2474         VN_HOLD(RTOV4(rp));
2475         dp->rnodep = rp;
2476         dp->flags = flags;
2477         mutex_enter(&ncg->nfs4_dlist_lock);
2478         list_insert_head(&ncg->nfs4_dlist, dp);
2479 #ifdef  DEBUG
2480         ncg->nfs4_dlistadd_c++;
2481 #endif
2482         mutex_exit(&ncg->nfs4_dlist_lock);
2483 }
2484 
2485 /*
2486  * nfs4_dlistclean_impl - Do DELEGRETURN for each rnode on the list
2487  * of files awaiting cleaning.  If the override_flags are non-zero,
2488  * then use them rather than the flags that were set when the rnode
2489  * was added to the dlist.
2490  */
2491 static void
2492 nfs4_dlistclean_impl(struct nfs4_callback_globals *ncg, int override_flags)
2493 {
2494         rnode4_t *rp;
2495         struct nfs4_dnode *dp;
2496         int flags;
2497 
2498         ASSERT(override_flags == 0 || override_flags == NFS4_DR_DISCARD);
2499 
2500         mutex_enter(&ncg->nfs4_dlist_lock);
2501         while ((dp = list_head(&ncg->nfs4_dlist)) != NULL) {
2502 #ifdef  DEBUG
2503                 ncg->nfs4_dlistclean_c++;
2504 #endif
2505                 list_remove(&ncg->nfs4_dlist, dp);
2506                 mutex_exit(&ncg->nfs4_dlist_lock);
2507                 rp = dp->rnodep;
2508                 flags = (override_flags != 0) ? override_flags : dp->flags;
2509                 kmem_free(dp, sizeof (*dp));
2510                 (void) nfs4delegreturn_impl(rp, flags, ncg);
2511                 VN_RELE(RTOV4(rp));
2512                 mutex_enter(&ncg->nfs4_dlist_lock);
2513         }
2514         mutex_exit(&ncg->nfs4_dlist_lock);
2515 }
2516 
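     /*
      * nfs4_dlistclean - clean the current zone's dlist using the
      * flags that were recorded when each rnode was added.
      */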
2517 void
2518 nfs4_dlistclean(void)
2519 {
2520         struct nfs4_callback_globals *ncg;
2521 
2522         ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
2523         ASSERT(ncg != NULL);
2524 
2525         nfs4_dlistclean_impl(ncg, 0);
2526 }