/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
 *      All Rights Reserved
 */

/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/time.h>
#include <sys/buf.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/tiuser.h>
#include <sys/swap.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/kstat.h>
#include <sys/cmn_err.h>
#include <sys/vtrace.h>
#include <sys/session.h>
#include <sys/dnlc.h>
#include <sys/bitmap.h>
#include <sys/acl.h>
#include <sys/ddi.h>
#include <sys/pathname.h>
#include <sys/flock.h>
#include <sys/dirent.h>
#include <sys/callb.h>
#include <sys/sdt.h>

#include <vm/pvn.h>

#include <rpc/types.h>
#include <rpc/xdr.h>
#include <rpc/auth.h>
#include <rpc/rpcsec_gss.h>
#include <rpc/clnt.h>

#include <nfs/nfs.h>
#include <nfs/nfs_clnt.h>
#include <nfs/nfs_acl.h>

#include <nfs/nfs4.h>
#include <nfs/rnode4.h>
#include <nfs/nfs4_clnt.h>

/*
 * The hash queues for the access to active and cached rnodes
 * are organized as doubly linked lists.  A reader/writer lock
 * for each hash bucket is used to control access and to synchronize
 * lookups, additions, and deletions from the hash queue.
 *
 * The rnode freelist is organized as a doubly linked list with
 * a head pointer.  Additions and deletions are synchronized via
 * a single mutex.
 *
 * In order to add an rnode to the free list, it must be hashed into
 * a hash queue and the exclusive lock for the hash queue must be
 * held.  If an rnode is not hashed into a hash queue, then it is
 * destroyed because it represents no valuable information that can
 * be reused about the file.  The exclusive lock for the hash queue
 * must be held in order to prevent a lookup in the hash queue from
 * finding the rnode, using it, and assuming that the rnode is not
 * on the freelist.  The lookup in the hash queue will have the hash
 * queue locked, either exclusive or shared.
 *
 * The vnode reference count for each rnode is not allowed to drop
 * below 1.  This prevents external entities, such as the VM
 * subsystem, from acquiring references to vnodes already on the
 * freelist and then trying to place them back on the freelist
 * when their reference is released.  This means that when an
 * rnode is looked up in the hash queues, then either the rnode
 * is removed from the freelist and that reference is transferred to
 * the new reference or the vnode reference count must be incremented
 * accordingly.  The mutex for the freelist must be held in order to
 * accurately test to see if the rnode is on the freelist or not.
 * The hash queue lock might be held shared and it is possible that
 * two different threads may race to remove the rnode from the
 * freelist.  This race can be resolved by holding the mutex for the
 * freelist.  Please note that the mutex for the freelist does not
 * need to be held if the rnode is not on the freelist.  It cannot be
 * placed on the freelist due to the requirement that the thread
 * putting the rnode on the freelist must hold the exclusive lock
 * for the hash queue and the thread doing the lookup in the hash
 * queue is holding either a shared or exclusive lock for the hash
 * queue.
 *
 * The lock ordering is:
 *
 *      hash bucket lock -> vnode lock
 *      hash bucket lock -> freelist lock -> r_statelock
 */
r4hashq_t *rtable4;

static kmutex_t rp4freelist_lock;
static rnode4_t *rp4freelist = NULL;
static long rnode4_new = 0;
int rtable4size;
static int rtable4mask;
static struct kmem_cache *rnode4_cache;
static int rnode4_hashlen = 4;

static void     r4inactive(rnode4_t *, cred_t *);
static vnode_t  *make_rnode4(nfs4_sharedfh_t *, r4hashq_t *, struct vfs *,
                    struct vnodeops *,
                    int (*)(vnode_t *, page_t *, u_offset_t *, size_t *, int,
                    cred_t *),
                    int *, cred_t *);
static void     rp4_rmfree(rnode4_t *);
int             nfs4_free_data_reclaim(rnode4_t *);
static int      nfs4_active_data_reclaim(rnode4_t *);
static int      nfs4_free_reclaim(void);
static int      nfs4_active_reclaim(void);
static int      nfs4_rnode_reclaim(void);
static void     nfs4_reclaim(void *);
static int      isrootfh(nfs4_sharedfh_t *, rnode4_t *);
static void     uninit_rnode4(rnode4_t *);
static void     destroy_rnode4(rnode4_t *);
static void     r4_stub_set(rnode4_t *, nfs4_stub_type_t);

#ifdef DEBUG
static int r4_check_for_dups = 0; /* Flag to enable dup rnode detection. */
static int nfs4_rnode_debug = 0;
/* if nonzero, kmem_cache_free() rnodes rather than place on freelist */
static int nfs4_rnode_nofreelist = 0;
/* give messages on colliding shared filehandles */
static void     r4_dup_check(rnode4_t *, vfs_t *);
#endif

/*
 * If the vnode has pages, run the list and check for any that are
 * still dangling.  We call this routine before putting an rnode on
 * the free list.
 */
static int
nfs4_dross_pages(vnode_t *vp)
{
        page_t *pp;
        kmutex_t *vphm;

        vphm = page_vnode_mutex(vp);
        mutex_enter(vphm);
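        /*
         * Walk the circular v_pages list.  A page counts as dangling
         * if it may still need a commit over the wire (p_fsdata !=
         * C_NOCOMMIT).  Pages tagged PVN_VPLIST_HASH_TAG (assumed
         * here to be the marker pages that pvn_vplist_dirty() leaves
         * on the list) are skipped.
         */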
        if ((pp = vp->v_pages) != NULL) {
                do {
                        if (pp->p_hash != PVN_VPLIST_HASH_TAG &&
                            pp->p_fsdata != C_NOCOMMIT) {
                                mutex_exit(vphm);
                                return (1);
                        }
                } while ((pp = pp->p_vpnext) != vp->v_pages);
        }
        mutex_exit(vphm);

        return (0);
}

/*
 * Flush any pages left on this rnode.
 */
static void
r4flushpages(rnode4_t *rp, cred_t *cr)
{
        vnode_t *vp;
        int error;

        /*
         * Before freeing anything, wait until all asynchronous
         * activity is done on this rnode.  This will allow all
         * asynchronous read ahead and write behind i/o's to
         * finish.
         */
        mutex_enter(&rp->r_statelock);
        while (rp->r_count > 0)
                cv_wait(&rp->r_cv, &rp->r_statelock);
        mutex_exit(&rp->r_statelock);

        /*
         * Flush and invalidate all pages associated with the vnode.
         */
        vp = RTOV4(rp);
        if (nfs4_has_pages(vp)) {
                ASSERT(vp->v_type != VCHR);
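                /*
                 * If flushing the dirty pages fails for lack of space
                 * or quota, latch the error in r_error so the loss of
                 * asynchronously written data can still be reported to
                 * the application later (e.g. at close time).
                 */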
                if ((rp->r_flags & R4DIRTY) && !rp->r_error) {
                        error = VOP_PUTPAGE(vp, (u_offset_t)0, 0, 0, cr, NULL);
                        if (error && (error == ENOSPC || error == EDQUOT)) {
                                mutex_enter(&rp->r_statelock);
                                if (!rp->r_error)
                                        rp->r_error = error;
                                mutex_exit(&rp->r_statelock);
                        }
                }
                nfs4_invalidate_pages(vp, (u_offset_t)0, cr);
        }
}

/*
 * Free the resources associated with an rnode.
 */
static void
r4inactive(rnode4_t *rp, cred_t *cr)
{
        vnode_t *vp;
        char *contents;
        int size;
        vsecattr_t *vsp;
        vnode_t *xattr;

        r4flushpages(rp, cr);

        vp = RTOV4(rp);

        /*
         * Free any held caches which may be
         * associated with this rnode.
         */
        mutex_enter(&rp->r_statelock);
        contents = rp->r_symlink.contents;
        size = rp->r_symlink.size;
        rp->r_symlink.contents = NULL;
        vsp = rp->r_secattr;
        rp->r_secattr = NULL;
        xattr = rp->r_xattr_dir;
        rp->r_xattr_dir = NULL;
        mutex_exit(&rp->r_statelock);

        /*
         * Free the access cache entries.
         */
        (void) nfs4_access_purge_rp(rp);

        /*
         * Free the readdir cache entries.
         */
        nfs4_purge_rddir_cache(vp);

        /*
         * Free the symbolic link cache.
         */
        if (contents != NULL) {
                kmem_free((void *)contents, size);
        }

        /*
         * Free any cached ACL.
         */
        if (vsp != NULL)
                nfs4_acl_free_cache(vsp);

        /*
         * Release the cached xattr_dir.
         */
        if (xattr != NULL)
                VN_RELE(xattr);
}

/*
 * We have seen cases where the fh passed in is for "." and so should
 * be for a VROOT node, yet the fh differs from the root fh stored in
 * the mntinfo4_t.  Such an invalid fh, possibly from a misbehaving
 * server, would panic the client system at a later time.  To avoid
 * the panic, we drop the bad fh, use the root fh from the mntinfo4_t,
 * and print an error message for attention.
 */
nfs4_sharedfh_t *
badrootfh_check(nfs4_sharedfh_t *fh, nfs4_fname_t *nm, mntinfo4_t *mi,
    int *wasbad)
{
        char *s;

        *wasbad = 0;
        s = fn_name(nm);
        ASSERT(strcmp(s, "..") != 0);

        if ((s[0] == '.' && s[1] == '\0') && fh &&
            !SFH4_SAME(mi->mi_rootfh, fh)) {
#ifdef DEBUG
                nfs4_fhandle_t fhandle;

                zcmn_err(mi->mi_zone->zone_id, CE_WARN,
                    "Server %s returns a different "
                    "root filehandle for the path %s:",
                    mi->mi_curr_serv->sv_hostname,
                    mi->mi_curr_serv->sv_path);

                /* print the bad fh */
                fhandle.fh_len = fh->sfh_fh.nfs_fh4_len;
                bcopy(fh->sfh_fh.nfs_fh4_val, fhandle.fh_buf,
                    fhandle.fh_len);
                nfs4_printfhandle(&fhandle);

                /* print mi_rootfh */
                fhandle.fh_len = mi->mi_rootfh->sfh_fh.nfs_fh4_len;
                bcopy(mi->mi_rootfh->sfh_fh.nfs_fh4_val, fhandle.fh_buf,
                    fhandle.fh_len);
                nfs4_printfhandle(&fhandle);
#endif
                /* use mi_rootfh instead; fh will be rele'd by the caller */
                fh = mi->mi_rootfh;
                *wasbad = 1;
        }

        kmem_free(s, MAXNAMELEN);
        return (fh);
}

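/*
 * Cache the attributes in garp on vp.  Entered with the hash bucket
 * lock (rtable4[index].r_lock) held; all paths below drop that lock
 * before returning, as the ASSERTs at the call sites verify.
 */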
void
r4_do_attrcache(vnode_t *vp, nfs4_ga_res_t *garp, int newnode,
    hrtime_t t, cred_t *cr, int index)
{
        int is_stub;
        vattr_t *attr;
        /*
         * Don't add to attrcache if time overflow, but
         * no need to check here because either attr is null or the time
         * values in it were processed by nfs4_time_ntov(), which checks
         * for time overflows.
         */
        attr = garp ? &garp->n4g_va : NULL;

        if (attr) {
                if (!newnode) {
                        rw_exit(&rtable4[index].r_lock);
#ifdef DEBUG
                        if (vp->v_type != attr->va_type &&
                            vp->v_type != VNON && attr->va_type != VNON) {
                                zcmn_err(VTOMI4(vp)->mi_zone->zone_id, CE_WARN,
                                    "makenfs4node: type (%d) doesn't "
                                    "match type of found node at %p (%d)",
                                    attr->va_type, (void *)vp, vp->v_type);
                        }
#endif
                        nfs4_attr_cache(vp, garp, t, cr, TRUE, NULL);
                } else {
                        rnode4_t *rp = VTOR4(vp);

                        vp->v_type = attr->va_type;
                        vp->v_rdev = attr->va_rdev;

                        /*
                         * Turn this object into a "stub" object if we
                         * crossed an underlying server fs boundary.
                         * To make this check, during mount we save the
                         * fsid of the server object being mounted.
                         * Here we compare this object's server fsid
                         * with the fsid we saved at mount.  If they
                         * are different, we crossed a server fs boundary.
                         *
                         * The stub type is set (or not) at rnode
                         * creation time and it never changes for the
                         * life of the rnode.
                         *
                         * This stub will be for a mirror-mount, rather than
                         * a referral (the latter also sets R4SRVSTUB).
                         *
                         * The stub type is also set during RO failover;
                         * see nfs4_remap_file().
                         *
                         * We don't bother with taking r_statelock to
                         * set the stub type because this is a new rnode
                         * and we're holding the hash bucket r_lock RW_WRITER.
                         * No other thread could have obtained access
                         * to this rnode.
                         */
                        is_stub = 0;
                        if (garp->n4g_fsid_valid) {
                                fattr4_fsid ga_fsid = garp->n4g_fsid;
                                servinfo4_t *svp = rp->r_server;

                                rp->r_srv_fsid = ga_fsid;

                                (void) nfs_rw_enter_sig(&svp->sv_lock,
                                    RW_READER, 0);
                                if (!FATTR4_FSID_EQ(&ga_fsid, &svp->sv_fsid))
                                        is_stub = 1;
                                nfs_rw_exit(&svp->sv_lock);
                        }

                        if (is_stub)
                                r4_stub_mirrormount(rp);
                        else
                                r4_stub_none(rp);

                        /* Cannot cache partial attrs */
                        if (attr->va_mask == AT_ALL)
                                nfs4_attrcache_noinval(vp, garp, t);
                        else
                                PURGE_ATTRCACHE4(vp);

                        rw_exit(&rtable4[index].r_lock);
                }
        } else {
                if (newnode) {
                        PURGE_ATTRCACHE4(vp);
                }
                rw_exit(&rtable4[index].r_lock);
        }
}

/*
 * Find or create an rnode based primarily on filehandle.  To be
 * used when dvp (vnode for parent directory) is not available;
 * otherwise, makenfs4node() should be used.
 *
 * The nfs4_fname_t argument *npp is consumed and nulled out.
 */
vnode_t *
makenfs4node_by_fh(nfs4_sharedfh_t *sfh, nfs4_sharedfh_t *psfh,
    nfs4_fname_t **npp, nfs4_ga_res_t *garp,
    mntinfo4_t *mi, cred_t *cr, hrtime_t t)
{
        vfs_t *vfsp = mi->mi_vfsp;
        int newnode = 0;
        vnode_t *vp;
        rnode4_t *rp;
        svnode_t *svp;
        nfs4_fname_t *name, *svpname;
        int index;

        ASSERT(npp && *npp);
        name = *npp;
        *npp = NULL;

        index = rtable4hash(sfh);
        rw_enter(&rtable4[index].r_lock, RW_READER);

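        /*
         * Note: make_rnode4() may upgrade the hash bucket lock to exclusive.
         */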
        vp = make_rnode4(sfh, &rtable4[index], vfsp,
            nfs4_vnodeops, nfs4_putapage, &newnode, cr);

        svp = VTOSV(vp);
        rp = VTOR4(vp);
        if (newnode) {
                svp->sv_forw = svp->sv_back = svp;
                svp->sv_name = name;
                if (psfh != NULL)
                        sfh4_hold(psfh);
                svp->sv_dfh = psfh;
        } else {
                /*
                 * It is possible that fnames have changed due to
                 * a server-side rename; update the fname here.
                 */
                mutex_enter(&rp->r_svlock);
                svpname = svp->sv_name;
                if (svp->sv_name != name) {
                        svp->sv_name = name;
                        mutex_exit(&rp->r_svlock);
                        fn_rele(&svpname);
                } else {
                        mutex_exit(&rp->r_svlock);
                        fn_rele(&name);
                }
        }

        ASSERT(RW_LOCK_HELD(&rtable4[index].r_lock));
        r4_do_attrcache(vp, garp, newnode, t, cr, index);
        ASSERT(rw_owner(&rtable4[index].r_lock) != curthread);

        return (vp);
}

/*
 * Find or create a vnode for the given filehandle, filesystem, parent, and
 * name.  The reference to nm is consumed, so the caller must first do an
 * fn_hold() if it wants to continue using nm after this call.
 */
vnode_t *
makenfs4node(nfs4_sharedfh_t *fh, nfs4_ga_res_t *garp, struct vfs *vfsp,
    hrtime_t t, cred_t *cr, vnode_t *dvp, nfs4_fname_t *nm)
{
        vnode_t *vp;
        int newnode;
        int index;
        mntinfo4_t *mi = VFTOMI4(vfsp);
        int had_badfh = 0;
        rnode4_t *rp;

        ASSERT(dvp != NULL);

        fh = badrootfh_check(fh, nm, mi, &had_badfh);

        index = rtable4hash(fh);
        rw_enter(&rtable4[index].r_lock, RW_READER);

        /*
         * Note: make_rnode4() may upgrade the hash bucket lock to exclusive.
         */
        vp = make_rnode4(fh, &rtable4[index], vfsp, nfs4_vnodeops,
            nfs4_putapage, &newnode, cr);

        rp = VTOR4(vp);
        sv_activate(&vp, dvp, &nm, newnode);
        if (dvp->v_flag & V_XATTRDIR) {
                mutex_enter(&rp->r_statelock);
                rp->r_flags |= R4ISXATTR;
                mutex_exit(&rp->r_statelock);
        }

        /* If we got a bad filehandle, do not cache the attributes. */
        if (had_badfh) {
                rw_exit(&rtable4[index].r_lock);
                return (vp);
        }

        ASSERT(RW_LOCK_HELD(&rtable4[index].r_lock));
        r4_do_attrcache(vp, garp, newnode, t, cr, index);
        ASSERT(rw_owner(&rtable4[index].r_lock) != curthread);

        return (vp);
}

/*
 * Hash on address of filehandle object.
 * XXX totally untuned.
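 *
 * Hashing on the object's address (rather than its contents) works
 * because shared filehandles are interned: sfh4_get() returns at most
 * one nfs4_sharedfh_t per distinct filehandle per filesystem.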
 */
int
rtable4hash(nfs4_sharedfh_t *fh)
{
        return (((uintptr_t)fh / sizeof (*fh)) & rtable4mask);
}

/*
 * Find or create the vnode for the given filehandle and filesystem.
 * *newnode is set to zero if the vnode already existed; non-zero if it had
 * to be created.
 *
 * Note: make_rnode4() may upgrade the hash bucket lock to exclusive.
 */
static vnode_t *
make_rnode4(nfs4_sharedfh_t *fh, r4hashq_t *rhtp, struct vfs *vfsp,
    struct vnodeops *vops,
    int (*putapage)(vnode_t *, page_t *, u_offset_t *, size_t *, int, cred_t *),
    int *newnode, cred_t *cr)
{
        rnode4_t *rp;
        rnode4_t *trp;
        vnode_t *vp;
        mntinfo4_t *mi;

        ASSERT(RW_READ_HELD(&rhtp->r_lock));

        mi = VFTOMI4(vfsp);

start:
        if ((rp = r4find(rhtp, fh, vfsp)) != NULL) {
                vp = RTOV4(rp);
                *newnode = 0;
                return (vp);
        }
        rw_exit(&rhtp->r_lock);

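        /*
         * Not found.  The bucket lock stays dropped here because the
         * path below may sleep (kmem_cache_alloc(KM_SLEEP)) or may
         * need the write lock on a different bucket when recycling an
         * rnode from the freelist.
         */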
        mutex_enter(&rp4freelist_lock);

        if (rp4freelist != NULL && rnode4_new >= nrnode) {
                rp = rp4freelist;
                rp4_rmfree(rp);
                mutex_exit(&rp4freelist_lock);

                vp = RTOV4(rp);

                if (rp->r_flags & R4HASHED) {
                        rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
                        mutex_enter(&vp->v_lock);
                        if (vp->v_count > 1) {
                                vp->v_count--;
                                mutex_exit(&vp->v_lock);
                                rw_exit(&rp->r_hashq->r_lock);
                                rw_enter(&rhtp->r_lock, RW_READER);
                                goto start;
                        }
                        mutex_exit(&vp->v_lock);
                        rp4_rmhash_locked(rp);
                        rw_exit(&rp->r_hashq->r_lock);
                }

                r4inactive(rp, cr);

                mutex_enter(&vp->v_lock);
                if (vp->v_count > 1) {
                        vp->v_count--;
                        mutex_exit(&vp->v_lock);
                        rw_enter(&rhtp->r_lock, RW_READER);
                        goto start;
                }
                mutex_exit(&vp->v_lock);
                vn_invalid(vp);

                /*
                 * destroy old locks before bzero'ing and
                 * recreating the locks below.
                 */
                uninit_rnode4(rp);

                /*
                 * Make sure that if the rnode is recycled then the
                 * VFS count is decremented properly before reuse.
                 */
                VFS_RELE(vp->v_vfsp);
                vn_reinit(vp);
        } else {
                vnode_t *new_vp;

                mutex_exit(&rp4freelist_lock);

                rp = kmem_cache_alloc(rnode4_cache, KM_SLEEP);
                new_vp = vn_alloc(KM_SLEEP);

                atomic_add_long((ulong_t *)&rnode4_new, 1);
#ifdef DEBUG
                clstat4_debug.nrnode.value.ui64++;
#endif
                vp = new_vp;
        }

        bzero(rp, sizeof (*rp));
        rp->r_vnode = vp;
        nfs_rw_init(&rp->r_rwlock, NULL, RW_DEFAULT, NULL);
        nfs_rw_init(&rp->r_lkserlock, NULL, RW_DEFAULT, NULL);
        mutex_init(&rp->r_svlock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&rp->r_statelock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&rp->r_statev4_lock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&rp->r_os_lock, NULL, MUTEX_DEFAULT, NULL);
        rp->created_v4 = 0;
        list_create(&rp->r_open_streams, sizeof (nfs4_open_stream_t),
            offsetof(nfs4_open_stream_t, os_node));
        rp->r_lo_head.lo_prev_rnode = &rp->r_lo_head;
        rp->r_lo_head.lo_next_rnode = &rp->r_lo_head;
        cv_init(&rp->r_cv, NULL, CV_DEFAULT, NULL);
        cv_init(&rp->r_commit.c_cv, NULL, CV_DEFAULT, NULL);
        rp->r_flags = R4READDIRWATTR;
        rp->r_fh = fh;
        rp->r_hashq = rhtp;
        sfh4_hold(rp->r_fh);
        rp->r_server = mi->mi_curr_serv;
        rp->r_deleg_type = OPEN_DELEGATE_NONE;
        rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
        nfs_rw_init(&rp->r_deleg_recall_lock, NULL, RW_DEFAULT, NULL);

        rddir4_cache_create(rp);
        rp->r_putapage = putapage;
        vn_setops(vp, vops);
        vp->v_data = (caddr_t)rp;
        vp->v_vfsp = vfsp;
        VFS_HOLD(vfsp);
        vp->v_type = VNON;
        vp->v_flag |= VMODSORT;
        if (isrootfh(fh, rp))
                vp->v_flag |= VROOT;
        vn_exists(vp);

        /*
         * There is a race condition if someone else
         * alloc's the rnode while no locks are held, so we
         * check again and recover if found.
         */
        rw_enter(&rhtp->r_lock, RW_WRITER);
        if ((trp = r4find(rhtp, fh, vfsp)) != NULL) {
                vp = RTOV4(trp);
                *newnode = 0;
                rw_exit(&rhtp->r_lock);
                rp4_addfree(rp, cr);
                rw_enter(&rhtp->r_lock, RW_READER);
                return (vp);
        }
        rp4_addhash(rp);
        *newnode = 1;
        return (vp);
}

static void
uninit_rnode4(rnode4_t *rp)
{
        vnode_t *vp = RTOV4(rp);

        ASSERT(rp != NULL);
        ASSERT(vp != NULL);
        ASSERT(vp->v_count == 1);
        ASSERT(rp->r_count == 0);
        ASSERT(rp->r_mapcnt == 0);
        if (rp->r_flags & R4LODANGLERS) {
                nfs4_flush_lock_owners(rp);
        }
        ASSERT(rp->r_lo_head.lo_next_rnode == &rp->r_lo_head);
        ASSERT(rp->r_lo_head.lo_prev_rnode == &rp->r_lo_head);
        ASSERT(!(rp->r_flags & R4HASHED));
        ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);
        nfs4_clear_open_streams(rp);
        list_destroy(&rp->r_open_streams);

        /*
         * Destroy the rddir cache first since we need to grab the r_statelock.
         */
        mutex_enter(&rp->r_statelock);
        rddir4_cache_destroy(rp);
        mutex_exit(&rp->r_statelock);
        sv_uninit(&rp->r_svnode);
        sfh4_rele(&rp->r_fh);
        nfs_rw_destroy(&rp->r_rwlock);
        nfs_rw_destroy(&rp->r_lkserlock);
        mutex_destroy(&rp->r_statelock);
        mutex_destroy(&rp->r_statev4_lock);
        mutex_destroy(&rp->r_os_lock);
        cv_destroy(&rp->r_cv);
        cv_destroy(&rp->r_commit.c_cv);
        nfs_rw_destroy(&rp->r_deleg_recall_lock);
        if (rp->r_flags & R4DELMAPLIST)
                list_destroy(&rp->r_indelmap);
}

/*
 * Put an rnode on the free list.
 *
 * Rnodes which were allocated above and beyond the normal limit
 * are immediately freed.
 */
void
rp4_addfree(rnode4_t *rp, cred_t *cr)
{
        vnode_t *vp;
        vnode_t *xattr;
        struct vfs *vfsp;

        vp = RTOV4(rp);
        ASSERT(vp->v_count >= 1);
        ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);

        /*
         * If we have too many rnodes allocated and there are no
         * references to this rnode, or if the rnode is no longer
         * accessible because it does not reside in the hash queues,
         * or if an i/o error occurred while writing to the file,
         * then just free it instead of putting it on the rnode
         * freelist.
         */
        vfsp = vp->v_vfsp;
        if (((rnode4_new > nrnode || !(rp->r_flags & R4HASHED) ||
#ifdef DEBUG
            (nfs4_rnode_nofreelist != 0) ||
#endif
            rp->r_error || (rp->r_flags & R4RECOVERR) ||
            (vfsp->vfs_flag & VFS_UNMOUNTED)) && rp->r_count == 0)) {
                if (rp->r_flags & R4HASHED) {
                        rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
                        mutex_enter(&vp->v_lock);
                        if (vp->v_count > 1) {
                                vp->v_count--;
                                mutex_exit(&vp->v_lock);
                                rw_exit(&rp->r_hashq->r_lock);
                                return;
                        }
                        mutex_exit(&vp->v_lock);
                        rp4_rmhash_locked(rp);
                        rw_exit(&rp->r_hashq->r_lock);
                }

                /*
                 * Make sure we don't have a delegation on this rnode
                 * before destroying it.
                 */
                if (rp->r_deleg_type != OPEN_DELEGATE_NONE) {
                        (void) nfs4delegreturn(rp,
                            NFS4_DR_FORCE|NFS4_DR_PUSH|NFS4_DR_REOPEN);
                }

                r4inactive(rp, cr);

                /*
                 * Recheck the vnode reference count.  We need to
                 * make sure that another reference has not been
                 * acquired while we were not holding v_lock.  The
                 * rnode is not in the rnode hash queues; one
                 * way a reference could have been acquired is via
                 * VOP_PUTPAGE, because the rnode was marked with
                 * R4DIRTY or had a modified page.  This
                 * reference may have been acquired before our call
                 * to r4inactive.  The i/o may have been completed,
                 * thus allowing r4inactive to complete, but the
                 * reference to the vnode may not have been released
                 * yet.  In any case, the rnode cannot be destroyed
                 * until the other references to this vnode have been
                 * released.  The other references will take care of
                 * either destroying the rnode or placing it on the
                 * rnode freelist.  If there are no other references,
                 * then the rnode may be safely destroyed.
                 */
                mutex_enter(&vp->v_lock);
                if (vp->v_count > 1) {
                        vp->v_count--;
                        mutex_exit(&vp->v_lock);
                        return;
                }
                mutex_exit(&vp->v_lock);

                destroy_rnode4(rp);
                return;
        }

        /*
         * Lock the hash queue and then recheck the reference count
         * to ensure that no other threads have acquired a reference
         * to indicate that the rnode should not be placed on the
         * freelist.  If another reference has been acquired, then
         * just release this one and let the other thread complete
         * the processing of adding this rnode to the freelist.
         */
again:
        rw_enter(&rp->r_hashq->r_lock, RW_WRITER);

        mutex_enter(&vp->v_lock);
        if (vp->v_count > 1) {
                vp->v_count--;
                mutex_exit(&vp->v_lock);
                rw_exit(&rp->r_hashq->r_lock);
                return;
        }
        mutex_exit(&vp->v_lock);

        /*
         * Make sure we don't put an rnode with a delegation
         * on the free list.
         */
        if (rp->r_deleg_type != OPEN_DELEGATE_NONE) {
                rw_exit(&rp->r_hashq->r_lock);
                (void) nfs4delegreturn(rp,
                    NFS4_DR_FORCE|NFS4_DR_PUSH|NFS4_DR_REOPEN);
                goto again;
        }

        /*
         * Now that we have the hash queue lock and we know there
         * are no more references on the vnode, check to make
         * sure there aren't any open streams still on the rnode.
         * If so, drop the hash queue lock, remove the open streams,
         * and recheck the v_count.
         */
        mutex_enter(&rp->r_os_lock);
        if (list_head(&rp->r_open_streams) != NULL) {
                mutex_exit(&rp->r_os_lock);
                rw_exit(&rp->r_hashq->r_lock);
                if (nfs_zone() != VTOMI4(vp)->mi_zone)
                        nfs4_clear_open_streams(rp);
                else
                        (void) nfs4close_all(vp, cr);
                goto again;
        }
        mutex_exit(&rp->r_os_lock);

        /*
         * Before we put it on the freelist, make sure there are no pages.
         * If there are, flush and commit all of the dirty and
         * uncommitted pages, assuming the file system isn't read only.
         */
        if (!(vp->v_vfsp->vfs_flag & VFS_RDONLY) && nfs4_dross_pages(vp)) {
                rw_exit(&rp->r_hashq->r_lock);
                r4flushpages(rp, cr);
                goto again;
        }

        /*
         * Before we put it on the freelist, make sure there is no
         * active xattr directory cached.  Entries on the freelist do
         * not get r4inactive'd while something still holds them
         * active, so nothing on the freelist may be allowed to hold
         * another rnode active.
         */
        xattr = rp->r_xattr_dir;
        rp->r_xattr_dir = NULL;

        /*
         * If there is no cached data or metadata for this file, then
         * put the rnode on the front of the freelist so that it will
         * be reused before other rnodes which may have cached data or
         * metadata associated with them.
         */
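        /*
         * When the list is non-empty, rp is always linked in just
         * before the current head, i.e. at the tail of the circular
         * list; advancing the rp4freelist head pointer to rp is what
         * puts it at the front instead.
         */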
        mutex_enter(&rp4freelist_lock);
        if (rp4freelist == NULL) {
                rp->r_freef = rp;
                rp->r_freeb = rp;
                rp4freelist = rp;
        } else {
                rp->r_freef = rp4freelist;
                rp->r_freeb = rp4freelist->r_freeb;
                rp4freelist->r_freeb->r_freef = rp;
                rp4freelist->r_freeb = rp;
                if (!nfs4_has_pages(vp) && rp->r_dir == NULL &&
                    rp->r_symlink.contents == NULL && rp->r_secattr == NULL)
                        rp4freelist = rp;
        }
        mutex_exit(&rp4freelist_lock);

        rw_exit(&rp->r_hashq->r_lock);

        if (xattr)
                VN_RELE(xattr);
}

/*
 * Remove an rnode from the free list.
 *
 * The caller must be holding rp4freelist_lock and the rnode
 * must be on the freelist.
 */
static void
rp4_rmfree(rnode4_t *rp)
{
        ASSERT(MUTEX_HELD(&rp4freelist_lock));
        ASSERT(rp->r_freef != NULL && rp->r_freeb != NULL);

        if (rp == rp4freelist) {
                rp4freelist = rp->r_freef;
                if (rp == rp4freelist)
                        rp4freelist = NULL;
        }
        rp->r_freeb->r_freef = rp->r_freef;
        rp->r_freef->r_freeb = rp->r_freeb;

        rp->r_freef = rp->r_freeb = NULL;
}

/*
 * Put an rnode in the hash table.
 *
 * The caller must be holding the exclusive hash queue lock.
 */
void
rp4_addhash(rnode4_t *rp)
{
        ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
        ASSERT(!(rp->r_flags & R4HASHED));

#ifdef DEBUG
        r4_dup_check(rp, RTOV4(rp)->v_vfsp);
#endif

        rp->r_hashf = rp->r_hashq->r_hashf;
        rp->r_hashq->r_hashf = rp;
        rp->r_hashb = (rnode4_t *)rp->r_hashq;
        rp->r_hashf->r_hashb = rp;

        mutex_enter(&rp->r_statelock);
        rp->r_flags |= R4HASHED;
        mutex_exit(&rp->r_statelock);
}

/*
 * Remove an rnode from the hash table.
 *
 * The caller must be holding the hash queue lock.
 */
void
rp4_rmhash_locked(rnode4_t *rp)
{
        ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
        ASSERT(rp->r_flags & R4HASHED);

        rp->r_hashb->r_hashf = rp->r_hashf;
        rp->r_hashf->r_hashb = rp->r_hashb;

        mutex_enter(&rp->r_statelock);
        rp->r_flags &= ~R4HASHED;
        mutex_exit(&rp->r_statelock);
}

/*
 * Remove an rnode from the hash table.
 *
 * The caller must not be holding the hash queue lock.
 */
void
rp4_rmhash(rnode4_t *rp)
{
        rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
        rp4_rmhash_locked(rp);
        rw_exit(&rp->r_hashq->r_lock);
}

/*
 * Look up an rnode by filehandle.  Ignores rnodes that had failed recovery.
 * Returns NULL if no match.  If an rnode is returned, the reference count
 * on the master vnode is incremented.
 *
 * The caller must be holding the hash queue lock, either shared or exclusive.
 */
rnode4_t *
r4find(r4hashq_t *rhtp, nfs4_sharedfh_t *fh, struct vfs *vfsp)
{
        rnode4_t *rp;
        vnode_t *vp;

        ASSERT(RW_LOCK_HELD(&rhtp->r_lock));

        for (rp = rhtp->r_hashf; rp != (rnode4_t *)rhtp; rp = rp->r_hashf) {
                vp = RTOV4(rp);
                if (vp->v_vfsp == vfsp && SFH4_SAME(rp->r_fh, fh)) {

                        mutex_enter(&rp->r_statelock);
                        if (rp->r_flags & R4RECOVERR) {
                                mutex_exit(&rp->r_statelock);
                                continue;
                        }
                        mutex_exit(&rp->r_statelock);
#ifdef DEBUG
                        r4_dup_check(rp, vfsp);
#endif
                        if (rp->r_freef != NULL) {
                                mutex_enter(&rp4freelist_lock);
                                /*
                                 * If the rnode is on the freelist,
                                 * then remove it and use that reference
                                 * as the new reference.  Otherwise,
                                 * we need to increment the reference
                                 * count.
                                 */
                                if (rp->r_freef != NULL) {
                                        rp4_rmfree(rp);
                                        mutex_exit(&rp4freelist_lock);
                                } else {
                                        mutex_exit(&rp4freelist_lock);
                                        VN_HOLD(vp);
                                }
                        } else
                                VN_HOLD(vp);

                        /*
                         * If this is the root vnode, make sure VROOT
                         * is set in v_flag to indicate that.
                         */
                        if (isrootfh(fh, rp)) {
                                if (!(vp->v_flag & VROOT)) {
                                        mutex_enter(&vp->v_lock);
                                        vp->v_flag |= VROOT;
                                        mutex_exit(&vp->v_lock);
                                }
                        }
                        return (rp);
                }
        }
        return (NULL);
}

/*
 * Look up an rnode by filehandle.  Just a wrapper for r4find()
 * that assumes the caller does not already hold the lock
 * on the hash bucket.
 */
rnode4_t *
r4find_unlocked(nfs4_sharedfh_t *fh, struct vfs *vfsp)
{
        rnode4_t *rp;
        int index;

        index = rtable4hash(fh);
        rw_enter(&rtable4[index].r_lock, RW_READER);
        rp = r4find(&rtable4[index], fh, vfsp);
        rw_exit(&rtable4[index].r_lock);

        return (rp);
}

/*
 * Return >0 if there is an active vnode belonging to this vfs in the
 * rtable4 cache.
 *
 * Several of these checks are done without holding the usual
 * locks.  This is safe because destroy_rtable4(), rp4_addfree(),
 * etc. will redo the necessary checks before actually destroying
 * any rnodes.
 */
int
check_rtable4(struct vfs *vfsp)
{
        rnode4_t *rp;
        vnode_t *vp;
        int busy = NFSV4_RTABLE4_OK;
        int index;

        for (index = 0; index < rtable4size; index++) {
                rw_enter(&rtable4[index].r_lock, RW_READER);

                for (rp = rtable4[index].r_hashf;
                    rp != (rnode4_t *)(&rtable4[index]);
                    rp = rp->r_hashf) {

                        vp = RTOV4(rp);
                        if (vp->v_vfsp == vfsp) {
                                if (rp->r_freef == NULL) {
                                        busy = NFSV4_RTABLE4_NOT_FREE_LIST;
                                } else if (nfs4_has_pages(vp) &&
                                    (rp->r_flags & R4DIRTY)) {
                                        busy = NFSV4_RTABLE4_DIRTY_PAGES;
                                } else if (rp->r_count > 0) {
                                        busy = NFSV4_RTABLE4_POS_R_COUNT;
                                }

                                if (busy != NFSV4_RTABLE4_OK) {
#ifdef DEBUG
                                        char *path;

                                        path = fn_path(rp->r_svnode.sv_name);
                                        DTRACE_NFSV4_3(rnode__e__debug,
                                            int, busy, char *, path,
                                            rnode4_t *, rp);
                                        kmem_free(path, strlen(path) + 1);
#endif
                                        rw_exit(&rtable4[index].r_lock);
                                        return (busy);
                                }
                        }
                }
                rw_exit(&rtable4[index].r_lock);
        }
        return (busy);
}

/*
 * Destroy inactive vnodes from the hash queues which
 * belong to this vfs.  All of the vnodes should be inactive.
 * It is essential that we destroy all rnodes in the case of a
 * forced unmount as well as in the normal unmount case.
 */
void
destroy_rtable4(struct vfs *vfsp, cred_t *cr)
{
        int index;
        vnode_t *vp;
        rnode4_t *rp, *r_hashf, *rlist;

        rlist = NULL;

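        /*
         * Pass 1: unhash each rnode belonging to this vfs that sits
         * on the freelist, chaining it onto rlist through its
         * now-unused r_hashf pointer.  Pass 2 below frees the
         * collected rnodes once all bucket locks have been dropped.
         */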
        for (index = 0; index < rtable4size; index++) {
                rw_enter(&rtable4[index].r_lock, RW_WRITER);
                for (rp = rtable4[index].r_hashf;
                    rp != (rnode4_t *)(&rtable4[index]);
                    rp = r_hashf) {
                        /* save the hash pointer before destroying */
                        r_hashf = rp->r_hashf;

                        vp = RTOV4(rp);
                        if (vp->v_vfsp == vfsp) {
                                mutex_enter(&rp4freelist_lock);
                                if (rp->r_freef != NULL) {
                                        rp4_rmfree(rp);
                                        mutex_exit(&rp4freelist_lock);
                                        rp4_rmhash_locked(rp);
                                        rp->r_hashf = rlist;
                                        rlist = rp;
                                } else
                                        mutex_exit(&rp4freelist_lock);
                        }
                }
                rw_exit(&rtable4[index].r_lock);
        }

        for (rp = rlist; rp != NULL; rp = r_hashf) {
                r_hashf = rp->r_hashf;
                /*
                 * This call to rp4_addfree will end up destroying the
                 * rnode, but in a safe way with the appropriate set
                 * of checks done.
                 */
                rp4_addfree(rp, cr);
        }
}

/*
 * This routine destroys all the resources of an rnode
 * and finally the rnode itself.
 */
static void
destroy_rnode4(rnode4_t *rp)
{
        vnode_t *vp;
        vfs_t *vfsp;

        ASSERT(rp->r_deleg_type == OPEN_DELEGATE_NONE);

        vp = RTOV4(rp);
        vfsp = vp->v_vfsp;

        uninit_rnode4(rp);
        atomic_add_long((ulong_t *)&rnode4_new, -1);
#ifdef DEBUG
        clstat4_debug.nrnode.value.ui64--;
#endif
        kmem_cache_free(rnode4_cache, rp);
        vn_invalid(vp);
        vn_free(vp);
        VFS_RELE(vfsp);
}

/*
 * Invalidate the attributes on all rnodes, forcing the next getattr
 * to go over the wire.  Used to flush stale uid and gid mappings.
 * May be done on a single vfsp or on all rnodes (vfsp == NULL).
 */
void
nfs4_rnode_invalidate(struct vfs *vfsp)
{
        int index;
        rnode4_t *rp;
        vnode_t *vp;

        /*
         * Walk the hash queues looking for rnodes.
         */
        for (index = 0; index < rtable4size; index++) {
                rw_enter(&rtable4[index].r_lock, RW_READER);
                for (rp = rtable4[index].r_hashf;
                    rp != (rnode4_t *)(&rtable4[index]);
                    rp = rp->r_hashf) {
                        vp = RTOV4(rp);
                        if (vfsp != NULL && vp->v_vfsp != vfsp)
                                continue;

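                        /*
                         * Best effort: skip any rnode whose
                         * r_statelock is currently contended rather
                         * than stall the walk.
                         */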
                        if (!mutex_tryenter(&rp->r_statelock))
                                continue;

                        /*
                         * Expire the attributes by resetting the change
                         * and attr timeout.
                         */
                        rp->r_change = 0;
                        PURGE_ATTRCACHE4_LOCKED(rp);
                        mutex_exit(&rp->r_statelock);
                }
                rw_exit(&rtable4[index].r_lock);
        }
}

/*
 * Flush all vnodes in this (or every) vfs.
 * Used by nfs_sync and by nfs_unmount.
 */
void
r4flush(struct vfs *vfsp, cred_t *cr)
{
        int index;
        rnode4_t *rp;
        vnode_t *vp, **vplist;
        long num, cnt;

        /*
         * Check to see whether there is anything to do.
         */
        num = rnode4_new;
        if (num == 0)
                return;

        /*
         * Allocate a slot for all currently active rnodes on the
         * supposition that they all may need flushing.
         */
        vplist = kmem_alloc(num * sizeof (*vplist), KM_SLEEP);
        cnt = 0;

        /*
         * Walk the hash queues looking for rnodes with page
         * lists associated with them.  Make a list of these
         * files.
         */
        for (index = 0; index < rtable4size; index++) {
                rw_enter(&rtable4[index].r_lock, RW_READER);
                for (rp = rtable4[index].r_hashf;
                    rp != (rnode4_t *)(&rtable4[index]);
                    rp = rp->r_hashf) {
                        vp = RTOV4(rp);
                        /*
                         * Don't bother sync'ing a vp if it
                         * is part of a virtual swap device or
                         * if the VFS is read-only.
                         */
                        if (IS_SWAPVP(vp) || vn_is_readonly(vp))
                                continue;
                        /*
                         * If flushing all mounted file systems or
                         * the vnode belongs to this vfs, has pages
                         * and is marked as either dirty or mmap'd,
                         * hold and add this vnode to the list of
                         * vnodes to flush.
                         */
                        if ((vfsp == NULL || vp->v_vfsp == vfsp) &&
                            nfs4_has_pages(vp) &&
                            ((rp->r_flags & R4DIRTY) || rp->r_mapcnt > 0)) {
                                VN_HOLD(vp);
                                vplist[cnt++] = vp;
                                if (cnt == num) {
                                        rw_exit(&rtable4[index].r_lock);
                                        goto toomany;
                                }
                        }
                }
                rw_exit(&rtable4[index].r_lock);
        }
toomany:

        /*
         * Flush and release all of the files on the list.
         */
        while (cnt-- > 0) {
                vp = vplist[cnt];
                (void) VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_ASYNC, cr, NULL);
                VN_RELE(vp);
        }

        /*
         * Free the space allocated to hold the list.
         */
        kmem_free(vplist, num * sizeof (*vplist));
}

int
nfs4_free_data_reclaim(rnode4_t *rp)
{
        char *contents;
        vnode_t *xattr;
        int size;
        vsecattr_t *vsp;
        int freed;
        bool_t rdc = FALSE;

        /*
         * Free any held caches which may
         * be associated with this rnode.
         */
        mutex_enter(&rp->r_statelock);
        if (rp->r_dir != NULL)
                rdc = TRUE;
        contents = rp->r_symlink.contents;
        size = rp->r_symlink.size;
        rp->r_symlink.contents = NULL;
        vsp = rp->r_secattr;
        rp->r_secattr = NULL;
        xattr = rp->r_xattr_dir;
        rp->r_xattr_dir = NULL;
        mutex_exit(&rp->r_statelock);

        /*
         * Free the access cache entries.
         */
        freed = nfs4_access_purge_rp(rp);

        if (rdc == FALSE && contents == NULL && vsp == NULL && xattr == NULL)
                return (freed);

        /*
         * Free the readdir cache entries, incompletely if we can't block.
         */
        nfs4_purge_rddir_cache(RTOV4(rp));

        /*
         * Free the symbolic link cache.
         */
        if (contents != NULL) {
                kmem_free((void *)contents, size);
        }

        /*
         * Free any cached ACL.
         */
        if (vsp != NULL)
                nfs4_acl_free_cache(vsp);

        /*
         * Release the xattr directory vnode.
         */
        if (xattr != NULL)
                VN_RELE(xattr);

        return (1);
}

static int
nfs4_active_data_reclaim(rnode4_t *rp)
{
        char *contents;
        vnode_t *xattr = NULL;
        int size;
        vsecattr_t *vsp;
        int freed;
        bool_t rdc = FALSE;

        /*
         * Free any held credentials and caches which
         * may be associated with this rnode.
         */
        if (!mutex_tryenter(&rp->r_statelock))
                return (0);
        contents = rp->r_symlink.contents;
        size = rp->r_symlink.size;
        rp->r_symlink.contents = NULL;
        vsp = rp->r_secattr;
        rp->r_secattr = NULL;
        if (rp->r_dir != NULL)
                rdc = TRUE;
        /*
         * To avoid a deadlock, do not free the r_xattr_dir cache if it is
         * hashed on the same r_hashq queue.  We are not mandated to free
         * all caches.  VN_RELE(rp->r_xattr_dir) will be done sometime
         * later - e.g. when the rnode 'rp' is freed or put on the free
         * list.
         *
         * We will retain NFS4_XATTR_DIR_NOTSUPP because:
         * - it has no associated rnode4_t (its v_data is NULL),
         * - it is preallocated statically and will never go away,
         * so we cannot save anything by releasing it.
         */
        if (rp->r_xattr_dir && rp->r_xattr_dir != NFS4_XATTR_DIR_NOTSUPP &&
            VTOR4(rp->r_xattr_dir)->r_hashq != rp->r_hashq) {
                xattr = rp->r_xattr_dir;
                rp->r_xattr_dir = NULL;
        }
        mutex_exit(&rp->r_statelock);

        /*
         * Free the access cache entries.
         */
        freed = nfs4_access_purge_rp(rp);

        if (contents == NULL && vsp == NULL && rdc == FALSE && xattr == NULL)
                return (freed);

        /*
         * Free the symbolic link cache.
         */
        if (contents != NULL) {
                kmem_free((void *)contents, size);
        }

        /*
         * Free any cached ACL.
         */
        if (vsp != NULL)
                nfs4_acl_free_cache(vsp);

        nfs4_purge_rddir_cache(RTOV4(rp));

        /*
         * Release the xattr directory vnode.
         */
        if (xattr != NULL)
                VN_RELE(xattr);

        return (1);
}
1488 
1489 static int
1490 nfs4_free_reclaim(void)
1491 {
1492         int freed;
1493         rnode4_t *rp;
1494 
1495 #ifdef DEBUG
1496         clstat4_debug.f_reclaim.value.ui64++;
1497 #endif
1498         freed = 0;
1499         mutex_enter(&rp4freelist_lock);
1500         rp = rp4freelist;
1501         if (rp != NULL) {
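                /* The freelist is circular; stop when we wrap around. */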
1502                 do {
1503                         if (nfs4_free_data_reclaim(rp))
1504                                 freed = 1;
1505                 } while ((rp = rp->r_freef) != rp4freelist);
1506         }
1507         mutex_exit(&rp4freelist_lock);
1508         return (freed);
1509 }
1510 
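/*
 * Second reclaim pass: walk every bucket of the rnode4 hash table and
 * try to free cached data from each active rnode, without blocking.
 */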
1511 static int
1512 nfs4_active_reclaim(void)
1513 {
1514         int freed;
1515         int index;
1516         rnode4_t *rp;
1517 
1518 #ifdef DEBUG
1519         clstat4_debug.a_reclaim.value.ui64++;
1520 #endif
1521         freed = 0;
1522         for (index = 0; index < rtable4size; index++) {
1523                 rw_enter(&rtable4[index].r_lock, RW_READER);
1524                 for (rp = rtable4[index].r_hashf;
1525                     rp != (rnode4_t *)(&rtable4[index]);
1526                     rp = rp->r_hashf) {
1527                         if (nfs4_active_data_reclaim(rp))
1528                                 freed = 1;
1529                 }
1530                 rw_exit(&rtable4[index].r_lock);
1531         }
1532         return (freed);
1533 }
1534 
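/*
 * Last-resort reclaim pass: pull rnodes off the freelist and destroy
 * them outright, as long as no one else holds a reference to the
 * vnode.
 */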
1535 static int
1536 nfs4_rnode_reclaim(void)
1537 {
1538         int freed;
1539         rnode4_t *rp;
1540         vnode_t *vp;
1541 
1542 #ifdef DEBUG
1543         clstat4_debug.r_reclaim.value.ui64++;
1544 #endif
1545         freed = 0;
1546         mutex_enter(&rp4freelist_lock);
1547         while ((rp = rp4freelist) != NULL) {
1548                 rp4_rmfree(rp);
1549                 mutex_exit(&rp4freelist_lock);
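                /*
                 * If the rnode is still hashed, check whether anyone
                 * else holds the vnode.  With v_count > 1 another
                 * thread has a reference, so just drop ours and move
                 * on; otherwise unhash the rnode so it can be torn
                 * down.
                 */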
1550                 if (rp->r_flags & R4HASHED) {
1551                         vp = RTOV4(rp);
1552                         rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
1553                         mutex_enter(&vp->v_lock);
1554                         if (vp->v_count > 1) {
1555                                 vp->v_count--;
1556                                 mutex_exit(&vp->v_lock);
1557                                 rw_exit(&rp->r_hashq->r_lock);
1558                                 mutex_enter(&rp4freelist_lock);
1559                                 continue;
1560                         }
1561                         mutex_exit(&vp->v_lock);
1562                         rp4_rmhash_locked(rp);
1563                         rw_exit(&rp->r_hashq->r_lock);
1564                 }
1565                 /*
1566                  * This call to rp_addfree will end up destroying the
1567                  * rnode, but in a safe way with the appropriate set
1568                  * of checks done.
1569                  */
1570                 rp4_addfree(rp, CRED());
1571                 mutex_enter(&rp4freelist_lock);
1572         }
1573         mutex_exit(&rp4freelist_lock);
1574         return (freed);
1575 }
1576 
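/*
 * Memory-reclaim callback registered for rnode4_cache (see
 * nfs4_rnode_init() below).  Run the reclaim passes in order of
 * increasing cost and stop as soon as one of them frees something.
 */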
1577 /*ARGSUSED*/
1578 static void
1579 nfs4_reclaim(void *cdrarg)
1580 {
1581 #ifdef DEBUG
1582         clstat4_debug.reclaim.value.ui64++;
1583 #endif
1584         if (nfs4_free_reclaim())
1585                 return;
1586 
1587         if (nfs4_active_reclaim())
1588                 return;
1589 
1590         (void) nfs4_rnode_reclaim();
1591 }
1592 
1593 /*
1594  * Returns the clientid4 to use for the given mntinfo4.  Note that the
1595  * clientid can change if the caller drops mi_recovlock.
1596  */
1597 
1598 clientid4
1599 mi2clientid(mntinfo4_t *mi)
1600 {
1601         nfs4_server_t   *sp;
1602         clientid4       clientid = 0;
1603 
1604         /* this locks down sp if it is found */
1605         sp = find_nfs4_server(mi);
1606         if (sp != NULL) {
1607                 clientid = sp->clientid;
1608                 mutex_exit(&sp->s_lock);
1609                 nfs4_server_rele(sp);
1610         }
1611         return (clientid);
1612 }
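
/*
 * A minimal caller sketch (illustrative only, not a routine in this
 * file): per the note above, the result is stable only while
 * mi_recovlock is held, so a caller needing a consistent clientid
 * would do something like:
 *
 *      (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
 *      clientid = mi2clientid(mi);
 *      ... use clientid ...
 *      nfs_rw_exit(&mi->mi_recovlock);
 */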
1613 
1614 /*
1615  * Return the current lease time for the server associated with the given
1616  * file.  Note that the lease time could change immediately after this
1617  * call.
1618  */
1619 
1620 time_t
1621 r2lease_time(rnode4_t *rp)
1622 {
1623         nfs4_server_t   *sp;
1624         time_t          lease_time;
1625         mntinfo4_t      *mi = VTOMI4(RTOV4(rp));
1626 
1627         (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
1628 
        /* this locks down sp if it is found */
        sp = find_nfs4_server(mi);

        if (mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED) {
1633                 if (sp != NULL) {
1634                         mutex_exit(&sp->s_lock);
1635                         nfs4_server_rele(sp);
1636                 }
1637                 nfs_rw_exit(&mi->mi_recovlock);
1638                 return (1);             /* 1 second */
1639         }
1640 
1641         ASSERT(sp != NULL);
1642 
1643         lease_time = sp->s_lease_time;
1644 
1645         mutex_exit(&sp->s_lock);
1646         nfs4_server_rele(sp);
1647         nfs_rw_exit(&mi->mi_recovlock);
1648 
1649         return (lease_time);
1650 }
1651 
/*
 * Return a list with information about all the known open instances for
 * a filesystem.  The caller must call r4releopenlist() when done with
 * the list.
 *
 * It is safe to look at os_valid and os_pending_close across dropping
 * 'os_sync_lock' to count the number of open streams and then allocate
 * memory for the osp list, because:
 *      - Looking at os_pending_close is safe since this routine is
 *      only called via recovery, and os_pending_close can only be set
 *      via a non-recovery operation (all of which are blocked while
 *      recovery is active).
 *
 *      - Examining os_valid is safe since non-recovery operations,
 *      which could potentially switch os_valid to 0, are blocked (via
 *      nfs4_start_fop) and recovery is single-threaded per mntinfo4_t
 *      (which means we are the only recovery thread potentially acting
 *      on this open stream).
 */
1671 
1672 nfs4_opinst_t *
1673 r4mkopenlist(mntinfo4_t *mi)
1674 {
1675         nfs4_opinst_t *reopenlist, *rep;
1676         rnode4_t *rp;
1677         vnode_t *vp;
1678         vfs_t *vfsp = mi->mi_vfsp;
1679         int numosp;
1680         nfs4_open_stream_t *osp;
1681         int index;
1682         open_delegation_type4 dtype;
1683         int hold_vnode;
1684 
1685         reopenlist = NULL;
1686 
1687         for (index = 0; index < rtable4size; index++) {
1688                 rw_enter(&rtable4[index].r_lock, RW_READER);
1689                 for (rp = rtable4[index].r_hashf;
1690                     rp != (rnode4_t *)(&rtable4[index]);
1691                     rp = rp->r_hashf) {
1692 
1693                         vp = RTOV4(rp);
1694                         if (vp->v_vfsp != vfsp)
1695                                 continue;
1696                         hold_vnode = 0;
1697 
1698                         mutex_enter(&rp->r_os_lock);
1699 
1700                         /* Count the number of valid open_streams of the file */
1701                         numosp = 0;
1702                         for (osp = list_head(&rp->r_open_streams); osp != NULL;
1703                             osp = list_next(&rp->r_open_streams, osp)) {
1704                                 mutex_enter(&osp->os_sync_lock);
1705                                 if (osp->os_valid && !osp->os_pending_close)
1706                                         numosp++;
1707                                 mutex_exit(&osp->os_sync_lock);
1708                         }
1709 
1710                         /* Fill in the valid open streams per vp */
1711                         if (numosp > 0) {
1712                                 int j;
1713 
1714                                 hold_vnode = 1;
1715 
1716                                 /*
1717                                  * Add a new open instance to the list
1718                                  */
1719                                 rep = kmem_zalloc(sizeof (*reopenlist),
1720                                     KM_SLEEP);
1721                                 rep->re_next = reopenlist;
1722                                 reopenlist = rep;
1723 
1724                                 rep->re_vp = vp;
1725                                 rep->re_osp = kmem_zalloc(
1726                                     numosp * sizeof (*(rep->re_osp)),
1727                                     KM_SLEEP);
1728                                 rep->re_numosp = numosp;
1729 
1730                                 j = 0;
1731                                 for (osp = list_head(&rp->r_open_streams);
1732                                     osp != NULL;
1733                                     osp = list_next(&rp->r_open_streams, osp)) {
1734 
1735                                         mutex_enter(&osp->os_sync_lock);
1736                                         if (osp->os_valid &&
1737                                             !osp->os_pending_close) {
1738                                                 osp->os_ref_count++;
1739                                                 rep->re_osp[j] = osp;
1740                                                 j++;
1741                                         }
1742                                         mutex_exit(&osp->os_sync_lock);
1743                                 }
                                /*
                                 * The open streams counted above must
                                 * still be valid here: os_sync_lock
                                 * was dropped between the two passes,
                                 * but recovery is the only thread that
                                 * can invalidate them (see the comment
                                 * above this function), so j must
                                 * equal numosp.
                                 */
                                ASSERT(j == numosp);
1749                         }
1750 
1751                         mutex_exit(&rp->r_os_lock);
                        /* VN_HOLD here, after dropping r_os_lock, to keep v_lock > r_os_lock */
1753                         if (hold_vnode)
1754                                 VN_HOLD(vp);
1755                         mutex_enter(&rp->r_statev4_lock);
1756                         if (rp->r_deleg_type != OPEN_DELEGATE_NONE) {
                                /*
                                 * If this rnode holds a delegation but
                                 * has no valid open streams, just
                                 * discard the delegation without doing
                                 * a delegreturn; otherwise remember
                                 * the delegation type so it can be
                                 * recovered.
                                 */
1763                                 if (numosp > 0)
1764                                         rp->r_deleg_needs_recovery =
1765                                             rp->r_deleg_type;
1766                         }
1767                         /* Save the delegation type for use outside the lock */
1768                         dtype = rp->r_deleg_type;
1769                         mutex_exit(&rp->r_statev4_lock);
1770 
1771                         /*
1772                          * If we have a delegation then get rid of it.
1773                          * We've set rp->r_deleg_needs_recovery so we have
1774                          * enough information to recover.
1775                          */
1776                         if (dtype != OPEN_DELEGATE_NONE) {
1777                                 (void) nfs4delegreturn(rp, NFS4_DR_DISCARD);
1778                         }
1779                 }
1780                 rw_exit(&rtable4[index].r_lock);
1781         }
1782         return (reopenlist);
1783 }
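
/*
 * A minimal consumer sketch (illustrative only): recovery walks the
 * returned list via re_next and must release it with r4releopenlist()
 * when done, e.g.:
 *
 *      nfs4_opinst_t *rep;
 *
 *      reopenlist = r4mkopenlist(mi);
 *      for (rep = reopenlist; rep != NULL; rep = rep->re_next)
 *              ... reopen rep->re_vp using rep->re_osp[0..re_numosp-1] ...
 *      r4releopenlist(reopenlist);
 */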
1784 
1785 /*
1786  * Given a filesystem id, check to see if any rnodes
1787  * within this fsid reside in the rnode cache, other
1788  * than one we know about.
1789  *
1790  * Return 1 if an rnode is found, 0 otherwise
1791  */
1792 int
1793 r4find_by_fsid(mntinfo4_t *mi, fattr4_fsid *moved_fsid)
1794 {
1795         rnode4_t *rp;
1796         vnode_t *vp;
1797         vfs_t *vfsp = mi->mi_vfsp;
1798         fattr4_fsid *fsid;
1799         int index, found = 0;
1800 
1801         for (index = 0; index < rtable4size; index++) {
1802                 rw_enter(&rtable4[index].r_lock, RW_READER);
1803                 for (rp = rtable4[index].r_hashf;
1804                     rp != (rnode4_t *)(&rtable4[index]);
1805                     rp = rp->r_hashf) {
1806 
1807                         vp = RTOV4(rp);
1808                         if (vp->v_vfsp != vfsp)
1809                                 continue;
1810 
                        /*
                         * XXX: a replicated filesystem may have the
                         * same fsid across two different servers, in
                         * which case this check isn't sufficient.
                         */
1817                         fsid = &rp->r_srv_fsid;
1818                         if (FATTR4_FSID_EQ(moved_fsid, fsid)) {
1819                                 found = 1;
1820                                 break;
1821                         }
1822                 }
1823                 rw_exit(&rtable4[index].r_lock);
1824 
1825                 if (found)
1826                         break;
1827         }
1828         return (found);
1829 }
1830 
1831 /*
1832  * Release the list of open instance references.
1833  */
1834 
1835 void
1836 r4releopenlist(nfs4_opinst_t *reopenp)
1837 {
1838         nfs4_opinst_t *rep, *next;
1839         int i;
1840 
1841         for (rep = reopenp; rep; rep = next) {
1842                 next = rep->re_next;
1843 
1844                 for (i = 0; i < rep->re_numosp; i++)
1845                         open_stream_rele(rep->re_osp[i], VTOR4(rep->re_vp));
1846 
1847                 VN_RELE(rep->re_vp);
1848                 kmem_free(rep->re_osp,
1849                     rep->re_numosp * sizeof (*(rep->re_osp)));
1850 
1851                 kmem_free(rep, sizeof (*rep));
1852         }
1853 }
1854 
1855 int
1856 nfs4_rnode_init(void)
1857 {
1858         ulong_t nrnode4_max;
1859         int i;
1860 
1861         /*
1862          * Compute the size of the rnode4 hash table
1863          */
1864         if (nrnode <= 0)
1865                 nrnode = ncsize;
1866         nrnode4_max =
1867             (ulong_t)((kmem_maxavail() >> 2) / sizeof (struct rnode4));
1868         if (nrnode > nrnode4_max || (nrnode == 0 && ncsize == 0)) {
1869                 zcmn_err(GLOBAL_ZONEID, CE_NOTE,
1870                     "!setting nrnode to max value of %ld", nrnode4_max);
1871                 nrnode = nrnode4_max;
1872         }
1873         rtable4size = 1 << highbit(nrnode / rnode4_hashlen);
1874         rtable4mask = rtable4size - 1;
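        /*
         * For example (hypothetical numbers): with nrnode = 16384 and
         * rnode4_hashlen = 4, nrnode / rnode4_hashlen = 4096 and
         * highbit(4096) = 13, so rtable4size = 1 << 13 = 8192 buckets
         * (the power of two strictly greater than the quotient) and
         * rtable4mask = 8191.
         */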
1875 
1876         /*
1877          * Allocate and initialize the hash buckets
1878          */
1879         rtable4 = kmem_alloc(rtable4size * sizeof (*rtable4), KM_SLEEP);
1880         for (i = 0; i < rtable4size; i++) {
1881                 rtable4[i].r_hashf = (rnode4_t *)(&rtable4[i]);
1882                 rtable4[i].r_hashb = (rnode4_t *)(&rtable4[i]);
1883                 rw_init(&rtable4[i].r_lock, NULL, RW_DEFAULT, NULL);
1884         }
1885 
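        /*
         * Create the rnode cache; nfs4_reclaim (above) is registered
         * as its memory-reclaim callback.
         */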
1886         rnode4_cache = kmem_cache_create("rnode4_cache", sizeof (rnode4_t),
1887             0, NULL, NULL, nfs4_reclaim, NULL, NULL, 0);
1888 
1889         return (0);
1890 }
1891 
1892 int
1893 nfs4_rnode_fini(void)
1894 {
1895         int i;
1896 
        /*
         * Destroy the rnode cache, then deallocate the rnode hash
         * queues.
         */
1900         kmem_cache_destroy(rnode4_cache);
1901 
1902         for (i = 0; i < rtable4size; i++)
1903                 rw_destroy(&rtable4[i].r_lock);
1904 
1905         kmem_free(rtable4, rtable4size * sizeof (*rtable4));
1906 
1907         return (0);
1908 }
1909 
1910 /*
1911  * Return non-zero if the given filehandle refers to the root filehandle
1912  * for the given rnode.
1913  */
1914 
1915 static int
1916 isrootfh(nfs4_sharedfh_t *fh, rnode4_t *rp)
1917 {
1918         int isroot;
1919 
1920         isroot = 0;
1921         if (SFH4_SAME(VTOMI4(RTOV4(rp))->mi_rootfh, fh))
1922                 isroot = 1;
1923 
1924         return (isroot);
1925 }
1926 
1927 /*
1928  * The r4_stub_* routines assume that the rnode is newly activated, and
1929  * that the caller either holds the hash bucket r_lock for this rnode as
1930  * RW_WRITER, or holds r_statelock.
1931  */
1932 static void
1933 r4_stub_set(rnode4_t *rp, nfs4_stub_type_t type)
1934 {
1935         vnode_t *vp = RTOV4(rp);
1936         krwlock_t *hash_lock = &rp->r_hashq->r_lock;
1937 
1938         ASSERT(RW_WRITE_HELD(hash_lock) || MUTEX_HELD(&rp->r_statelock));
1939 
1940         rp->r_stub_type = type;
1941 
1942         /*
1943          * Safely switch this vnode to the trigger vnodeops.
1944          *
1945          * Currently, we don't ever switch a trigger vnode back to using
1946          * "regular" v4 vnodeops. NFS4_STUB_NONE is only used to note that
1947          * a new v4 object is not a trigger, and it will already have the
1948          * correct v4 vnodeops by default. So, no "else" case required here.
1949          */
1950         if (type != NFS4_STUB_NONE)
1951                 vn_setops(vp, nfs4_trigger_vnodeops);
1952 }
1953 
1954 void
1955 r4_stub_mirrormount(rnode4_t *rp)
1956 {
1957         r4_stub_set(rp, NFS4_STUB_MIRRORMOUNT);
1958 }
1959 
1960 void
1961 r4_stub_referral(rnode4_t *rp)
1962 {
1963         DTRACE_PROBE1(nfs4clnt__func__referral__moved,
1964             vnode_t *, RTOV4(rp));
1965         r4_stub_set(rp, NFS4_STUB_REFERRAL);
1966 }
1967 
1968 void
1969 r4_stub_none(rnode4_t *rp)
1970 {
1971         r4_stub_set(rp, NFS4_STUB_NONE);
1972 }
1973 
1974 #ifdef DEBUG
1975 
/*
 * Look in the rnode table for other rnodes that have the same
 * filehandle.  Assumes the lock is held for the hash chain of checkrp.
 */
1980 
1981 static void
1982 r4_dup_check(rnode4_t *checkrp, vfs_t *vfsp)
1983 {
1984         rnode4_t *rp;
1985         vnode_t *tvp;
1986         nfs4_fhandle_t fh, fh2;
1987         int index;
1988 
1989         if (!r4_check_for_dups)
1990                 return;
1991 
1992         ASSERT(RW_LOCK_HELD(&checkrp->r_hashq->r_lock));
1993 
1994         sfh4_copyval(checkrp->r_fh, &fh);
1995 
1996         for (index = 0; index < rtable4size; index++) {
1997 
1998                 if (&rtable4[index] != checkrp->r_hashq)
1999                         rw_enter(&rtable4[index].r_lock, RW_READER);
2000 
2001                 for (rp = rtable4[index].r_hashf;
2002                     rp != (rnode4_t *)(&rtable4[index]);
2003                     rp = rp->r_hashf) {
2004 
2005                         if (rp == checkrp)
2006                                 continue;
2007 
2008                         tvp = RTOV4(rp);
2009                         if (tvp->v_vfsp != vfsp)
2010                                 continue;
2011 
2012                         sfh4_copyval(rp->r_fh, &fh2);
2013                         if (nfs4cmpfhandle(&fh, &fh2) == 0) {
2014                                 cmn_err(CE_PANIC, "rnodes with same fs, fh "
2015                                     "(%p, %p)", (void *)checkrp, (void *)rp);
2016                         }
2017                 }
2018 
2019                 if (&rtable4[index] != checkrp->r_hashq)
2020                         rw_exit(&rtable4[index].r_lock);
2021         }
2022 }
2023 
2024 #endif /* DEBUG */