1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  *
  25  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  26  *      All rights reserved.
  27  */
  28 
  29 /*
  30  * Node hash implementation initially borrowed from NFS (nfs_subr.c)
  31  * but then heavily modified. It's no longer an array of hash lists,
  32  * but an AVL tree per mount point.  More on this below.
  33  */
  34 
  35 #include <sys/param.h>
  36 #include <sys/systm.h>
  37 #include <sys/time.h>
  38 #include <sys/vnode.h>
  39 #include <sys/bitmap.h>
  40 #include <sys/dnlc.h>
  41 #include <sys/kmem.h>
  42 #include <sys/sunddi.h>
  43 #include <sys/sysmacros.h>
  44 
  45 #include <netsmb/smb_osdep.h>
  46 
  47 #include <netsmb/smb.h>
  48 #include <netsmb/smb_conn.h>
  49 #include <netsmb/smb_subr.h>
  50 #include <netsmb/smb_rq.h>
  51 
  52 #include <smbfs/smbfs.h>
  53 #include <smbfs/smbfs_node.h>
  54 #include <smbfs/smbfs_subr.h>
  55 
  56 /*
  57  * The AVL trees (now per-mount) allow finding an smbfs node by its
  58  * full remote path name.  It also allows easy traversal of all nodes
  59  * below (path wise) any given node.  A reader/writer lock for each
  60  * (per mount) AVL tree is used to control access and to synchronize
  61  * lookups, additions, and deletions from that AVL tree.
  62  *
 * Previously, this code used a global array of hash chains, each with
  64  * its own rwlock.  A few struct members, functions, and comments may
  65  * still refer to a "hash", and those should all now be considered to
  66  * refer to the per-mount AVL tree that replaced the old hash chains.
  67  * (i.e. member smi_hash_lk, function sn_hashfind, etc.)
  68  *
  69  * The smbnode freelist is organized as a doubly linked list with
  70  * a head pointer.  Additions and deletions are synchronized via
  71  * a single mutex.
  72  *
  73  * In order to add an smbnode to the free list, it must be linked into
  74  * the mount's AVL tree and the exclusive lock for the AVL must be held.
  75  * If an smbnode is not linked into the AVL tree, then it is destroyed
  76  * because it represents no valuable information that can be reused
  77  * about the file.  The exclusive lock for the AVL tree must be held
  78  * in order to prevent a lookup in the AVL tree from finding the
  79  * smbnode and using it and assuming that the smbnode is not on the
  80  * freelist.  The lookup in the AVL tree will have the AVL tree lock
  81  * held, either exclusive or shared.
  82  *
  83  * The vnode reference count for each smbnode is not allowed to drop
  84  * below 1.  This prevents external entities, such as the VM
  85  * subsystem, from acquiring references to vnodes already on the
  86  * freelist and then trying to place them back on the freelist
 * when their reference is released.  This means that when an
 * smbnode is looked up in the AVL tree, then either the smbnode
 * is removed from the freelist and that reference is transferred to
  90  * the new reference or the vnode reference count must be incremented
  91  * accordingly.  The mutex for the freelist must be held in order to
  92  * accurately test to see if the smbnode is on the freelist or not.
  93  * The AVL tree lock might be held shared and it is possible that
  94  * two different threads may race to remove the smbnode from the
  95  * freelist.  This race can be resolved by holding the mutex for the
 * freelist.  Please note that the mutex for the freelist does not
 * need to be held if the smbnode is not on the freelist.  It can not be
  98  * placed on the freelist due to the requirement that the thread
  99  * putting the smbnode on the freelist must hold the exclusive lock
 100  * for the AVL tree and the thread doing the lookup in the AVL tree
 101  * is holding either a shared or exclusive lock for the AVL tree.
 102  *
 103  * The lock ordering is:
 104  *
 105  *      AVL tree lock -> vnode lock
 106  *      AVL tree lock -> freelist lock
 107  */
 108 
/*
 * The smbnode freelist (see the big comment above) and the
 * counters used to decide between recycling and allocating.
 * smbnodenew counts smbnodes currently allocated; nsmbnode is
 * the level at/above which make_smbnode() recycles from the
 * freelist and smbfs_addfree() destroys rather than caches.
 */
static kmutex_t smbfreelist_lock;
static smbnode_t *smbfreelist = NULL;
static ulong_t	smbnodenew = 0;
long	nsmbnode = 0;

/* Cache from which all smbnode_t allocations are made. */
static struct kmem_cache *smbnode_cache;

/* All-zero vsecattr, assigned when detaching r_secattr in sn_inactive(). */
static const vsecattr_t smbfs_vsa0 = { 0 };

/*
 * Mutex to protect the following variables:
 *	smbfs_major
 *	smbfs_minor
 */
kmutex_t smbfs_minor_lock;
int smbfs_major;
int smbfs_minor;

/*
 * All-zero fake attributes.  Callers of smbfs_node_findcreate()
 * that want a node created but have no real attributes to apply
 * pass a pointer to this to force creation.
 * See smbfs_node_findcreate()
 */
struct smbfattr smbfs_fattr0;

/*
 * Local functions.
 * SN for Smb Node
 */
static void sn_rmfree(smbnode_t *);
static void sn_inactive(smbnode_t *);
static void sn_addhash_locked(smbnode_t *, avl_index_t);
static void sn_rmhash_locked(smbnode_t *);
static void sn_destroy_node(smbnode_t *);
void smbfs_kmem_reclaim(void *cdrarg);

static smbnode_t *
sn_hashfind(smbmntinfo_t *, const char *, int, avl_index_t *);

static smbnode_t *
make_smbnode(smbmntinfo_t *, const char *, int, int *);
 146 
/*
 * Free the resources associated with an smbnode.
 * Note: This is different from smbfs_inactive
 *
 * Callers in this file (smbfs_addfree, make_smbnode) invoke
 * this only after the node has been removed from the "hash"
 * AVL tree, so no new lookups can find it while we tear it
 * down here.
 *
 * NFS: nfs_subr.c:rinactive
 */
static void
sn_inactive(smbnode_t *np)
{
	vsecattr_t	ovsa;
	cred_t		*oldcr;
	char		*orpath;
	int		orplen;
	vnode_t		*vp;

	/*
	 * Flush and invalidate all pages
	 * Free any held credentials and caches...
	 * etc.  (See NFS code)
	 */
	mutex_enter(&np->r_statelock);

	/*
	 * Detach the cached ACL, credential, and remote path
	 * while holding r_statelock; the actual free calls
	 * happen below, after the lock is dropped (they may
	 * block).
	 */
	ovsa = np->r_secattr;
	np->r_secattr = smbfs_vsa0;
	np->r_sectime = 0;

	oldcr = np->r_cred;
	np->r_cred = NULL;

	orpath = np->n_rpath;
	orplen = np->n_rplen;
	np->n_rpath = NULL;
	np->n_rplen = 0;

	mutex_exit(&np->r_statelock);

	/* Toss any cached pages, using the saved credential. */
	vp = SMBTOV(np);
	if (vn_has_cached_data(vp)) {
		smbfs_invalidate_pages(vp, (u_offset_t) 0, oldcr);
	}

	/* Now free what we detached above. */
	if (ovsa.vsa_aclentp != NULL)
		kmem_free(ovsa.vsa_aclentp, ovsa.vsa_aclentsz);

	if (oldcr != NULL)
		crfree(oldcr);

	if (orpath != NULL)
		kmem_free(orpath, orplen + 1);
}
 197 
/*
 * Find and optionally create an smbnode for the passed
 * mountinfo, directory, separator, and name.  If the
 * desired smbnode already exists, return a reference.
 * If the file attributes pointer is non-null, the node
 * is created if necessary and linked into the AVL tree.
 *
 * Callers that need a node created but don't have the
 * real attributes pass smbfs_fattr0 to force creation.
 *
 * Note: make_smbnode() may upgrade the "hash" lock to exclusive.
 *
 * Returns NULL only when fap == NULL (pure lookup) and the
 * node was not found; otherwise returns a held node.
 *
 * NFS: nfs_subr.c:makenfsnode
 */
smbnode_t *
smbfs_node_findcreate(
	smbmntinfo_t *mi,
	const char *dirnm,
	int dirlen,
	const char *name,
	int nmlen,
	char sep,
	struct smbfattr *fap)
{
	char tmpbuf[256];
	size_t rpalloc;
	char *p, *rpath;
	int rplen;
	smbnode_t *np;
	vnode_t *vp;
	int newnode;

	/*
	 * Build the search string, either in tmpbuf or
	 * in allocated memory if larger than tmpbuf.
	 * Layout is: dirnm [+ sep] [+ name]
	 */
	rplen = dirlen;
	if (sep != '\0')
		rplen++;
	rplen += nmlen;
	if (rplen < sizeof (tmpbuf)) {
		/* use tmpbuf */
		rpalloc = 0;
		rpath = tmpbuf;
	} else {
		rpalloc = rplen + 1;
		rpath = kmem_alloc(rpalloc, KM_SLEEP);
	}
	p = rpath;
	bcopy(dirnm, p, dirlen);
	p += dirlen;
	if (sep != '\0')
		*p++ = sep;
	if (name != NULL) {
		bcopy(name, p, nmlen);
		p += nmlen;
	}
	ASSERT(p == rpath + rplen);

	/*
	 * Find or create a node with this path.
	 * (Lookup only, when fap == NULL.)
	 */
	rw_enter(&mi->smi_hash_lk, RW_READER);
	if (fap == NULL)
		np = sn_hashfind(mi, rpath, rplen, NULL);
	else
		np = make_smbnode(mi, rpath, rplen, &newnode);
	rw_exit(&mi->smi_hash_lk);

	/* Done with the search string. */
	if (rpalloc)
		kmem_free(rpath, rpalloc);

	if (fap == NULL) {
		/*
		 * Caller is "just looking" (no create)
		 * so np may or may not be NULL here.
		 * Either way, we're done.
		 */
		return (np);
	}

	/*
	 * We should have a node, possibly created.
	 * Do we have (real) attributes to apply?
	 */
	ASSERT(np != NULL);
	if (fap == &smbfs_fattr0)
		return (np);

	/*
	 * Apply the given attributes to this node,
	 * dealing with any cache impact, etc.
	 */
	vp = SMBTOV(np);
	if (!newnode) {
		/*
		 * Found an existing node.
		 * Maybe purge caches...
		 */
		smbfs_cache_check(vp, fap);
	}
	smbfs_attrcache_fa(vp, fap);

	/*
	 * Note NFS sets vp->v_type here, assuming it
	 * can never change for the life of a node.
	 * We allow v_type to change, and set it in
	 * smbfs_attrcache().  Also: mode, uid, gid
	 */
	return (np);
}
 309 
 310 /*
 311  * NFS: nfs_subr.c:rtablehash
 312  * We use smbfs_hash().
 313  */
 314 
/*
 * Find or create an smbnode.
 *
 * Entered with mi->smi_hash_lk held as reader; the lock may be
 * dropped and retaken internally (retaken as writer in order to
 * insert a new node).  Returns with the lock held: as reader on
 * the "found existing" paths, as writer when *newnode == 1.
 *
 * NFS: nfs_subr.c:make_rnode
 */
static smbnode_t *
make_smbnode(
	smbmntinfo_t *mi,
	const char *rpath,
	int rplen,
	int *newnode)
{
	smbnode_t *np;
	smbnode_t *tnp;
	vnode_t *vp;
	vfs_t *vfsp;
	avl_index_t where;
	char *new_rpath = NULL;

	ASSERT(RW_READ_HELD(&mi->smi_hash_lk));
	vfsp = mi->smi_vfsp;

start:
	np = sn_hashfind(mi, rpath, rplen, NULL);
	if (np != NULL) {
		/* Found it (held, via sn_hashfind).  Not new. */
		*newnode = 0;
		return (np);
	}

	/* Note: will retake this lock below. */
	rw_exit(&mi->smi_hash_lk);

	/*
	 * see if we can find something on the freelist
	 * (recycle only when at/over the nsmbnode limit;
	 * otherwise allocate a fresh node below)
	 */
	mutex_enter(&smbfreelist_lock);
	if (smbfreelist != NULL && smbnodenew >= nsmbnode) {
		np = smbfreelist;
		sn_rmfree(np);
		mutex_exit(&smbfreelist_lock);

		vp = SMBTOV(np);

		if (np->r_flags & RHASHED) {
			/*
			 * The recycled node may belong to another
			 * mount, so unhash it under THAT mount's
			 * AVL lock.  Re-check v_count under v_lock:
			 * a concurrent lookup may have re-referenced
			 * the node after we took it off the freelist.
			 */
			smbmntinfo_t *tmp_mi = np->n_mount;
			ASSERT(tmp_mi != NULL);
			rw_enter(&tmp_mi->smi_hash_lk, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				/* In use again; drop our ref and retry. */
				vp->v_count--;
				mutex_exit(&vp->v_lock);
				rw_exit(&tmp_mi->smi_hash_lk);
				/* start over */
				rw_enter(&mi->smi_hash_lk, RW_READER);
				goto start;
			}
			mutex_exit(&vp->v_lock);
			sn_rmhash_locked(np);
			rw_exit(&tmp_mi->smi_hash_lk);
		}

		/* Tear down the old node's cached state. */
		sn_inactive(np);

		/*
		 * Re-check for references gained while sn_inactive
		 * ran without v_lock held; if any, retry from the top.
		 */
		mutex_enter(&vp->v_lock);
		if (vp->v_count > 1) {
			vp->v_count--;
			mutex_exit(&vp->v_lock);
			rw_enter(&mi->smi_hash_lk, RW_READER);
			goto start;
		}
		mutex_exit(&vp->v_lock);
		vn_invalid(vp);
		/*
		 * destroy old locks before bzero'ing and
		 * recreating the locks below.
		 */
		smbfs_rw_destroy(&np->r_rwlock);
		smbfs_rw_destroy(&np->r_lkserlock);
		mutex_destroy(&np->r_statelock);
		cv_destroy(&np->r_cv);
		/*
		 * Make sure that if smbnode is recycled then
		 * VFS count is decremented properly before
		 * reuse.
		 */
		VFS_RELE(vp->v_vfsp);
		vn_reinit(vp);
	} else {
		/*
		 * allocate and initialize a new smbnode
		 */
		vnode_t *new_vp;

		mutex_exit(&smbfreelist_lock);

		np = kmem_cache_alloc(smbnode_cache, KM_SLEEP);
		new_vp = vn_alloc(KM_SLEEP);

		atomic_add_long((ulong_t *)&smbnodenew, 1);
		vp = new_vp;
	}

	/*
	 * Allocate and copy the rpath we'll need below.
	 */
	new_rpath = kmem_alloc(rplen + 1, KM_SLEEP);
	bcopy(rpath, new_rpath, rplen);
	new_rpath[rplen] = '\0';

	/* Initialize smbnode_t */
	bzero(np, sizeof (*np));

	smbfs_rw_init(&np->r_rwlock, NULL, RW_DEFAULT, NULL);
	smbfs_rw_init(&np->r_lkserlock, NULL, RW_DEFAULT, NULL);
	mutex_init(&np->r_statelock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&np->r_cv, NULL, CV_DEFAULT, NULL);
	/* cv_init(&np->r_commit.c_cv, NULL, CV_DEFAULT, NULL); */

	np->r_vnode = vp;
	np->n_mount = mi;

	np->n_fid = SMB_FID_UNUSED;
	np->n_uid = mi->smi_uid;
	np->n_gid = mi->smi_gid;
	/* Leave attributes "stale." */

#if 0 /* XXX dircache */
	/*
	 * We don't know if it's a directory yet.
	 * Let the caller do this?  XXX
	 */
	avl_create(&np->r_dir, compar, sizeof (rddir_cache),
	    offsetof(rddir_cache, tree));
#endif

	/* Now fill in the vnode. */
	vn_setops(vp, smbfs_vnodeops);
	vp->v_data = (caddr_t)np;
	VFS_HOLD(vfsp);
	vp->v_vfsp = vfsp;
	vp->v_type = VNON;

	/*
	 * We entered with mi->smi_hash_lk held (reader).
	 * Retake it now, (as the writer).
	 * Will return with it held.
	 */
	rw_enter(&mi->smi_hash_lk, RW_WRITER);

	/*
	 * There is a race condition where someone else
	 * may alloc the smbnode while no locks are held,
	 * so check again and recover if found.
	 */
	tnp = sn_hashfind(mi, rpath, rplen, &where);
	if (tnp != NULL) {
		/*
		 * Lost the race.  Put the node we were building
		 * on the free list and return the one we found.
		 */
		rw_exit(&mi->smi_hash_lk);
		kmem_free(new_rpath, rplen + 1);
		smbfs_addfree(np);
		rw_enter(&mi->smi_hash_lk, RW_READER);
		*newnode = 0;
		return (tnp);
	}

	/*
	 * Hash search identifies nodes by the remote path
	 * (n_rpath) so fill that in now, before linking
	 * this node into the node cache (AVL tree).
	 */
	np->n_rpath = new_rpath;
	np->n_rplen = rplen;
	np->n_ino = smbfs_gethash(new_rpath, rplen);

	sn_addhash_locked(np, where);
	*newnode = 1;
	return (np);
}
 495 
/*
 * smbfs_addfree
 * Put an smbnode on the free list, or destroy it immediately
 * if it offers no value were it to be reclaimed later.  Also
 * destroy immediately when we have too many smbnodes, etc.
 *
 * Normally called by smbfs_inactive, but also
 * called in here during cleanup operations.
 *
 * Note: takes the mount's AVL tree lock (as writer) internally,
 * so the caller must not already hold it.
 *
 * NFS: nfs_subr.c:rp_addfree
 */
void
smbfs_addfree(smbnode_t *np)
{
	vnode_t *vp;
	struct vfs *vfsp;
	smbmntinfo_t *mi;

	/* Must not already be on the freelist. */
	ASSERT(np->r_freef == NULL && np->r_freeb == NULL);

	vp = SMBTOV(np);
	ASSERT(vp->v_count >= 1);

	vfsp = vp->v_vfsp;
	mi = VFTOSMI(vfsp);

	/*
	 * If there are no more references to this smbnode and:
	 * we have too many smbnodes allocated, or if the node
	 * is no longer accessible via the AVL tree (!RHASHED),
	 * or an i/o error occurred while writing to the file,
	 * or it's part of an unmounted FS, then try to destroy
	 * it instead of putting it on the smbnode freelist.
	 */
	if (np->r_count == 0 && (
	    (np->r_flags & RHASHED) == 0 ||
	    (np->r_error != 0) ||
	    (vfsp->vfs_flag & VFS_UNMOUNTED) ||
	    (smbnodenew > nsmbnode))) {

		/* Try to destroy this node. */

		if (np->r_flags & RHASHED) {
			/*
			 * Unhash under the AVL writer lock, but first
			 * re-check v_count under v_lock: a concurrent
			 * lookup may have re-referenced the node.
			 */
			rw_enter(&mi->smi_hash_lk, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				vp->v_count--;
				mutex_exit(&vp->v_lock);
				rw_exit(&mi->smi_hash_lk);
				return;
				/*
				 * Will get another call later,
				 * via smbfs_inactive.
				 */
			}
			mutex_exit(&vp->v_lock);
			sn_rmhash_locked(np);
			rw_exit(&mi->smi_hash_lk);
		}

		sn_inactive(np);

		/*
		 * Recheck the vnode reference count.  We need to
		 * make sure that another reference has not been
		 * acquired while we were not holding v_lock.  The
		 * smbnode is not in the smbnode "hash" AVL tree, so
		 * the only way for a reference to have been acquired
		 * is for a VOP_PUTPAGE because the smbnode was marked
		 * with RDIRTY or for a modified page.  This vnode
		 * reference may have been acquired before our call
		 * to sn_inactive.  The i/o may have been completed,
		 * thus allowing sn_inactive to complete, but the
		 * reference to the vnode may not have been released
		 * yet.  In any case, the smbnode can not be destroyed
		 * until the other references to this vnode have been
		 * released.  The other references will take care of
		 * either destroying the smbnode or placing it on the
		 * smbnode freelist.  If there are no other references,
		 * then the smbnode may be safely destroyed.
		 */
		mutex_enter(&vp->v_lock);
		if (vp->v_count > 1) {
			vp->v_count--;
			mutex_exit(&vp->v_lock);
			return;
		}
		mutex_exit(&vp->v_lock);

		sn_destroy_node(np);
		return;
	}

	/*
	 * Lock the AVL tree and then recheck the reference count
	 * to ensure that no other threads have acquired a reference
	 * to indicate that the smbnode should not be placed on the
	 * freelist.  If another reference has been acquired, then
	 * just release this one and let the other thread complete
	 * the processing of adding this smbnode to the freelist.
	 */
	rw_enter(&mi->smi_hash_lk, RW_WRITER);

	mutex_enter(&vp->v_lock);
	if (vp->v_count > 1) {
		vp->v_count--;
		mutex_exit(&vp->v_lock);
		rw_exit(&mi->smi_hash_lk);
		return;
	}
	mutex_exit(&vp->v_lock);

	/*
	 * Put this node on the free list.
	 * (At the tail: smbfreelist points to the LRU end,
	 * and r_freeb of the head is the last element.)
	 */
	mutex_enter(&smbfreelist_lock);
	if (smbfreelist == NULL) {
		np->r_freef = np;
		np->r_freeb = np;
		smbfreelist = np;
	} else {
		np->r_freef = smbfreelist;
		np->r_freeb = smbfreelist->r_freeb;
		smbfreelist->r_freeb->r_freef = np;
		smbfreelist->r_freeb = np;
	}
	mutex_exit(&smbfreelist_lock);

	rw_exit(&mi->smi_hash_lk);
}
 626 
 627 /*
 628  * Remove an smbnode from the free list.
 629  *
 630  * The caller must be holding smbfreelist_lock and the smbnode
 631  * must be on the freelist.
 632  *
 633  * NFS: nfs_subr.c:rp_rmfree
 634  */
 635 static void
 636 sn_rmfree(smbnode_t *np)
 637 {
 638 
 639         ASSERT(MUTEX_HELD(&smbfreelist_lock));
 640         ASSERT(np->r_freef != NULL && np->r_freeb != NULL);
 641 
 642         if (np == smbfreelist) {
 643                 smbfreelist = np->r_freef;
 644                 if (np == smbfreelist)
 645                         smbfreelist = NULL;
 646         }
 647 
 648         np->r_freeb->r_freef = np->r_freef;
 649         np->r_freef->r_freeb = np->r_freeb;
 650 
 651         np->r_freef = np->r_freeb = NULL;
 652 }
 653 
 654 /*
 655  * Put an smbnode in the "hash" AVL tree.
 656  *
 657  * The caller must be hold the rwlock as writer.
 658  *
 659  * NFS: nfs_subr.c:rp_addhash
 660  */
 661 static void
 662 sn_addhash_locked(smbnode_t *np, avl_index_t where)
 663 {
 664         smbmntinfo_t *mi = np->n_mount;
 665 
 666         ASSERT(RW_WRITE_HELD(&mi->smi_hash_lk));
 667         ASSERT(!(np->r_flags & RHASHED));
 668 
 669         avl_insert(&mi->smi_hash_avl, np, where);
 670 
 671         mutex_enter(&np->r_statelock);
 672         np->r_flags |= RHASHED;
 673         mutex_exit(&np->r_statelock);
 674 }
 675 
 676 /*
 677  * Remove an smbnode from the "hash" AVL tree.
 678  *
 679  * The caller must hold the rwlock as writer.
 680  *
 681  * NFS: nfs_subr.c:rp_rmhash_locked
 682  */
 683 static void
 684 sn_rmhash_locked(smbnode_t *np)
 685 {
 686         smbmntinfo_t *mi = np->n_mount;
 687 
 688         ASSERT(RW_WRITE_HELD(&mi->smi_hash_lk));
 689         ASSERT(np->r_flags & RHASHED);
 690 
 691         avl_remove(&mi->smi_hash_avl, np);
 692 
 693         mutex_enter(&np->r_statelock);
 694         np->r_flags &= ~RHASHED;
 695         mutex_exit(&np->r_statelock);
 696 }
 697 
 698 /*
 699  * Remove an smbnode from the "hash" AVL tree.
 700  *
 701  * The caller must not be holding the rwlock.
 702  */
 703 void
 704 smbfs_rmhash(smbnode_t *np)
 705 {
 706         smbmntinfo_t *mi = np->n_mount;
 707 
 708         rw_enter(&mi->smi_hash_lk, RW_WRITER);
 709         sn_rmhash_locked(np);
 710         rw_exit(&mi->smi_hash_lk);
 711 }
 712 
/*
 * Lookup an smbnode by remote pathname
 *
 * The caller must be holding the AVL rwlock, either shared or exclusive.
 *
 * On a hit, the returned node carries a reference: either the
 * one recovered by taking it off the freelist, or a fresh
 * VN_HOLD when it was not on the freelist.
 *
 * NFS: nfs_subr.c:rfind
 */
static smbnode_t *
sn_hashfind(
	smbmntinfo_t *mi,
	const char *rpath,
	int rplen,
	avl_index_t *pwhere) /* optional */
{
	smbfs_node_hdr_t nhdr;
	smbnode_t *np;
	vnode_t *vp;

	ASSERT(RW_LOCK_HELD(&mi->smi_hash_lk));

	/*
	 * Build a search key holding just the rpath, which is
	 * all that smbfs_node_cmp (the AVL comparator) reads.
	 */
	bzero(&nhdr, sizeof (nhdr));
	nhdr.hdr_n_rpath = (char *)rpath;
	nhdr.hdr_n_rplen = rplen;

	/* See smbfs_node_cmp below. */
	np = avl_find(&mi->smi_hash_avl, &nhdr, pwhere);

	if (np == NULL)
		return (NULL);

	/*
	 * Found it in the "hash" AVL tree.
	 * Remove from free list, if necessary.
	 */
	vp = SMBTOV(np);
	if (np->r_freef != NULL) {
		mutex_enter(&smbfreelist_lock);
		/*
		 * If the smbnode is on the freelist,
		 * then remove it and use that reference
		 * as the new reference.  Otherwise,
		 * need to increment the reference count.
		 *
		 * (The unlocked r_freef check above may have
		 * raced with another thread; re-check it here
		 * under the freelist mutex.)
		 */
		if (np->r_freef != NULL) {
			sn_rmfree(np);
			mutex_exit(&smbfreelist_lock);
		} else {
			mutex_exit(&smbfreelist_lock);
			VN_HOLD(vp);
		}
	} else
		VN_HOLD(vp);

	return (np);
}
 768 
 769 static int
 770 smbfs_node_cmp(const void *va, const void *vb)
 771 {
 772         const smbfs_node_hdr_t *a = va;
 773         const smbfs_node_hdr_t *b = vb;
 774         int clen, diff;
 775 
 776         /*
 777          * Same semantics as strcmp, but does not
 778          * assume the strings are null terminated.
 779          */
 780         clen = (a->hdr_n_rplen < b->hdr_n_rplen) ?
 781             a->hdr_n_rplen : b->hdr_n_rplen;
 782         diff = strncmp(a->hdr_n_rpath, b->hdr_n_rpath, clen);
 783         if (diff < 0)
 784                 return (-1);
 785         if (diff > 0)
 786                 return (1);
 787         /* they match through clen */
 788         if (b->hdr_n_rplen > clen)
 789                 return (-1);
 790         if (a->hdr_n_rplen > clen)
 791                 return (1);
 792         return (0);
 793 }
 794 
/*
 * Setup the "hash" AVL tree used for our node cache,
 * keyed on the remote path via smbfs_node_cmp above.
 * See: smbfs_mount, smbfs_destroy_table.
 */
void
smbfs_init_hash_avl(avl_tree_t *avl)
{
	avl_create(avl, smbfs_node_cmp, sizeof (smbnode_t),
	    offsetof(smbnode_t, r_avl_node));
}
 805 
 806 /*
 807  * Invalidate the cached attributes for all nodes "under" the
 808  * passed-in node.  Note: the passed-in node is NOT affected by
 809  * this call.  This is used both for files under some directory
 810  * after the directory is deleted or renamed, and for extended
 811  * attribute files (named streams) under a plain file after that
 812  * file is renamed or deleted.
 813  *
 814  * Do this by walking the AVL tree starting at the passed in node,
 815  * and continuing while the visited nodes have a path prefix matching
 816  * the entire path of the passed-in node, and a separator just after
 817  * that matching path prefix.  Watch out for cases where the AVL tree
 818  * order may not exactly match the order of an FS walk, i.e.
 819  * consider this sequence:
 820  *      "foo"           (directory)
 821  *      "foo bar"       (name containing a space)
 822  *      "foo/bar"
 823  * The walk needs to skip "foo bar" and keep going until it finds
 824  * something that doesn't match the "foo" name prefix.
 825  */
 826 void
 827 smbfs_attrcache_prune(smbnode_t *top_np)
 828 {
 829         smbmntinfo_t *mi;
 830         smbnode_t *np;
 831         char *rpath;
 832         int rplen;
 833 
 834         mi = top_np->n_mount;
 835         rw_enter(&mi->smi_hash_lk, RW_READER);
 836 
 837         np = top_np;
 838         rpath = top_np->n_rpath;
 839         rplen = top_np->n_rplen;
 840         for (;;) {
 841                 np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER);
 842                 if (np == NULL)
 843                         break;
 844                 if (np->n_rplen < rplen)
 845                         break;
 846                 if (0 != strncmp(np->n_rpath, rpath, rplen))
 847                         break;
 848                 if (np->n_rplen > rplen && (
 849                     np->n_rpath[rplen] == ':' ||
 850                     np->n_rpath[rplen] == '\\'))
 851                         smbfs_attrcache_remove(np);
 852         }
 853 
 854         rw_exit(&mi->smi_hash_lk);
 855 }
 856 
/*
 * When non-zero, smbfs_check_table() keeps scanning after it
 * finds a busy node, so that every busy node gets reported.
 */
#ifdef SMB_VNODE_DEBUG
int smbfs_check_table_debug = 1;
#else /* SMB_VNODE_DEBUG */
int smbfs_check_table_debug = 0;
#endif /* SMB_VNODE_DEBUG */
 862 
 863 
 864 /*
 865  * Return 1 if there is a active vnode belonging to this vfs in the
 866  * smbnode cache.
 867  *
 868  * Several of these checks are done without holding the usual
 869  * locks.  This is safe because destroy_smbtable(), smbfs_addfree(),
 870  * etc. will redo the necessary checks before actually destroying
 871  * any smbnodes.
 872  *
 873  * NFS: nfs_subr.c:check_rtable
 874  *
 875  * Debugging changes here relative to NFS.
 876  * Relatively harmless, so left 'em in.
 877  */
 878 int
 879 smbfs_check_table(struct vfs *vfsp, smbnode_t *rtnp)
 880 {
 881         smbmntinfo_t *mi;
 882         smbnode_t *np;
 883         vnode_t *vp;
 884         int busycnt = 0;
 885 
 886         mi = VFTOSMI(vfsp);
 887         rw_enter(&mi->smi_hash_lk, RW_READER);
 888         for (np = avl_first(&mi->smi_hash_avl); np != NULL;
 889             np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER)) {
 890 
 891                 if (np == rtnp)
 892                         continue; /* skip the root */
 893                 vp = SMBTOV(np);
 894 
 895                 /* Now the 'busy' checks: */
 896                 /* Not on the free list? */
 897                 if (np->r_freef == NULL) {
 898                         SMBVDEBUG("!r_freef: node=0x%p, rpath=%s\n",
 899                             (void *)np, np->n_rpath);
 900                         busycnt++;
 901                 }
 902 
 903                 /* Has dirty pages? */
 904                 if (vn_has_cached_data(vp) &&
 905                     (np->r_flags & RDIRTY)) {
 906                         SMBVDEBUG("is dirty: node=0x%p, rpath=%s\n",
 907                             (void *)np, np->n_rpath);
 908                         busycnt++;
 909                 }
 910 
 911                 /* Other refs? (not reflected in v_count) */
 912                 if (np->r_count > 0) {
 913                         SMBVDEBUG("+r_count: node=0x%p, rpath=%s\n",
 914                             (void *)np, np->n_rpath);
 915                         busycnt++;
 916                 }
 917 
 918                 if (busycnt && !smbfs_check_table_debug)
 919                         break;
 920 
 921         }
 922         rw_exit(&mi->smi_hash_lk);
 923 
 924         return (busycnt);
 925 }
 926 
 927 /*
 928  * Destroy inactive vnodes from the AVL tree which belong to this
 929  * vfs.  It is essential that we destroy all inactive vnodes during a
 930  * forced unmount as well as during a normal unmount.
 931  *
 932  * NFS: nfs_subr.c:destroy_rtable
 933  *
 * In here, we're normally destroying all or most of the AVL tree,
 * so the natural choice is to use avl_destroy_nodes.  However,
 * there may be a few busy nodes that should remain in the AVL
 * tree when we're done.  The solution: use a temporary tree to
 * hold the busy nodes until we're done destroying the old tree,
 * then copy the temporary tree over the (now empty) real tree.
 940  */
void
smbfs_destroy_table(struct vfs *vfsp)
{
        avl_tree_t tmp_avl;
        smbmntinfo_t *mi;
        smbnode_t *np;
        smbnode_t *rlist;       /* nodes to destroy, linked via avl_child[0] */
        void *v;

        mi = VFTOSMI(vfsp);
        rlist = NULL;
        smbfs_init_hash_avl(&tmp_avl);

        rw_enter(&mi->smi_hash_lk, RW_WRITER);
        v = NULL;
        /*
         * avl_destroy_nodes() hands back each node while emptying
         * the per-mount "hash" AVL tree as it goes.
         */
        while ((np = avl_destroy_nodes(&mi->smi_hash_avl, &v)) != NULL) {

                mutex_enter(&smbfreelist_lock);
                if (np->r_freef == NULL) {
                        /*
                         * Busy node (not on the free list).
                         * Will keep in the final AVL tree.
                         */
                        mutex_exit(&smbfreelist_lock);
                        avl_add(&tmp_avl, np);
                } else {
                        /*
                         * It's on the free list.  Remove and
                         * arrange for it to be destroyed.
                         */
                        sn_rmfree(np);
                        mutex_exit(&smbfreelist_lock);

                        /*
                         * Last part of sn_rmhash_locked().
                         * NB: avl_destroy_nodes has already
                         * removed this from the "hash" AVL.
                         */
                        mutex_enter(&np->r_statelock);
                        np->r_flags &= ~RHASHED;
                        mutex_exit(&np->r_statelock);

                        /*
                         * Add to the list of nodes to destroy.
                         * Borrowing avl_child[0] for this list.
                         */
                        np->r_avl_node.avl_child[0] =
                            (struct avl_node *)rlist;
                        rlist = np;
                }
        }
        avl_destroy(&mi->smi_hash_avl);

        /*
         * Replace the (now destroyed) "hash" AVL with the
         * temporary AVL, which restores the busy nodes.
         */
        mi->smi_hash_avl = tmp_avl;
        rw_exit(&mi->smi_hash_lk);

        /*
         * Now destroy the nodes on our temporary list (rlist).
         * This call to smbfs_addfree will end up destroying the
         * smbnode, but in a safe way with the appropriate set
         * of checks done.
         */
        while ((np = rlist) != NULL) {
                rlist = (smbnode_t *)np->r_avl_node.avl_child[0];
                smbfs_addfree(np);
        }
}
1012 
1013 /*
1014  * This routine destroys all the resources associated with the smbnode
1015  * and then the smbnode itself.  Note: sn_inactive has been called.
1016  *
1017  * NFS: nfs_subr.c:destroy_rnode
1018  */
1019 static void
1020 sn_destroy_node(smbnode_t *np)
1021 {
1022         vnode_t *vp;
1023         vfs_t *vfsp;
1024 
1025         vp = SMBTOV(np);
1026         vfsp = vp->v_vfsp;
1027 
1028         ASSERT(vp->v_count == 1);
1029         ASSERT(np->r_count == 0);
1030         ASSERT(np->r_mapcnt == 0);
1031         ASSERT(np->r_secattr.vsa_aclentp == NULL);
1032         ASSERT(np->r_cred == NULL);
1033         ASSERT(np->n_rpath == NULL);
1034         ASSERT(!(np->r_flags & RHASHED));
1035         ASSERT(np->r_freef == NULL && np->r_freeb == NULL);
1036         atomic_add_long((ulong_t *)&smbnodenew, -1);
1037         vn_invalid(vp);
1038         vn_free(vp);
1039         kmem_cache_free(smbnode_cache, np);
1040         VFS_RELE(vfsp);
1041 }
1042 
1043 /*
1044  * Flush all vnodes in this (or every) vfs.
1045  * Used by nfs_sync and by nfs_unmount.
1046  */
1047 /*ARGSUSED*/
1048 void
1049 smbfs_rflush(struct vfs *vfsp, cred_t *cr) {
1050 
1051     smbmntinfo_t *mi;
1052     smbnode_t *np;
1053     vnode_t *vp;
1054 
1055     long num, cnt;
1056 
1057     vnode_t **vplist;
1058 
1059     mi = VFTOSMI(vfsp);
1060 
1061     cnt = 0;
1062     num = mi->smi_hash_avl.avl_numnodes;
1063     vplist = kmem_alloc(num * sizeof (vnode_t*), KM_SLEEP);
1064 
1065     rw_enter(&mi->smi_hash_lk, RW_READER);
1066     for (np = avl_first(&mi->smi_hash_avl); np != NULL;
1067             np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER)) {
1068         vp = SMBTOV(np);
1069         if (vn_is_readonly(vp))
1070             continue;
1071 
1072         if (vn_has_cached_data(vp) && (np->r_flags & RDIRTY || np->r_mapcnt > 0)) {
1073             VN_HOLD(vp);
1074             vplist[cnt++] = vp;
1075             if (cnt == num)
1076                 break;
1077         }
1078     }
1079     rw_exit(&mi->smi_hash_lk);
1080 
1081     while (cnt-- > 0) {
1082         vp = vplist[cnt];
1083         (void) VOP_PUTPAGE(vp, 0, 0, 0, cr, NULL);
1084         VN_RELE(vp);
1085     }
1086 
1087     kmem_free(vplist, num * sizeof (vnode_t*));
1088 }
1089 
1090 /* access cache */
1091 /* client handles */
1092 
1093 /*
1094  * initialize resources that are used by smbfs_subr.c
1095  * this is called from the _init() routine (by the way of smbfs_clntinit())
1096  *
1097  * NFS: nfs_subr.c:nfs_subrinit
1098  */
1099 int
1100 smbfs_subrinit(void)
1101 {
1102         ulong_t nsmbnode_max;
1103 
1104         /*
1105          * Allocate and initialize the smbnode cache
1106          */
1107         if (nsmbnode <= 0)
1108                 nsmbnode = ncsize; /* dnlc.h */
1109         nsmbnode_max = (ulong_t)((kmem_maxavail() >> 2) /
1110             sizeof (struct smbnode));
1111         if (nsmbnode > nsmbnode_max || (nsmbnode == 0 && ncsize == 0)) {
1112                 zcmn_err(GLOBAL_ZONEID, CE_NOTE,
1113                     "setting nsmbnode to max value of %ld", nsmbnode_max);
1114                 nsmbnode = nsmbnode_max;
1115         }
1116 
1117         smbnode_cache = kmem_cache_create("smbnode_cache", sizeof (smbnode_t),
1118             0, NULL, NULL, smbfs_kmem_reclaim, NULL, NULL, 0);
1119 
1120         /*
1121          * Initialize the various mutexes and reader/writer locks
1122          */
1123         mutex_init(&smbfreelist_lock, NULL, MUTEX_DEFAULT, NULL);
1124         mutex_init(&smbfs_minor_lock, NULL, MUTEX_DEFAULT, NULL);
1125 
1126         /*
1127          * Assign unique major number for all smbfs mounts
1128          */
1129         if ((smbfs_major = getudev()) == -1) {
1130                 zcmn_err(GLOBAL_ZONEID, CE_WARN,
1131                     "smbfs: init: can't get unique device number");
1132                 smbfs_major = 0;
1133         }
1134         smbfs_minor = 0;
1135 
1136         return (0);
1137 }
1138 
1139 /*
1140  * free smbfs hash table, etc.
1141  * NFS: nfs_subr.c:nfs_subrfini
1142  */
1143 void
1144 smbfs_subrfini(void)
1145 {
1146 
1147         /*
1148          * Destroy the smbnode cache
1149          */
1150         kmem_cache_destroy(smbnode_cache);
1151 
1152         /*
1153          * Destroy the various mutexes and reader/writer locks
1154          */
1155         mutex_destroy(&smbfreelist_lock);
1156         mutex_destroy(&smbfs_minor_lock);
1157 }
1158 
1159 /* rddir_cache ? */
1160 
1161 /*
1162  * Support functions for smbfs_kmem_reclaim
1163  */
1164 
/*
 * Walk the global free list, removing each node from its mount's
 * AVL tree (when still hashed) and destroying it via smbfs_addfree.
 */
static void
smbfs_node_reclaim(void)
{
        smbmntinfo_t *mi;
        smbnode_t *np;
        vnode_t *vp;

        mutex_enter(&smbfreelist_lock);
        while ((np = smbfreelist) != NULL) {
                /* Take the head node off the free list. */
                sn_rmfree(np);
                /*
                 * Drop the freelist lock before taking the per-mount
                 * hash lock and the vnode lock (lock ordering).
                 */
                mutex_exit(&smbfreelist_lock);
                if (np->r_flags & RHASHED) {
                        vp = SMBTOV(np);
                        mi = np->n_mount;
                        rw_enter(&mi->smi_hash_lk, RW_WRITER);
                        mutex_enter(&vp->v_lock);
                        if (vp->v_count > 1) {
                                /*
                                 * Another hold appeared while the node
                                 * sat on the free list: drop one
                                 * reference, leave the node hashed,
                                 * and move on to the next one.
                                 */
                                vp->v_count--;
                                mutex_exit(&vp->v_lock);
                                rw_exit(&mi->smi_hash_lk);
                                mutex_enter(&smbfreelist_lock);
                                continue;
                        }
                        mutex_exit(&vp->v_lock);
                        /* Ours alone; unhash it from the AVL tree. */
                        sn_rmhash_locked(np);
                        rw_exit(&mi->smi_hash_lk);
                }
                /*
                 * This call to smbfs_addfree will end up destroying the
                 * smbnode, but in a safe way with the appropriate set
                 * of checks done.
                 */
                smbfs_addfree(np);
                mutex_enter(&smbfreelist_lock);
        }
        mutex_exit(&smbfreelist_lock);
}
1202 
1203 /*
1204  * Called by kmem_cache_alloc ask us if we could
1205  * "Please give back some memory!"
1206  *
1207  * Todo: dump nodes from the free list?
1208  */
/*ARGSUSED*/
void
smbfs_kmem_reclaim(void *cdrarg)
{
        /* Free idle smbnodes found on the global free list. */
        smbfs_node_reclaim();
}
1215 
1216 /* nfs failover stuff */
1217 /* nfs_rw_xxx - see smbfs_rwlock.c */