1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  *
  25  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  26  *      All rights reserved.
  27  */
  28 /*
  29  * Copyright (c) 2017 by Delphix. All rights reserved.
  30  */
  31 
  32 /*
  33  * Node hash implementation initially borrowed from NFS (nfs_subr.c)
  34  * but then heavily modified. It's no longer an array of hash lists,
  35  * but an AVL tree per mount point.  More on this below.
  36  */
  37 
  38 #include <sys/param.h>
  39 #include <sys/systm.h>
  40 #include <sys/time.h>
  41 #include <sys/vnode.h>
  42 #include <sys/bitmap.h>
  43 #include <sys/dnlc.h>
  44 #include <sys/kmem.h>
  45 #include <sys/sunddi.h>
  46 #include <sys/sysmacros.h>
  47 #include <sys/fcntl.h>
  48 
  49 #include <netsmb/smb_osdep.h>
  50 
  51 #include <netsmb/smb.h>
  52 #include <netsmb/smb_conn.h>
  53 #include <netsmb/smb_subr.h>
  54 #include <netsmb/smb_rq.h>
  55 
  56 #include <smbfs/smbfs.h>
  57 #include <smbfs/smbfs_node.h>
  58 #include <smbfs/smbfs_subr.h>
  59 
  60 /*
  61  * The AVL trees (now per-mount) allow finding an smbfs node by its
  62  * full remote path name.  It also allows easy traversal of all nodes
  63  * below (path wise) any given node.  A reader/writer lock for each
  64  * (per mount) AVL tree is used to control access and to synchronize
  65  * lookups, additions, and deletions from that AVL tree.
  66  *
 * Previously, this code used a global array of hash chains, each with
  68  * its own rwlock.  A few struct members, functions, and comments may
  69  * still refer to a "hash", and those should all now be considered to
  70  * refer to the per-mount AVL tree that replaced the old hash chains.
  71  * (i.e. member smi_hash_lk, function sn_hashfind, etc.)
  72  *
  73  * The smbnode freelist is organized as a doubly linked list with
  74  * a head pointer.  Additions and deletions are synchronized via
  75  * a single mutex.
  76  *
  77  * In order to add an smbnode to the free list, it must be linked into
  78  * the mount's AVL tree and the exclusive lock for the AVL must be held.
  79  * If an smbnode is not linked into the AVL tree, then it is destroyed
  80  * because it represents no valuable information that can be reused
  81  * about the file.  The exclusive lock for the AVL tree must be held
  82  * in order to prevent a lookup in the AVL tree from finding the
  83  * smbnode and using it and assuming that the smbnode is not on the
  84  * freelist.  The lookup in the AVL tree will have the AVL tree lock
  85  * held, either exclusive or shared.
  86  *
  87  * The vnode reference count for each smbnode is not allowed to drop
  88  * below 1.  This prevents external entities, such as the VM
  89  * subsystem, from acquiring references to vnodes already on the
  90  * freelist and then trying to place them back on the freelist
 * when their reference is released.  This means that when an
 * smbnode is looked up in the AVL tree, then either the smbnode
 * is removed from the freelist and that reference is transferred to
  94  * the new reference or the vnode reference count must be incremented
  95  * accordingly.  The mutex for the freelist must be held in order to
  96  * accurately test to see if the smbnode is on the freelist or not.
  97  * The AVL tree lock might be held shared and it is possible that
  98  * two different threads may race to remove the smbnode from the
  99  * freelist.  This race can be resolved by holding the mutex for the
 * freelist.  Please note that the mutex for the freelist does not
 * need to be held if the smbnode is not on the freelist.  It can not be
 102  * placed on the freelist due to the requirement that the thread
 103  * putting the smbnode on the freelist must hold the exclusive lock
 104  * for the AVL tree and the thread doing the lookup in the AVL tree
 105  * is holding either a shared or exclusive lock for the AVL tree.
 106  *
 107  * The lock ordering is:
 108  *
 109  *      AVL tree lock -> vnode lock
 110  *      AVL tree lock -> freelist lock
 111  */
 112 
/* Protects smbfreelist and (with atomics) smbnodenew. */
static kmutex_t smbfreelist_lock;
/* Head of the doubly linked ring of reclaimable smbnodes. */
static smbnode_t *smbfreelist = NULL;
/* Count of smbnodes currently allocated (see make_smbnode). */
static ulong_t	smbnodenew = 0;
/* Soft limit on allocated smbnodes; reclaim/destroy when exceeded. */
long	nsmbnode = 0;

/* kmem cache for smbnode_t allocations. */
static struct kmem_cache *smbnode_cache;

/* All-zero vsecattr used to clear a node's cached security data. */
static const vsecattr_t smbfs_vsa0 = { 0 };

/*
 * Mutex to protect the following variables:
 *	smbfs_major
 *	smbfs_minor
 */
kmutex_t smbfs_minor_lock;
int smbfs_major;
int smbfs_minor;

/*
 * Sentinel passed as "fap" to force node creation when the caller
 * has no real attributes.  See smbfs_node_findcreate().
 */
struct smbfattr smbfs_fattr0;
 133 
 134 /*
 135  * Local functions.
 136  * SN for Smb Node
 137  */
 138 static void sn_rmfree(smbnode_t *);
 139 static void sn_inactive(smbnode_t *);
 140 static void sn_addhash_locked(smbnode_t *, avl_index_t);
 141 static void sn_rmhash_locked(smbnode_t *);
 142 static void sn_destroy_node(smbnode_t *);
 143 void smbfs_kmem_reclaim(void *cdrarg);
 144 
 145 static smbnode_t *
 146 sn_hashfind(smbmntinfo_t *, const char *, int, avl_index_t *);
 147 
 148 static smbnode_t *
 149 make_smbnode(smbmntinfo_t *, const char *, int, int *);
 150 
 151 /*
 152  * Free the resources associated with an smbnode.
 153  * Note: This is different from smbfs_inactive
 154  *
 155  * From NFS: nfs_subr.c:rinactive
 156  */
 157 static void
 158 sn_inactive(smbnode_t *np)
 159 {
 160         vsecattr_t      ovsa;
 161         cred_t          *oldcr;
 162         char            *orpath;
 163         int             orplen;
 164         vnode_t         *vp;
 165 
 166         /*
 167          * Here NFS has:
 168          * Flush and invalidate all pages (done by caller)
 169          * Free any held credentials and caches...
 170          * etc.  (See NFS code)
 171          */
 172         mutex_enter(&np->r_statelock);
 173 
 174         ovsa = np->r_secattr;
 175         np->r_secattr = smbfs_vsa0;
 176         np->r_sectime = 0;
 177 
 178         oldcr = np->r_cred;
 179         np->r_cred = NULL;
 180 
 181         orpath = np->n_rpath;
 182         orplen = np->n_rplen;
 183         np->n_rpath = NULL;
 184         np->n_rplen = 0;
 185 
 186         mutex_exit(&np->r_statelock);
 187 
 188         vp = SMBTOV(np);
 189         if (vn_has_cached_data(vp)) {
 190                 ASSERT3P(vp,==,NULL);
 191         }
 192 
 193         if (ovsa.vsa_aclentp != NULL)
 194                 kmem_free(ovsa.vsa_aclentp, ovsa.vsa_aclentsz);
 195 
 196         if (oldcr != NULL)
 197                 crfree(oldcr);
 198 
 199         if (orpath != NULL)
 200                 kmem_free(orpath, orplen + 1);
 201 }
 202 
 203 /*
 204  * Find and optionally create an smbnode for the passed
 205  * mountinfo, directory, separator, and name.  If the
 206  * desired smbnode already exists, return a reference.
 207  * If the file attributes pointer is non-null, the node
 208  * is created if necessary and linked into the AVL tree.
 209  *
 210  * Callers that need a node created but don't have the
 211  * real attributes pass smbfs_fattr0 to force creation.
 212  *
 213  * Note: make_smbnode() may upgrade the "hash" lock to exclusive.
 214  *
 215  * Based on NFS: nfs_subr.c:makenfsnode
 216  */
 217 smbnode_t *
 218 smbfs_node_findcreate(
 219         smbmntinfo_t *mi,
 220         const char *dirnm,
 221         int dirlen,
 222         const char *name,
 223         int nmlen,
 224         char sep,
 225         struct smbfattr *fap)
 226 {
 227         char tmpbuf[256];
 228         size_t rpalloc;
 229         char *p, *rpath;
 230         int rplen;
 231         smbnode_t *np;
 232         vnode_t *vp;
 233         int newnode;
 234 
 235         /*
 236          * Build the search string, either in tmpbuf or
 237          * in allocated memory if larger than tmpbuf.
 238          */
 239         rplen = dirlen;
 240         if (sep != '\0')
 241                 rplen++;
 242         rplen += nmlen;
 243         if (rplen < sizeof (tmpbuf)) {
 244                 /* use tmpbuf */
 245                 rpalloc = 0;
 246                 rpath = tmpbuf;
 247         } else {
 248                 rpalloc = rplen + 1;
 249                 rpath = kmem_alloc(rpalloc, KM_SLEEP);
 250         }
 251         p = rpath;
 252         bcopy(dirnm, p, dirlen);
 253         p += dirlen;
 254         if (sep != '\0')
 255                 *p++ = sep;
 256         if (name != NULL) {
 257                 bcopy(name, p, nmlen);
 258                 p += nmlen;
 259         }
 260         ASSERT(p == rpath + rplen);
 261 
 262         /*
 263          * Find or create a node with this path.
 264          */
 265         rw_enter(&mi->smi_hash_lk, RW_READER);
 266         if (fap == NULL)
 267                 np = sn_hashfind(mi, rpath, rplen, NULL);
 268         else
 269                 np = make_smbnode(mi, rpath, rplen, &newnode);
 270         rw_exit(&mi->smi_hash_lk);
 271 
 272         if (rpalloc)
 273                 kmem_free(rpath, rpalloc);
 274 
 275         if (fap == NULL) {
 276                 /*
 277                  * Caller is "just looking" (no create)
 278                  * so np may or may not be NULL here.
 279                  * Either way, we're done.
 280                  */
 281                 return (np);
 282         }
 283 
 284         /*
 285          * We should have a node, possibly created.
 286          * Do we have (real) attributes to apply?
 287          */
 288         ASSERT(np != NULL);
 289         if (fap == &smbfs_fattr0)
 290                 return (np);
 291 
 292         /*
 293          * Apply the given attributes to this node,
 294          * dealing with any cache impact, etc.
 295          */
 296         vp = SMBTOV(np);
 297         smbfs_attrcache_fa(vp, fap);
 298 
 299         /*
 300          * Note NFS sets vp->v_type here, assuming it
 301          * can never change for the life of a node.
 302          * We allow v_type to change, and set it in
 303          * smbfs_attrcache().  Also: mode, uid, gid
 304          */
 305         return (np);
 306 }
 307 
 308 /*
 309  * Here NFS has: nfs_subr.c:rtablehash
 310  * We use smbfs_hash().
 311  */
 312 
 313 /*
 314  * Find or create an smbnode.
 315  * From NFS: nfs_subr.c:make_rnode
 316  */
 317 static smbnode_t *
 318 make_smbnode(
 319         smbmntinfo_t *mi,
 320         const char *rpath,
 321         int rplen,
 322         int *newnode)
 323 {
 324         smbnode_t *np;
 325         smbnode_t *tnp;
 326         vnode_t *vp;
 327         vfs_t *vfsp;
 328         avl_index_t where;
 329         char *new_rpath = NULL;
 330 
 331         ASSERT(RW_READ_HELD(&mi->smi_hash_lk));
 332         vfsp = mi->smi_vfsp;
 333 
 334 start:
 335         np = sn_hashfind(mi, rpath, rplen, NULL);
 336         if (np != NULL) {
 337                 *newnode = 0;
 338                 return (np);
 339         }
 340 
 341         /* Note: will retake this lock below. */
 342         rw_exit(&mi->smi_hash_lk);
 343 
 344         /*
 345          * see if we can find something on the freelist
 346          */
 347         mutex_enter(&smbfreelist_lock);
 348         if (smbfreelist != NULL && smbnodenew >= nsmbnode) {
 349                 np = smbfreelist;
 350                 sn_rmfree(np);
 351                 mutex_exit(&smbfreelist_lock);
 352 
 353                 vp = SMBTOV(np);
 354 
 355                 if (np->r_flags & RHASHED) {
 356                         smbmntinfo_t *tmp_mi = np->n_mount;
 357                         ASSERT(tmp_mi != NULL);
 358                         rw_enter(&tmp_mi->smi_hash_lk, RW_WRITER);
 359                         mutex_enter(&vp->v_lock);
 360                         if (vp->v_count > 1) {
 361                                 VN_RELE_LOCKED(vp);
 362                                 mutex_exit(&vp->v_lock);
 363                                 rw_exit(&tmp_mi->smi_hash_lk);
 364                                 /* start over */
 365                                 rw_enter(&mi->smi_hash_lk, RW_READER);
 366                                 goto start;
 367                         }
 368                         mutex_exit(&vp->v_lock);
 369                         sn_rmhash_locked(np);
 370                         rw_exit(&tmp_mi->smi_hash_lk);
 371                 }
 372 
 373                 sn_inactive(np);
 374 
 375                 mutex_enter(&vp->v_lock);
 376                 if (vp->v_count > 1) {
 377                         VN_RELE_LOCKED(vp);
 378                         mutex_exit(&vp->v_lock);
 379                         rw_enter(&mi->smi_hash_lk, RW_READER);
 380                         goto start;
 381                 }
 382                 mutex_exit(&vp->v_lock);
 383                 vn_invalid(vp);
 384                 /*
 385                  * destroy old locks before bzero'ing and
 386                  * recreating the locks below.
 387                  */
 388                 smbfs_rw_destroy(&np->r_rwlock);
 389                 smbfs_rw_destroy(&np->r_lkserlock);
 390                 mutex_destroy(&np->r_statelock);
 391                 cv_destroy(&np->r_cv);
 392                 /*
 393                  * Make sure that if smbnode is recycled then
 394                  * VFS count is decremented properly before
 395                  * reuse.
 396                  */
 397                 VFS_RELE(vp->v_vfsp);
 398                 vn_reinit(vp);
 399         } else {
 400                 /*
 401                  * allocate and initialize a new smbnode
 402                  */
 403                 vnode_t *new_vp;
 404 
 405                 mutex_exit(&smbfreelist_lock);
 406 
 407                 np = kmem_cache_alloc(smbnode_cache, KM_SLEEP);
 408                 new_vp = vn_alloc(KM_SLEEP);
 409 
 410                 atomic_inc_ulong((ulong_t *)&smbnodenew);
 411                 vp = new_vp;
 412         }
 413 
 414         /*
 415          * Allocate and copy the rpath we'll need below.
 416          */
 417         new_rpath = kmem_alloc(rplen + 1, KM_SLEEP);
 418         bcopy(rpath, new_rpath, rplen);
 419         new_rpath[rplen] = '\0';
 420 
 421         /* Initialize smbnode_t */
 422         bzero(np, sizeof (*np));
 423 
 424         smbfs_rw_init(&np->r_rwlock, NULL, RW_DEFAULT, NULL);
 425         smbfs_rw_init(&np->r_lkserlock, NULL, RW_DEFAULT, NULL);
 426         mutex_init(&np->r_statelock, NULL, MUTEX_DEFAULT, NULL);
 427         cv_init(&np->r_cv, NULL, CV_DEFAULT, NULL);
 428         /* cv_init(&np->r_commit.c_cv, NULL, CV_DEFAULT, NULL); */
 429 
 430         np->r_vnode = vp;
 431         np->n_mount = mi;
 432 
 433         np->n_fid = SMB_FID_UNUSED;
 434         np->n_uid = mi->smi_uid;
 435         np->n_gid = mi->smi_gid;
 436         /* Leave attributes "stale." */
 437 
 438         /*
 439          * Here NFS has avl_create(&np->r_dir, ...)
 440          * for the readdir cache (not used here).
 441          */
 442 
 443         /* Now fill in the vnode. */
 444         vn_setops(vp, smbfs_vnodeops);
 445         vp->v_data = (caddr_t)np;
 446         VFS_HOLD(vfsp);
 447         vp->v_vfsp = vfsp;
 448         vp->v_type = VNON;
 449 
 450         /*
 451          * We entered with mi->smi_hash_lk held (reader).
 452          * Retake it now, (as the writer).
 453          * Will return with it held.
 454          */
 455         rw_enter(&mi->smi_hash_lk, RW_WRITER);
 456 
 457         /*
 458          * There is a race condition where someone else
 459          * may alloc the smbnode while no locks are held,
 460          * so check again and recover if found.
 461          */
 462         tnp = sn_hashfind(mi, rpath, rplen, &where);
 463         if (tnp != NULL) {
 464                 /*
 465                  * Lost the race.  Put the node we were building
 466                  * on the free list and return the one we found.
 467                  */
 468                 rw_exit(&mi->smi_hash_lk);
 469                 kmem_free(new_rpath, rplen + 1);
 470                 smbfs_addfree(np);
 471                 rw_enter(&mi->smi_hash_lk, RW_READER);
 472                 *newnode = 0;
 473                 return (tnp);
 474         }
 475 
 476         /*
 477          * Hash search identifies nodes by the remote path
 478          * (n_rpath) so fill that in now, before linking
 479          * this node into the node cache (AVL tree).
 480          */
 481         np->n_rpath = new_rpath;
 482         np->n_rplen = rplen;
 483         np->n_ino = smbfs_gethash(new_rpath, rplen);
 484 
 485         sn_addhash_locked(np, where);
 486         *newnode = 1;
 487         return (np);
 488 }
 489 
 490 /*
 491  * smbfs_addfree
 492  * Put an smbnode on the free list, or destroy it immediately
 493  * if it offers no value were it to be reclaimed later.  Also
 494  * destroy immediately when we have too many smbnodes, etc.
 495  *
 496  * Normally called by smbfs_inactive, but also
 497  * called in here during cleanup operations.
 498  *
 499  * From NFS: nfs_subr.c:rp_addfree
 500  */
 501 void
 502 smbfs_addfree(smbnode_t *np)
 503 {
 504         vnode_t *vp;
 505         struct vfs *vfsp;
 506         smbmntinfo_t *mi;
 507 
 508         ASSERT(np->r_freef == NULL && np->r_freeb == NULL);
 509 
 510         vp = SMBTOV(np);
 511         ASSERT(vp->v_count >= 1);
 512 
 513         vfsp = vp->v_vfsp;
 514         mi = VFTOSMI(vfsp);
 515 
 516         /*
 517          * If there are no more references to this smbnode and:
 518          * we have too many smbnodes allocated, or if the node
 519          * is no longer accessible via the AVL tree (!RHASHED),
 520          * or an i/o error occurred while writing to the file,
 521          * or it's part of an unmounted FS, then try to destroy
 522          * it instead of putting it on the smbnode freelist.
 523          */
 524         if (np->r_count == 0 && (
 525             (np->r_flags & RHASHED) == 0 ||
 526             (np->r_error != 0) ||
 527             (vfsp->vfs_flag & VFS_UNMOUNTED) ||
 528             (smbnodenew > nsmbnode))) {
 529 
 530                 /* Try to destroy this node. */
 531 
 532                 if (np->r_flags & RHASHED) {
 533                         rw_enter(&mi->smi_hash_lk, RW_WRITER);
 534                         mutex_enter(&vp->v_lock);
 535                         if (vp->v_count > 1) {
 536                                 VN_RELE_LOCKED(vp);
 537                                 mutex_exit(&vp->v_lock);
 538                                 rw_exit(&mi->smi_hash_lk);
 539                                 return;
 540                                 /*
 541                                  * Will get another call later,
 542                                  * via smbfs_inactive.
 543                                  */
 544                         }
 545                         mutex_exit(&vp->v_lock);
 546                         sn_rmhash_locked(np);
 547                         rw_exit(&mi->smi_hash_lk);
 548                 }
 549 
 550                 sn_inactive(np);
 551 
 552                 /*
 553                  * Recheck the vnode reference count.  We need to
 554                  * make sure that another reference has not been
 555                  * acquired while we were not holding v_lock.  The
 556                  * smbnode is not in the smbnode "hash" AVL tree, so
 557                  * the only way for a reference to have been acquired
 558                  * is for a VOP_PUTPAGE because the smbnode was marked
 559                  * with RDIRTY or for a modified page.  This vnode
 560                  * reference may have been acquired before our call
 561                  * to sn_inactive.  The i/o may have been completed,
 562                  * thus allowing sn_inactive to complete, but the
 563                  * reference to the vnode may not have been released
 564                  * yet.  In any case, the smbnode can not be destroyed
 565                  * until the other references to this vnode have been
 566                  * released.  The other references will take care of
 567                  * either destroying the smbnode or placing it on the
 568                  * smbnode freelist.  If there are no other references,
 569                  * then the smbnode may be safely destroyed.
 570                  */
 571                 mutex_enter(&vp->v_lock);
 572                 if (vp->v_count > 1) {
 573                         VN_RELE_LOCKED(vp);
 574                         mutex_exit(&vp->v_lock);
 575                         return;
 576                 }
 577                 mutex_exit(&vp->v_lock);
 578 
 579                 sn_destroy_node(np);
 580                 return;
 581         }
 582 
 583         /*
 584          * Lock the AVL tree and then recheck the reference count
 585          * to ensure that no other threads have acquired a reference
 586          * to indicate that the smbnode should not be placed on the
 587          * freelist.  If another reference has been acquired, then
 588          * just release this one and let the other thread complete
 589          * the processing of adding this smbnode to the freelist.
 590          */
 591         rw_enter(&mi->smi_hash_lk, RW_WRITER);
 592 
 593         mutex_enter(&vp->v_lock);
 594         if (vp->v_count > 1) {
 595                 VN_RELE_LOCKED(vp);
 596                 mutex_exit(&vp->v_lock);
 597                 rw_exit(&mi->smi_hash_lk);
 598                 return;
 599         }
 600         mutex_exit(&vp->v_lock);
 601 
 602         /*
 603          * Put this node on the free list.
 604          */
 605         mutex_enter(&smbfreelist_lock);
 606         if (smbfreelist == NULL) {
 607                 np->r_freef = np;
 608                 np->r_freeb = np;
 609                 smbfreelist = np;
 610         } else {
 611                 np->r_freef = smbfreelist;
 612                 np->r_freeb = smbfreelist->r_freeb;
 613                 smbfreelist->r_freeb->r_freef = np;
 614                 smbfreelist->r_freeb = np;
 615         }
 616         mutex_exit(&smbfreelist_lock);
 617 
 618         rw_exit(&mi->smi_hash_lk);
 619 }
 620 
 621 /*
 622  * Remove an smbnode from the free list.
 623  *
 624  * The caller must be holding smbfreelist_lock and the smbnode
 625  * must be on the freelist.
 626  *
 627  * From NFS: nfs_subr.c:rp_rmfree
 628  */
 629 static void
 630 sn_rmfree(smbnode_t *np)
 631 {
 632 
 633         ASSERT(MUTEX_HELD(&smbfreelist_lock));
 634         ASSERT(np->r_freef != NULL && np->r_freeb != NULL);
 635 
 636         if (np == smbfreelist) {
 637                 smbfreelist = np->r_freef;
 638                 if (np == smbfreelist)
 639                         smbfreelist = NULL;
 640         }
 641 
 642         np->r_freeb->r_freef = np->r_freef;
 643         np->r_freef->r_freeb = np->r_freeb;
 644 
 645         np->r_freef = np->r_freeb = NULL;
 646 }
 647 
 648 /*
 649  * Put an smbnode in the "hash" AVL tree.
 650  *
 651  * The caller must be hold the rwlock as writer.
 652  *
 653  * From NFS: nfs_subr.c:rp_addhash
 654  */
 655 static void
 656 sn_addhash_locked(smbnode_t *np, avl_index_t where)
 657 {
 658         smbmntinfo_t *mi = np->n_mount;
 659 
 660         ASSERT(RW_WRITE_HELD(&mi->smi_hash_lk));
 661 
 662         mutex_enter(&np->r_statelock);
 663         if ((np->r_flags & RHASHED) == 0) {
 664                 avl_insert(&mi->smi_hash_avl, np, where);
 665                 np->r_flags |= RHASHED;
 666         }
 667         mutex_exit(&np->r_statelock);
 668 }
 669 
 670 /*
 671  * Remove an smbnode from the "hash" AVL tree.
 672  *
 673  * The caller must hold the rwlock as writer.
 674  *
 675  * From NFS: nfs_subr.c:rp_rmhash_locked
 676  */
 677 static void
 678 sn_rmhash_locked(smbnode_t *np)
 679 {
 680         smbmntinfo_t *mi = np->n_mount;
 681 
 682         ASSERT(RW_WRITE_HELD(&mi->smi_hash_lk));
 683 
 684         mutex_enter(&np->r_statelock);
 685         if ((np->r_flags & RHASHED) != 0) {
 686                 np->r_flags &= ~RHASHED;
 687                 avl_remove(&mi->smi_hash_avl, np);
 688         }
 689         mutex_exit(&np->r_statelock);
 690 }
 691 
 692 /*
 693  * Remove an smbnode from the "hash" AVL tree.
 694  *
 695  * The caller must not be holding the rwlock.
 696  */
 697 void
 698 smbfs_rmhash(smbnode_t *np)
 699 {
 700         smbmntinfo_t *mi = np->n_mount;
 701 
 702         rw_enter(&mi->smi_hash_lk, RW_WRITER);
 703         sn_rmhash_locked(np);
 704         rw_exit(&mi->smi_hash_lk);
 705 }
 706 
 707 /*
 708  * Lookup an smbnode by remote pathname
 709  *
 710  * The caller must be holding the AVL rwlock, either shared or exclusive.
 711  *
 712  * From NFS: nfs_subr.c:rfind
 713  */
 714 static smbnode_t *
 715 sn_hashfind(
 716         smbmntinfo_t *mi,
 717         const char *rpath,
 718         int rplen,
 719         avl_index_t *pwhere) /* optional */
 720 {
 721         smbfs_node_hdr_t nhdr;
 722         smbnode_t *np;
 723         vnode_t *vp;
 724 
 725         ASSERT(RW_LOCK_HELD(&mi->smi_hash_lk));
 726 
 727         bzero(&nhdr, sizeof (nhdr));
 728         nhdr.hdr_n_rpath = (char *)rpath;
 729         nhdr.hdr_n_rplen = rplen;
 730 
 731         /* See smbfs_node_cmp below. */
 732         np = avl_find(&mi->smi_hash_avl, &nhdr, pwhere);
 733 
 734         if (np == NULL)
 735                 return (NULL);
 736 
 737         /*
 738          * Found it in the "hash" AVL tree.
 739          * Remove from free list, if necessary.
 740          */
 741         vp = SMBTOV(np);
 742         if (np->r_freef != NULL) {
 743                 mutex_enter(&smbfreelist_lock);
 744                 /*
 745                  * If the smbnode is on the freelist,
 746                  * then remove it and use that reference
 747                  * as the new reference.  Otherwise,
 748                  * need to increment the reference count.
 749                  */
 750                 if (np->r_freef != NULL) {
 751                         sn_rmfree(np);
 752                         mutex_exit(&smbfreelist_lock);
 753                 } else {
 754                         mutex_exit(&smbfreelist_lock);
 755                         VN_HOLD(vp);
 756                 }
 757         } else
 758                 VN_HOLD(vp);
 759 
 760         return (np);
 761 }
 762 
 763 static int
 764 smbfs_node_cmp(const void *va, const void *vb)
 765 {
 766         const smbfs_node_hdr_t *a = va;
 767         const smbfs_node_hdr_t *b = vb;
 768         int clen, diff;
 769 
 770         /*
 771          * Same semantics as strcmp, but does not
 772          * assume the strings are null terminated.
 773          */
 774         clen = (a->hdr_n_rplen < b->hdr_n_rplen) ?
 775             a->hdr_n_rplen : b->hdr_n_rplen;
 776         diff = strncmp(a->hdr_n_rpath, b->hdr_n_rpath, clen);
 777         if (diff < 0)
 778                 return (-1);
 779         if (diff > 0)
 780                 return (1);
 781         /* they match through clen */
 782         if (b->hdr_n_rplen > clen)
 783                 return (-1);
 784         if (a->hdr_n_rplen > clen)
 785                 return (1);
 786         return (0);
 787 }
 788 
 789 /*
 790  * Setup the "hash" AVL tree used for our node cache.
 791  * See: smbfs_mount, smbfs_destroy_table.
 792  */
 793 void
 794 smbfs_init_hash_avl(avl_tree_t *avl)
 795 {
 796         avl_create(avl, smbfs_node_cmp, sizeof (smbnode_t),
 797             offsetof(smbnode_t, r_avl_node));
 798 }
 799 
 800 /*
 801  * Invalidate the cached attributes for all nodes "under" the
 802  * passed-in node.  Note: the passed-in node is NOT affected by
 803  * this call.  This is used both for files under some directory
 804  * after the directory is deleted or renamed, and for extended
 805  * attribute files (named streams) under a plain file after that
 806  * file is renamed or deleted.
 807  *
 808  * Do this by walking the AVL tree starting at the passed in node,
 809  * and continuing while the visited nodes have a path prefix matching
 810  * the entire path of the passed-in node, and a separator just after
 811  * that matching path prefix.  Watch out for cases where the AVL tree
 812  * order may not exactly match the order of an FS walk, i.e.
 813  * consider this sequence:
 814  *      "foo"           (directory)
 815  *      "foo bar"       (name containing a space)
 816  *      "foo/bar"
 817  * The walk needs to skip "foo bar" and keep going until it finds
 818  * something that doesn't match the "foo" name prefix.
 819  */
 820 void
 821 smbfs_attrcache_prune(smbnode_t *top_np)
 822 {
 823         smbmntinfo_t *mi;
 824         smbnode_t *np;
 825         char *rpath;
 826         int rplen;
 827 
 828         mi = top_np->n_mount;
 829         rw_enter(&mi->smi_hash_lk, RW_READER);
 830 
 831         np = top_np;
 832         rpath = top_np->n_rpath;
 833         rplen = top_np->n_rplen;
 834         for (;;) {
 835                 np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER);
 836                 if (np == NULL)
 837                         break;
 838                 if (np->n_rplen < rplen)
 839                         break;
 840                 if (0 != strncmp(np->n_rpath, rpath, rplen))
 841                         break;
 842                 if (np->n_rplen > rplen && (
 843                     np->n_rpath[rplen] == ':' ||
 844                     np->n_rpath[rplen] == '\\'))
 845                         smbfs_attrcache_remove(np);
 846         }
 847 
 848         rw_exit(&mi->smi_hash_lk);
 849 }
 850 
/*
 * When non-zero, smbfs_check_table() keeps walking after finding a
 * busy node so that every busy node is reported, not just the first.
 */
#ifdef SMB_VNODE_DEBUG
int smbfs_check_table_debug = 1;
#else /* SMB_VNODE_DEBUG */
int smbfs_check_table_debug = 0;
#endif /* SMB_VNODE_DEBUG */
 856 
 857 
 858 /*
 859  * Return 1 if there is a active vnode belonging to this vfs in the
 860  * smbnode cache.
 861  *
 862  * Several of these checks are done without holding the usual
 863  * locks.  This is safe because destroy_smbtable(), smbfs_addfree(),
 864  * etc. will redo the necessary checks before actually destroying
 865  * any smbnodes.
 866  *
 867  * From NFS: nfs_subr.c:check_rtable
 868  *
 869  * Debugging changes here relative to NFS.
 870  * Relatively harmless, so left 'em in.
 871  */
 872 int
 873 smbfs_check_table(struct vfs *vfsp, smbnode_t *rtnp)
 874 {
 875         smbmntinfo_t *mi;
 876         smbnode_t *np;
 877         vnode_t *vp;
 878         int busycnt = 0;
 879 
 880         mi = VFTOSMI(vfsp);
 881         rw_enter(&mi->smi_hash_lk, RW_READER);
 882         for (np = avl_first(&mi->smi_hash_avl); np != NULL;
 883             np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER)) {
 884 
 885                 if (np == rtnp)
 886                         continue; /* skip the root */
 887                 vp = SMBTOV(np);
 888 
 889                 /* Now the 'busy' checks: */
 890                 /* Not on the free list? */
 891                 if (np->r_freef == NULL) {
 892                         SMBVDEBUG("!r_freef: node=0x%p, rpath=%s\n",
 893                             (void *)np, np->n_rpath);
 894                         busycnt++;
 895                 }
 896 
 897                 /* Has dirty pages? */
 898                 if (vn_has_cached_data(vp) &&
 899                     (np->r_flags & RDIRTY)) {
 900                         SMBVDEBUG("is dirty: node=0x%p, rpath=%s\n",
 901                             (void *)np, np->n_rpath);
 902                         busycnt++;
 903                 }
 904 
 905                 /* Other refs? (not reflected in v_count) */
 906                 if (np->r_count > 0) {
 907                         SMBVDEBUG("+r_count: node=0x%p, rpath=%s\n",
 908                             (void *)np, np->n_rpath);
 909                         busycnt++;
 910                 }
 911 
 912                 if (busycnt && !smbfs_check_table_debug)
 913                         break;
 914 
 915         }
 916         rw_exit(&mi->smi_hash_lk);
 917 
 918         return (busycnt);
 919 }
 920 
 921 /*
 922  * Destroy inactive vnodes from the AVL tree which belong to this
 923  * vfs.  It is essential that we destroy all inactive vnodes during a
 924  * forced unmount as well as during a normal unmount.
 925  *
 926  * Based on NFS: nfs_subr.c:destroy_rtable
 927  *
 928  * In here, we're normally destrying all or most of the AVL tree,
 929  * so the natural choice is to use avl_destroy_nodes.  However,
 930  * there may be a few busy nodes that should remain in the AVL
 931  * tree when we're done.  The solution: use a temporary tree to
 932  * hold the busy nodes until we're done destroying the old tree,
 933  * then copy the temporary tree over the (now emtpy) real tree.
 934  */
 935 void
 936 smbfs_destroy_table(struct vfs *vfsp)
 937 {
 938         avl_tree_t tmp_avl;
 939         smbmntinfo_t *mi;
 940         smbnode_t *np;
 941         smbnode_t *rlist;
 942         void *v;
 943 
 944         mi = VFTOSMI(vfsp);
 945         rlist = NULL;
 946         smbfs_init_hash_avl(&tmp_avl);
 947 
 948         rw_enter(&mi->smi_hash_lk, RW_WRITER);
 949         v = NULL;
 950         while ((np = avl_destroy_nodes(&mi->smi_hash_avl, &v)) != NULL) {
 951 
 952                 mutex_enter(&smbfreelist_lock);
 953                 if (np->r_freef == NULL) {
 954                         /*
 955                          * Busy node (not on the free list).
 956                          * Will keep in the final AVL tree.
 957                          */
 958                         mutex_exit(&smbfreelist_lock);
 959                         avl_add(&tmp_avl, np);
 960                 } else {
 961                         /*
 962                          * It's on the free list.  Remove and
 963                          * arrange for it to be destroyed.
 964                          */
 965                         sn_rmfree(np);
 966                         mutex_exit(&smbfreelist_lock);
 967 
 968                         /*
 969                          * Last part of sn_rmhash_locked().
 970                          * NB: avl_destroy_nodes has already
 971                          * removed this from the "hash" AVL.
 972                          */
 973                         mutex_enter(&np->r_statelock);
 974                         np->r_flags &= ~RHASHED;
 975                         mutex_exit(&np->r_statelock);
 976 
 977                         /*
 978                          * Add to the list of nodes to destroy.
 979                          * Borrowing avl_child[0] for this list.
 980                          */
 981                         np->r_avl_node.avl_child[0] =
 982                             (struct avl_node *)rlist;
 983                         rlist = np;
 984                 }
 985         }
 986         avl_destroy(&mi->smi_hash_avl);
 987 
 988         /*
 989          * Replace the (now destroyed) "hash" AVL with the
 990          * temporary AVL, which restores the busy nodes.
 991          */
 992         mi->smi_hash_avl = tmp_avl;
 993         rw_exit(&mi->smi_hash_lk);
 994 
 995         /*
 996          * Now destroy the nodes on our temporary list (rlist).
 997          * This call to smbfs_addfree will end up destroying the
 998          * smbnode, but in a safe way with the appropriate set
 999          * of checks done.
1000          */
1001         while ((np = rlist) != NULL) {
1002                 rlist = (smbnode_t *)np->r_avl_node.avl_child[0];
1003                 smbfs_addfree(np);
1004         }
1005 }
1006 
1007 /*
1008  * This routine destroys all the resources associated with the smbnode
1009  * and then the smbnode itself.  Note: sn_inactive has been called.
1010  *
1011  * From NFS: nfs_subr.c:destroy_rnode
1012  */
1013 static void
1014 sn_destroy_node(smbnode_t *np)
1015 {
1016         vnode_t *vp;
1017         vfs_t *vfsp;
1018 
1019         vp = SMBTOV(np);
1020         vfsp = vp->v_vfsp;
1021 
1022         ASSERT(vp->v_count == 1);
1023         ASSERT(np->r_count == 0);
1024         ASSERT(np->r_mapcnt == 0);
1025         ASSERT(np->r_secattr.vsa_aclentp == NULL);
1026         ASSERT(np->r_cred == NULL);
1027         ASSERT(np->n_rpath == NULL);
1028         ASSERT(!(np->r_flags & RHASHED));
1029         ASSERT(np->r_freef == NULL && np->r_freeb == NULL);
1030         atomic_dec_ulong((ulong_t *)&smbnodenew);
1031         vn_invalid(vp);
1032         vn_free(vp);
1033         kmem_cache_free(smbnode_cache, np);
1034         VFS_RELE(vfsp);
1035 }
1036 
1037 /*
1038  * From NFS rflush()
1039  * Flush all vnodes in this (or every) vfs.
1040  * Used by smbfs_sync and by smbfs_unmount.
1041  */
1042 /*ARGSUSED*/
1043 void
1044 smbfs_rflush(struct vfs *vfsp, cred_t *cr)
1045 {
1046         smbmntinfo_t *mi;
1047         smbnode_t *np;
1048         vnode_t *vp, **vplist;
1049         long num, cnt;
1050 
1051         mi = VFTOSMI(vfsp);
1052 
1053         /*
1054          * Check to see whether there is anything to do.
1055          */
1056         num = avl_numnodes(&mi->smi_hash_avl);
1057         if (num == 0)
1058                 return;
1059 
1060         /*
1061          * Allocate a slot for all currently active rnodes on the
1062          * supposition that they all may need flushing.
1063          */
1064         vplist = kmem_alloc(num * sizeof (*vplist), KM_SLEEP);
1065         cnt = 0;
1066 
1067         /*
1068          * Walk the AVL tree looking for rnodes with page
1069          * lists associated with them.  Make a list of these
1070          * files.
1071          */
1072         rw_enter(&mi->smi_hash_lk, RW_READER);
1073         for (np = avl_first(&mi->smi_hash_avl); np != NULL;
1074             np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER)) {
1075                 vp = SMBTOV(np);
1076                 /*
1077                  * Don't bother sync'ing a vp if it
1078                  * is part of virtual swap device or
1079                  * if VFS is read-only
1080                  */
1081                 if (IS_SWAPVP(vp) || vn_is_readonly(vp))
1082                         continue;
1083                 /*
1084                  * If the vnode has pages and is marked as either
1085                  * dirty or mmap'd, hold and add this vnode to the
1086                  * list of vnodes to flush.
1087                  */
1088                 if (vn_has_cached_data(vp) &&
1089                     ((np->r_flags & RDIRTY) || np->r_mapcnt > 0)) {
1090                         VN_HOLD(vp);
1091                         vplist[cnt++] = vp;
1092                         if (cnt == num)
1093                                 break;
1094                 }
1095         }
1096         rw_exit(&mi->smi_hash_lk);
1097 
1098         /*
1099          * Flush and release all of the files on the list.
1100          */
1101         while (cnt-- > 0) {
1102                 vp = vplist[cnt];
1103                 (void) VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_ASYNC, cr, NULL);
1104                 VN_RELE(vp);
1105         }
1106 
1107         kmem_free(vplist, num * sizeof (vnode_t *));
1108 }
1109 
1110 /* Here NFS has access cache stuff (nfs_subr.c) not used here */
1111 
1112 /*
1113  * Set or Clear direct I/O flag
1114  * VOP_RWLOCK() is held for write access to prevent a race condition
1115  * which would occur if a process is in the middle of a write when
1116  * directio flag gets set. It is possible that all pages may not get flushed.
1117  * From nfs_common.c
1118  */
1119 
1120 /* ARGSUSED */
1121 int
1122 smbfs_directio(vnode_t *vp, int cmd, cred_t *cr)
1123 {
1124         int     error = 0;
1125         smbnode_t       *np;
1126 
1127         np = VTOSMB(vp);
1128 
1129         if (cmd == DIRECTIO_ON) {
1130 
1131                 if (np->r_flags & RDIRECTIO)
1132                         return (0);
1133 
1134                 /*
1135                  * Flush the page cache.
1136                  */
1137 
1138                 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1139 
1140                 if (np->r_flags & RDIRECTIO) {
1141                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1142                         return (0);
1143                 }
1144 
1145                 /* Here NFS also checks ->r_awcount */
1146                 if (vn_has_cached_data(vp) &&
1147                     (np->r_flags & RDIRTY) != 0) {
1148                         error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0,
1149                             B_INVAL, cr, NULL);
1150                         if (error) {
1151                                 if (error == ENOSPC || error == EDQUOT) {
1152                                         mutex_enter(&np->r_statelock);
1153                                         if (!np->r_error)
1154                                                 np->r_error = error;
1155                                         mutex_exit(&np->r_statelock);
1156                                 }
1157                                 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1158                                 return (error);
1159                         }
1160                 }
1161 
1162                 mutex_enter(&np->r_statelock);
1163                 np->r_flags |= RDIRECTIO;
1164                 mutex_exit(&np->r_statelock);
1165                 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1166                 return (0);
1167         }
1168 
1169         if (cmd == DIRECTIO_OFF) {
1170                 mutex_enter(&np->r_statelock);
1171                 np->r_flags &= ~RDIRECTIO;       /* disable direct mode */
1172                 mutex_exit(&np->r_statelock);
1173                 return (0);
1174         }
1175 
1176         return (EINVAL);
1177 }
1178 
1179 static kmutex_t smbfs_newnum_lock;
1180 static uint32_t smbfs_newnum_val = 0;
1181 
1182 /*
1183  * Return a number 0..0xffffffff that's different from the last
1184  * 0xffffffff numbers this returned.  Used for unlinked files.
1185  * From NFS nfs_subr.c newnum
1186  */
1187 uint32_t
1188 smbfs_newnum(void)
1189 {
1190         uint32_t id;
1191 
1192         mutex_enter(&smbfs_newnum_lock);
1193         if (smbfs_newnum_val == 0)
1194                 smbfs_newnum_val = (uint32_t)gethrestime_sec();
1195         id = smbfs_newnum_val++;
1196         mutex_exit(&smbfs_newnum_lock);
1197         return (id);
1198 }
1199 
1200 /*
1201  * Fill in a temporary name at buf
1202  */
1203 int
1204 smbfs_newname(char *buf, size_t buflen)
1205 {
1206         uint_t id;
1207         int n;
1208 
1209         id = smbfs_newnum();
1210         n = snprintf(buf, buflen, "~$smbfs%08X", id);
1211         return (n);
1212 }
1213 
1214 
1215 /*
1216  * initialize resources that are used by smbfs_subr.c
1217  * this is called from the _init() routine (by the way of smbfs_clntinit())
1218  *
1219  * From NFS: nfs_subr.c:nfs_subrinit
1220  */
1221 int
1222 smbfs_subrinit(void)
1223 {
1224         ulong_t nsmbnode_max;
1225 
1226         /*
1227          * Allocate and initialize the smbnode cache
1228          */
1229         if (nsmbnode <= 0)
1230                 nsmbnode = ncsize; /* dnlc.h */
1231         nsmbnode_max = (ulong_t)((kmem_maxavail() >> 2) /
1232             sizeof (struct smbnode));
1233         if (nsmbnode > nsmbnode_max || (nsmbnode == 0 && ncsize == 0)) {
1234                 zcmn_err(GLOBAL_ZONEID, CE_NOTE,
1235                     "setting nsmbnode to max value of %ld", nsmbnode_max);
1236                 nsmbnode = nsmbnode_max;
1237         }
1238 
1239         smbnode_cache = kmem_cache_create("smbnode_cache", sizeof (smbnode_t),
1240             0, NULL, NULL, smbfs_kmem_reclaim, NULL, NULL, 0);
1241 
1242         /*
1243          * Initialize the various mutexes and reader/writer locks
1244          */
1245         mutex_init(&smbfreelist_lock, NULL, MUTEX_DEFAULT, NULL);
1246         mutex_init(&smbfs_minor_lock, NULL, MUTEX_DEFAULT, NULL);
1247 
1248         /*
1249          * Assign unique major number for all smbfs mounts
1250          */
1251         if ((smbfs_major = getudev()) == -1) {
1252                 zcmn_err(GLOBAL_ZONEID, CE_WARN,
1253                     "smbfs: init: can't get unique device number");
1254                 smbfs_major = 0;
1255         }
1256         smbfs_minor = 0;
1257 
1258         return (0);
1259 }
1260 
1261 /*
1262  * free smbfs hash table, etc.
1263  * From NFS: nfs_subr.c:nfs_subrfini
1264  */
1265 void
1266 smbfs_subrfini(void)
1267 {
1268 
1269         /*
1270          * Destroy the smbnode cache
1271          */
1272         kmem_cache_destroy(smbnode_cache);
1273 
1274         /*
1275          * Destroy the various mutexes and reader/writer locks
1276          */
1277         mutex_destroy(&smbfreelist_lock);
1278         mutex_destroy(&smbfs_minor_lock);
1279 }
1280 
1281 /* rddir_cache ? */
1282 
1283 /*
1284  * Support functions for smbfs_kmem_reclaim
1285  */
1286 
1287 static void
1288 smbfs_node_reclaim(void)
1289 {
1290         smbmntinfo_t *mi;
1291         smbnode_t *np;
1292         vnode_t *vp;
1293 
1294         mutex_enter(&smbfreelist_lock);
1295         while ((np = smbfreelist) != NULL) {
1296                 sn_rmfree(np);
1297                 mutex_exit(&smbfreelist_lock);
1298                 if (np->r_flags & RHASHED) {
1299                         vp = SMBTOV(np);
1300                         mi = np->n_mount;
1301                         rw_enter(&mi->smi_hash_lk, RW_WRITER);
1302                         mutex_enter(&vp->v_lock);
1303                         if (vp->v_count > 1) {
1304                                 VN_RELE_LOCKED(vp);
1305                                 mutex_exit(&vp->v_lock);
1306                                 rw_exit(&mi->smi_hash_lk);
1307                                 mutex_enter(&smbfreelist_lock);
1308                                 continue;
1309                         }
1310                         mutex_exit(&vp->v_lock);
1311                         sn_rmhash_locked(np);
1312                         rw_exit(&mi->smi_hash_lk);
1313                 }
1314                 /*
1315                  * This call to smbfs_addfree will end up destroying the
1316                  * smbnode, but in a safe way with the appropriate set
1317                  * of checks done.
1318                  */
1319                 smbfs_addfree(np);
1320                 mutex_enter(&smbfreelist_lock);
1321         }
1322         mutex_exit(&smbfreelist_lock);
1323 }
1324 
1325 /*
1326  * Called by kmem_cache_alloc ask us if we could
1327  * "Please give back some memory!"
1328  *
1329  * Todo: dump nodes from the free list?
1330  */
1331 /*ARGSUSED*/
1332 void
1333 smbfs_kmem_reclaim(void *cdrarg)
1334 {
1335         smbfs_node_reclaim();
1336 }
1337 
1338 /*
1339  * Here NFS has failover stuff and
1340  * nfs_rw_xxx - see smbfs_rwlock.c
1341  */