1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 *
25 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
26 * All rights reserved.
27 */
28 /*
29 * Copyright (c) 2017 by Delphix. All rights reserved.
30 */
31
32 /*
33 * Node hash implementation initially borrowed from NFS (nfs_subr.c)
34 * but then heavily modified. It's no longer an array of hash lists,
35 * but an AVL tree per mount point. More on this below.
36 */
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/time.h>
41 #include <sys/vnode.h>
42 #include <sys/bitmap.h>
43 #include <sys/dnlc.h>
44 #include <sys/kmem.h>
45 #include <sys/sunddi.h>
46 #include <sys/sysmacros.h>
47 #include <sys/fcntl.h>
48
49 #include <netsmb/smb_osdep.h>
50
51 #include <netsmb/smb.h>
52 #include <netsmb/smb_conn.h>
53 #include <netsmb/smb_subr.h>
54 #include <netsmb/smb_rq.h>
55
56 #include <smbfs/smbfs.h>
57 #include <smbfs/smbfs_node.h>
58 #include <smbfs/smbfs_subr.h>
59
60 /*
61 * The AVL trees (now per-mount) allow finding an smbfs node by its
62 * full remote path name. It also allows easy traversal of all nodes
63 * below (path wise) any given node. A reader/writer lock for each
64 * (per mount) AVL tree is used to control access and to synchronize
65 * lookups, additions, and deletions from that AVL tree.
66 *
 * Previously, this code used a global array of hash chains, each with
68 * its own rwlock. A few struct members, functions, and comments may
69 * still refer to a "hash", and those should all now be considered to
70 * refer to the per-mount AVL tree that replaced the old hash chains.
71 * (i.e. member smi_hash_lk, function sn_hashfind, etc.)
72 *
73 * The smbnode freelist is organized as a doubly linked list with
74 * a head pointer. Additions and deletions are synchronized via
75 * a single mutex.
76 *
77 * In order to add an smbnode to the free list, it must be linked into
78 * the mount's AVL tree and the exclusive lock for the AVL must be held.
79 * If an smbnode is not linked into the AVL tree, then it is destroyed
80 * because it represents no valuable information that can be reused
81 * about the file. The exclusive lock for the AVL tree must be held
82 * in order to prevent a lookup in the AVL tree from finding the
83 * smbnode and using it and assuming that the smbnode is not on the
84 * freelist. The lookup in the AVL tree will have the AVL tree lock
85 * held, either exclusive or shared.
86 *
87 * The vnode reference count for each smbnode is not allowed to drop
88 * below 1. This prevents external entities, such as the VM
89 * subsystem, from acquiring references to vnodes already on the
90 * freelist and then trying to place them back on the freelist
 * when their reference is released. This means that when an
 * smbnode is looked up in the AVL tree, then either the smbnode
 * is removed from the freelist and that reference is transferred to
94 * the new reference or the vnode reference count must be incremented
95 * accordingly. The mutex for the freelist must be held in order to
96 * accurately test to see if the smbnode is on the freelist or not.
97 * The AVL tree lock might be held shared and it is possible that
98 * two different threads may race to remove the smbnode from the
99 * freelist. This race can be resolved by holding the mutex for the
100 * freelist. Please note that the mutex for the freelist does not
 * need to be held if the smbnode is not on the freelist. It can not be
102 * placed on the freelist due to the requirement that the thread
103 * putting the smbnode on the freelist must hold the exclusive lock
104 * for the AVL tree and the thread doing the lookup in the AVL tree
105 * is holding either a shared or exclusive lock for the AVL tree.
106 *
107 * The lock ordering is:
108 *
109 * AVL tree lock -> vnode lock
110 * AVL tree lock -> freelist lock
111 */
112
/*
 * Free list state.  smbfreelist is the head of a circular, doubly
 * linked list of smbnodes (linked through r_freef / r_freeb), and
 * smbfreelist_lock is the mutex that sn_rmfree() requires to be held
 * while the list is modified.
 */
static kmutex_t smbfreelist_lock;
static smbnode_t *smbfreelist = NULL;
/*
 * smbnodenew counts smbnodes currently allocated: make_smbnode()
 * increments it when it allocates a fresh node and sn_destroy_node()
 * decrements it.  nsmbnode is the limit it is compared against:
 * make_smbnode() recycles from the free list once smbnodenew >= nsmbnode,
 * and smbfs_addfree() destroys nodes while smbnodenew > nsmbnode.
 * smbfs_subrinit() supplies a default when nsmbnode is <= 0.
 */
static ulong_t smbnodenew = 0;
long nsmbnode = 0;

/* kmem cache for smbnode_t; created in smbfs_subrinit(). */
static struct kmem_cache *smbnode_cache;

/* All-zero vsecattr_t; sn_inactive() copies it over r_secattr to reset it. */
static const vsecattr_t smbfs_vsa0 = { 0 };

/*
 * Mutex to protect the following variables:
 *	smbfs_major
 *	smbfs_minor
 */
kmutex_t smbfs_minor_lock;
int smbfs_major;
int smbfs_minor;

/*
 * Sentinel attributes.  smbfs_node_findcreate() compares its "fap"
 * argument against the address of this object: passing &smbfs_fattr0
 * forces node creation without applying any attributes.
 */
struct smbfattr smbfs_fattr0;

/*
 * Local functions.
 * SN for Smb Node
 */
static void sn_rmfree(smbnode_t *);
static void sn_inactive(smbnode_t *);
static void sn_addhash_locked(smbnode_t *, avl_index_t);
static void sn_rmhash_locked(smbnode_t *);
static void sn_destroy_node(smbnode_t *);
void smbfs_kmem_reclaim(void *cdrarg);

static smbnode_t *
sn_hashfind(smbmntinfo_t *, const char *, int, avl_index_t *);

static smbnode_t *
make_smbnode(smbmntinfo_t *, const char *, int, int *);
150
151 /*
152 * Free the resources associated with an smbnode.
153 * Note: This is different from smbfs_inactive
154 *
155 * From NFS: nfs_subr.c:rinactive
156 */
157 static void
158 sn_inactive(smbnode_t *np)
159 {
160 vsecattr_t ovsa;
161 cred_t *oldcr;
162 char *orpath;
163 int orplen;
164 vnode_t *vp;
165
166 /*
167 * Here NFS has:
168 * Flush and invalidate all pages (done by caller)
169 * Free any held credentials and caches...
170 * etc. (See NFS code)
171 */
172 mutex_enter(&np->r_statelock);
173
174 ovsa = np->r_secattr;
175 np->r_secattr = smbfs_vsa0;
176 np->r_sectime = 0;
177
178 oldcr = np->r_cred;
179 np->r_cred = NULL;
180
181 orpath = np->n_rpath;
182 orplen = np->n_rplen;
183 np->n_rpath = NULL;
184 np->n_rplen = 0;
185
186 mutex_exit(&np->r_statelock);
187
188 vp = SMBTOV(np);
189 if (vn_has_cached_data(vp)) {
190 ASSERT3P(vp,==,NULL);
191 }
192
193 if (ovsa.vsa_aclentp != NULL)
194 kmem_free(ovsa.vsa_aclentp, ovsa.vsa_aclentsz);
195
196 if (oldcr != NULL)
197 crfree(oldcr);
198
199 if (orpath != NULL)
200 kmem_free(orpath, orplen + 1);
201 }
202
203 /*
204 * Find and optionally create an smbnode for the passed
205 * mountinfo, directory, separator, and name. If the
206 * desired smbnode already exists, return a reference.
207 * If the file attributes pointer is non-null, the node
208 * is created if necessary and linked into the AVL tree.
209 *
210 * Callers that need a node created but don't have the
211 * real attributes pass smbfs_fattr0 to force creation.
212 *
213 * Note: make_smbnode() may upgrade the "hash" lock to exclusive.
214 *
215 * Based on NFS: nfs_subr.c:makenfsnode
216 */
217 smbnode_t *
218 smbfs_node_findcreate(
219 smbmntinfo_t *mi,
220 const char *dirnm,
221 int dirlen,
222 const char *name,
223 int nmlen,
224 char sep,
225 struct smbfattr *fap)
226 {
227 char tmpbuf[256];
228 size_t rpalloc;
229 char *p, *rpath;
230 int rplen;
231 smbnode_t *np;
232 vnode_t *vp;
233 int newnode;
234
235 /*
236 * Build the search string, either in tmpbuf or
237 * in allocated memory if larger than tmpbuf.
238 */
239 rplen = dirlen;
240 if (sep != '\0')
241 rplen++;
242 rplen += nmlen;
243 if (rplen < sizeof (tmpbuf)) {
244 /* use tmpbuf */
245 rpalloc = 0;
246 rpath = tmpbuf;
247 } else {
248 rpalloc = rplen + 1;
249 rpath = kmem_alloc(rpalloc, KM_SLEEP);
250 }
251 p = rpath;
252 bcopy(dirnm, p, dirlen);
253 p += dirlen;
254 if (sep != '\0')
255 *p++ = sep;
256 if (name != NULL) {
257 bcopy(name, p, nmlen);
258 p += nmlen;
259 }
260 ASSERT(p == rpath + rplen);
261
262 /*
263 * Find or create a node with this path.
264 */
265 rw_enter(&mi->smi_hash_lk, RW_READER);
266 if (fap == NULL)
267 np = sn_hashfind(mi, rpath, rplen, NULL);
268 else
269 np = make_smbnode(mi, rpath, rplen, &newnode);
270 rw_exit(&mi->smi_hash_lk);
271
272 if (rpalloc)
273 kmem_free(rpath, rpalloc);
274
275 if (fap == NULL) {
276 /*
277 * Caller is "just looking" (no create)
278 * so np may or may not be NULL here.
279 * Either way, we're done.
280 */
281 return (np);
282 }
283
284 /*
285 * We should have a node, possibly created.
286 * Do we have (real) attributes to apply?
287 */
288 ASSERT(np != NULL);
289 if (fap == &smbfs_fattr0)
290 return (np);
291
292 /*
293 * Apply the given attributes to this node,
294 * dealing with any cache impact, etc.
295 */
296 vp = SMBTOV(np);
297 smbfs_attrcache_fa(vp, fap);
298
299 /*
300 * Note NFS sets vp->v_type here, assuming it
301 * can never change for the life of a node.
302 * We allow v_type to change, and set it in
303 * smbfs_attrcache(). Also: mode, uid, gid
304 */
305 return (np);
306 }
307
308 /*
309 * Here NFS has: nfs_subr.c:rtablehash
310 * We use smbfs_hash().
311 */
312
/*
 * Find or create an smbnode for the given remote path.
 *
 *	mi	mount whose node cache (AVL tree) is used
 *	rpath	remote path bytes; only rplen bytes are examined,
 *		and a NUL terminated private copy is made for a new node
 *	rplen	length of rpath
 *	newnode	out: 1 if this call inserted a new node, else 0
 *
 * Locking: the caller must hold mi->smi_hash_lk as reader (asserted).
 * That lock is dropped and retaken in here.  On return it is held
 * as reader when an existing node is returned, or as writer when a
 * new node was inserted; the caller is expected to rw_exit it.
 *
 * A new node comes either from the free list (when the free list is
 * non-empty and smbnodenew >= nsmbnode) or from the kmem cache.
 *
 * From NFS: nfs_subr.c:make_rnode
 */
static smbnode_t *
make_smbnode(
	smbmntinfo_t *mi,
	const char *rpath,
	int rplen,
	int *newnode)
{
	smbnode_t *np;
	smbnode_t *tnp;
	vnode_t *vp;
	vfs_t *vfsp;
	avl_index_t where;
	char *new_rpath = NULL;

	ASSERT(RW_READ_HELD(&mi->smi_hash_lk));
	vfsp = mi->smi_vfsp;

start:
	/* Fast path: the node already exists.  sn_hashfind supplies the hold. */
	np = sn_hashfind(mi, rpath, rplen, NULL);
	if (np != NULL) {
		*newnode = 0;
		return (np);
	}

	/* Note: will retake this lock below. */
	rw_exit(&mi->smi_hash_lk);

	/*
	 * see if we can find something on the freelist
	 */
	mutex_enter(&smbfreelist_lock);
	if (smbfreelist != NULL && smbnodenew >= nsmbnode) {
		/* Recycle the node at the head of the free list. */
		np = smbfreelist;
		sn_rmfree(np);
		mutex_exit(&smbfreelist_lock);

		vp = SMBTOV(np);

		if (np->r_flags & RHASHED) {
			/*
			 * The recycled node may belong to a different
			 * mount, so use its own mount's "hash" lock
			 * to unlink it from that mount's AVL tree.
			 */
			smbmntinfo_t *tmp_mi = np->n_mount;
			ASSERT(tmp_mi != NULL);
			rw_enter(&tmp_mi->smi_hash_lk, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				/*
				 * Someone else got a reference.
				 * Drop ours and leave the node alone.
				 */
				VN_RELE_LOCKED(vp);
				mutex_exit(&vp->v_lock);
				rw_exit(&tmp_mi->smi_hash_lk);
				/* start over */
				rw_enter(&mi->smi_hash_lk, RW_READER);
				goto start;
			}
			mutex_exit(&vp->v_lock);
			sn_rmhash_locked(np);
			rw_exit(&tmp_mi->smi_hash_lk);
		}

		/* Free the old path, credential, and cached ACL. */
		sn_inactive(np);

		/*
		 * Recheck the reference count now that the node is
		 * unhashed; if another reference showed up, give up
		 * on this node and start over.
		 */
		mutex_enter(&vp->v_lock);
		if (vp->v_count > 1) {
			VN_RELE_LOCKED(vp);
			mutex_exit(&vp->v_lock);
			rw_enter(&mi->smi_hash_lk, RW_READER);
			goto start;
		}
		mutex_exit(&vp->v_lock);
		vn_invalid(vp);
		/*
		 * destroy old locks before bzero'ing and
		 * recreating the locks below.
		 */
		smbfs_rw_destroy(&np->r_rwlock);
		smbfs_rw_destroy(&np->r_lkserlock);
		mutex_destroy(&np->r_statelock);
		cv_destroy(&np->r_cv);
		/*
		 * Make sure that if smbnode is recycled then
		 * VFS count is decremented properly before
		 * reuse.
		 */
		VFS_RELE(vp->v_vfsp);
		vn_reinit(vp);
	} else {
		/*
		 * allocate and initialize a new smbnode
		 */
		vnode_t *new_vp;

		mutex_exit(&smbfreelist_lock);

		np = kmem_cache_alloc(smbnode_cache, KM_SLEEP);
		new_vp = vn_alloc(KM_SLEEP);

		/* Matching decrement is in sn_destroy_node(). */
		atomic_inc_ulong((ulong_t *)&smbnodenew);
		vp = new_vp;
	}

	/*
	 * Allocate and copy the rpath we'll need below.
	 * The copy is NUL terminated, hence rplen + 1.
	 */
	new_rpath = kmem_alloc(rplen + 1, KM_SLEEP);
	bcopy(rpath, new_rpath, rplen);
	new_rpath[rplen] = '\0';

	/* Initialize smbnode_t */
	bzero(np, sizeof (*np));

	smbfs_rw_init(&np->r_rwlock, NULL, RW_DEFAULT, NULL);
	smbfs_rw_init(&np->r_lkserlock, NULL, RW_DEFAULT, NULL);
	mutex_init(&np->r_statelock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&np->r_cv, NULL, CV_DEFAULT, NULL);
	/* cv_init(&np->r_commit.c_cv, NULL, CV_DEFAULT, NULL); */

	np->r_vnode = vp;
	np->n_mount = mi;

	np->n_fid = SMB_FID_UNUSED;
	np->n_uid = mi->smi_uid;
	np->n_gid = mi->smi_gid;
	/* Leave attributes "stale." */

	/*
	 * Here NFS has avl_create(&np->r_dir, ...)
	 * for the readdir cache (not used here).
	 */

	/* Now fill in the vnode. */
	vn_setops(vp, smbfs_vnodeops);
	vp->v_data = (caddr_t)np;
	/* This VFS hold is released in sn_destroy_node() or on recycle above. */
	VFS_HOLD(vfsp);
	vp->v_vfsp = vfsp;
	vp->v_type = VNON;

	/*
	 * We entered with mi->smi_hash_lk held (reader).
	 * Retake it now, (as the writer).
	 * Will return with it held.
	 */
	rw_enter(&mi->smi_hash_lk, RW_WRITER);

	/*
	 * There is a race condition where someone else
	 * may alloc the smbnode while no locks are held,
	 * so check again and recover if found.
	 * This lookup also yields the AVL insertion point
	 * ("where") used below when the path is still absent.
	 */
	tnp = sn_hashfind(mi, rpath, rplen, &where);
	if (tnp != NULL) {
		/*
		 * Lost the race.  Hand the node we were building
		 * to smbfs_addfree() and return the one we found.
		 * (The new node is not RHASHED, which is one of the
		 * conditions under which smbfs_addfree() destroys a
		 * node rather than putting it on the free list.)
		 * smbfs_addfree() must be called without the "hash"
		 * lock held, so drop it and retake it as reader.
		 */
		rw_exit(&mi->smi_hash_lk);
		kmem_free(new_rpath, rplen + 1);
		smbfs_addfree(np);
		rw_enter(&mi->smi_hash_lk, RW_READER);
		*newnode = 0;
		return (tnp);
	}

	/*
	 * Hash search identifies nodes by the remote path
	 * (n_rpath) so fill that in now, before linking
	 * this node into the node cache (AVL tree).
	 */
	np->n_rpath = new_rpath;
	np->n_rplen = rplen;
	np->n_ino = smbfs_gethash(new_rpath, rplen);

	sn_addhash_locked(np, where);
	*newnode = 1;
	return (np);
}
489
/*
 * smbfs_addfree
 * Put an smbnode on the free list, or destroy it immediately
 * if it offers no value were it to be reclaimed later.  Also
 * destroy immediately when we have too many smbnodes, etc.
 *
 * The node must not already be on the free list, and its vnode
 * must have at least one reference (both asserted).  The caller
 * must not hold the mount's "hash" lock, since it is taken as
 * writer in here.  In every path where another vnode reference
 * is detected (v_count > 1), this just drops one reference and
 * returns, leaving the node to the remaining holder.
 *
 * Normally called by smbfs_inactive, but also
 * called in here during cleanup operations.
 *
 * From NFS: nfs_subr.c:rp_addfree
 */
void
smbfs_addfree(smbnode_t *np)
{
	vnode_t *vp;
	struct vfs *vfsp;
	smbmntinfo_t *mi;

	ASSERT(np->r_freef == NULL && np->r_freeb == NULL);

	vp = SMBTOV(np);
	ASSERT(vp->v_count >= 1);

	vfsp = vp->v_vfsp;
	mi = VFTOSMI(vfsp);

	/*
	 * If there are no more references to this smbnode and:
	 * we have too many smbnodes allocated, or if the node
	 * is no longer accessible via the AVL tree (!RHASHED),
	 * or an i/o error occurred while writing to the file,
	 * or it's part of an unmounted FS, then try to destroy
	 * it instead of putting it on the smbnode freelist.
	 */
	if (np->r_count == 0 && (
	    (np->r_flags & RHASHED) == 0 ||
	    (np->r_error != 0) ||
	    (vfsp->vfs_flag & VFS_UNMOUNTED) ||
	    (smbnodenew > nsmbnode))) {

		/* Try to destroy this node. */

		if (np->r_flags & RHASHED) {
			/*
			 * Unlink from the AVL tree first, but only if
			 * ours is the last vnode reference.
			 */
			rw_enter(&mi->smi_hash_lk, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				/*
				 * Will get another call later,
				 * via smbfs_inactive.
				 */
				VN_RELE_LOCKED(vp);
				mutex_exit(&vp->v_lock);
				rw_exit(&mi->smi_hash_lk);
				return;
			}
			mutex_exit(&vp->v_lock);
			sn_rmhash_locked(np);
			rw_exit(&mi->smi_hash_lk);
		}

		/* Free the path, credential, and cached ACL. */
		sn_inactive(np);

		/*
		 * Recheck the vnode reference count.  We need to
		 * make sure that another reference has not been
		 * acquired while we were not holding v_lock.  The
		 * smbnode is not in the smbnode "hash" AVL tree, so
		 * the only way for a reference to have been acquired
		 * is for a VOP_PUTPAGE because the smbnode was marked
		 * with RDIRTY or for a modified page.  This vnode
		 * reference may have been acquired before our call
		 * to sn_inactive.  The i/o may have been completed,
		 * thus allowing sn_inactive to complete, but the
		 * reference to the vnode may not have been released
		 * yet.  In any case, the smbnode can not be destroyed
		 * until the other references to this vnode have been
		 * released.  The other references will take care of
		 * either destroying the smbnode or placing it on the
		 * smbnode freelist.  If there are no other references,
		 * then the smbnode may be safely destroyed.
		 */
		mutex_enter(&vp->v_lock);
		if (vp->v_count > 1) {
			VN_RELE_LOCKED(vp);
			mutex_exit(&vp->v_lock);
			return;
		}
		mutex_exit(&vp->v_lock);

		sn_destroy_node(np);
		return;
	}

	/*
	 * Lock the AVL tree and then recheck the reference count
	 * to ensure that no other threads have acquired a reference
	 * to indicate that the smbnode should not be placed on the
	 * freelist.  If another reference has been acquired, then
	 * just release this one and let the other thread complete
	 * the processing of adding this smbnode to the freelist.
	 */
	rw_enter(&mi->smi_hash_lk, RW_WRITER);

	mutex_enter(&vp->v_lock);
	if (vp->v_count > 1) {
		VN_RELE_LOCKED(vp);
		mutex_exit(&vp->v_lock);
		rw_exit(&mi->smi_hash_lk);
		return;
	}
	mutex_exit(&vp->v_lock);

	/*
	 * Put this node on the free list.  The list is circular;
	 * the node is linked in just before the head, i.e. at the
	 * tail.  The "hash" lock stays held (writer) until the
	 * node is on the list.
	 */
	mutex_enter(&smbfreelist_lock);
	if (smbfreelist == NULL) {
		/* Empty list: the node points at itself. */
		np->r_freef = np;
		np->r_freeb = np;
		smbfreelist = np;
	} else {
		np->r_freef = smbfreelist;
		np->r_freeb = smbfreelist->r_freeb;
		smbfreelist->r_freeb->r_freef = np;
		smbfreelist->r_freeb = np;
	}
	mutex_exit(&smbfreelist_lock);

	rw_exit(&mi->smi_hash_lk);
}
620
621 /*
622 * Remove an smbnode from the free list.
623 *
624 * The caller must be holding smbfreelist_lock and the smbnode
625 * must be on the freelist.
626 *
627 * From NFS: nfs_subr.c:rp_rmfree
628 */
629 static void
630 sn_rmfree(smbnode_t *np)
631 {
632
633 ASSERT(MUTEX_HELD(&smbfreelist_lock));
634 ASSERT(np->r_freef != NULL && np->r_freeb != NULL);
635
636 if (np == smbfreelist) {
637 smbfreelist = np->r_freef;
638 if (np == smbfreelist)
639 smbfreelist = NULL;
640 }
641
642 np->r_freeb->r_freef = np->r_freef;
643 np->r_freef->r_freeb = np->r_freeb;
644
645 np->r_freef = np->r_freeb = NULL;
646 }
647
648 /*
649 * Put an smbnode in the "hash" AVL tree.
650 *
651 * The caller must be hold the rwlock as writer.
652 *
653 * From NFS: nfs_subr.c:rp_addhash
654 */
655 static void
656 sn_addhash_locked(smbnode_t *np, avl_index_t where)
657 {
658 smbmntinfo_t *mi = np->n_mount;
659
660 ASSERT(RW_WRITE_HELD(&mi->smi_hash_lk));
661
662 mutex_enter(&np->r_statelock);
663 if ((np->r_flags & RHASHED) == 0) {
664 avl_insert(&mi->smi_hash_avl, np, where);
665 np->r_flags |= RHASHED;
666 }
667 mutex_exit(&np->r_statelock);
668 }
669
670 /*
671 * Remove an smbnode from the "hash" AVL tree.
672 *
673 * The caller must hold the rwlock as writer.
674 *
675 * From NFS: nfs_subr.c:rp_rmhash_locked
676 */
677 static void
678 sn_rmhash_locked(smbnode_t *np)
679 {
680 smbmntinfo_t *mi = np->n_mount;
681
682 ASSERT(RW_WRITE_HELD(&mi->smi_hash_lk));
683
684 mutex_enter(&np->r_statelock);
685 if ((np->r_flags & RHASHED) != 0) {
686 np->r_flags &= ~RHASHED;
687 avl_remove(&mi->smi_hash_avl, np);
688 }
689 mutex_exit(&np->r_statelock);
690 }
691
692 /*
693 * Remove an smbnode from the "hash" AVL tree.
694 *
695 * The caller must not be holding the rwlock.
696 */
697 void
698 smbfs_rmhash(smbnode_t *np)
699 {
700 smbmntinfo_t *mi = np->n_mount;
701
702 rw_enter(&mi->smi_hash_lk, RW_WRITER);
703 sn_rmhash_locked(np);
704 rw_exit(&mi->smi_hash_lk);
705 }
706
707 /*
708 * Lookup an smbnode by remote pathname
709 *
710 * The caller must be holding the AVL rwlock, either shared or exclusive.
711 *
712 * From NFS: nfs_subr.c:rfind
713 */
714 static smbnode_t *
715 sn_hashfind(
716 smbmntinfo_t *mi,
717 const char *rpath,
718 int rplen,
719 avl_index_t *pwhere) /* optional */
720 {
721 smbfs_node_hdr_t nhdr;
722 smbnode_t *np;
723 vnode_t *vp;
724
725 ASSERT(RW_LOCK_HELD(&mi->smi_hash_lk));
726
727 bzero(&nhdr, sizeof (nhdr));
728 nhdr.hdr_n_rpath = (char *)rpath;
729 nhdr.hdr_n_rplen = rplen;
730
731 /* See smbfs_node_cmp below. */
732 np = avl_find(&mi->smi_hash_avl, &nhdr, pwhere);
733
734 if (np == NULL)
735 return (NULL);
736
737 /*
738 * Found it in the "hash" AVL tree.
739 * Remove from free list, if necessary.
740 */
741 vp = SMBTOV(np);
742 if (np->r_freef != NULL) {
743 mutex_enter(&smbfreelist_lock);
744 /*
745 * If the smbnode is on the freelist,
746 * then remove it and use that reference
747 * as the new reference. Otherwise,
748 * need to increment the reference count.
749 */
750 if (np->r_freef != NULL) {
751 sn_rmfree(np);
752 mutex_exit(&smbfreelist_lock);
753 } else {
754 mutex_exit(&smbfreelist_lock);
755 VN_HOLD(vp);
756 }
757 } else
758 VN_HOLD(vp);
759
760 return (np);
761 }
762
763 static int
764 smbfs_node_cmp(const void *va, const void *vb)
765 {
766 const smbfs_node_hdr_t *a = va;
767 const smbfs_node_hdr_t *b = vb;
768 int clen, diff;
769
770 /*
771 * Same semantics as strcmp, but does not
772 * assume the strings are null terminated.
773 */
774 clen = (a->hdr_n_rplen < b->hdr_n_rplen) ?
775 a->hdr_n_rplen : b->hdr_n_rplen;
776 diff = strncmp(a->hdr_n_rpath, b->hdr_n_rpath, clen);
777 if (diff < 0)
778 return (-1);
779 if (diff > 0)
780 return (1);
781 /* they match through clen */
782 if (b->hdr_n_rplen > clen)
783 return (-1);
784 if (a->hdr_n_rplen > clen)
785 return (1);
786 return (0);
787 }
788
789 /*
790 * Setup the "hash" AVL tree used for our node cache.
791 * See: smbfs_mount, smbfs_destroy_table.
792 */
793 void
794 smbfs_init_hash_avl(avl_tree_t *avl)
795 {
796 avl_create(avl, smbfs_node_cmp, sizeof (smbnode_t),
797 offsetof(smbnode_t, r_avl_node));
798 }
799
800 /*
801 * Invalidate the cached attributes for all nodes "under" the
802 * passed-in node. Note: the passed-in node is NOT affected by
803 * this call. This is used both for files under some directory
804 * after the directory is deleted or renamed, and for extended
805 * attribute files (named streams) under a plain file after that
806 * file is renamed or deleted.
807 *
808 * Do this by walking the AVL tree starting at the passed in node,
809 * and continuing while the visited nodes have a path prefix matching
810 * the entire path of the passed-in node, and a separator just after
811 * that matching path prefix. Watch out for cases where the AVL tree
812 * order may not exactly match the order of an FS walk, i.e.
813 * consider this sequence:
814 * "foo" (directory)
815 * "foo bar" (name containing a space)
816 * "foo/bar"
817 * The walk needs to skip "foo bar" and keep going until it finds
818 * something that doesn't match the "foo" name prefix.
819 */
820 void
821 smbfs_attrcache_prune(smbnode_t *top_np)
822 {
823 smbmntinfo_t *mi;
824 smbnode_t *np;
825 char *rpath;
826 int rplen;
827
828 mi = top_np->n_mount;
829 rw_enter(&mi->smi_hash_lk, RW_READER);
830
831 np = top_np;
832 rpath = top_np->n_rpath;
833 rplen = top_np->n_rplen;
834 for (;;) {
835 np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER);
836 if (np == NULL)
837 break;
838 if (np->n_rplen < rplen)
839 break;
840 if (0 != strncmp(np->n_rpath, rpath, rplen))
841 break;
842 if (np->n_rplen > rplen && (
843 np->n_rpath[rplen] == ':' ||
844 np->n_rpath[rplen] == '\\'))
845 smbfs_attrcache_remove(np);
846 }
847
848 rw_exit(&mi->smi_hash_lk);
849 }
850
/*
 * When nonzero, smbfs_check_table() keeps walking after it finds a
 * busy node, so every busy indication is counted (and passed to
 * SMBVDEBUG) rather than stopping at the first busy node.
 * Defaults to 1 only when built with SMB_VNODE_DEBUG.
 */
#ifdef SMB_VNODE_DEBUG
int smbfs_check_table_debug = 1;
#else /* SMB_VNODE_DEBUG */
int smbfs_check_table_debug = 0;
#endif /* SMB_VNODE_DEBUG */
856
857
858 /*
859 * Return 1 if there is a active vnode belonging to this vfs in the
860 * smbnode cache.
861 *
862 * Several of these checks are done without holding the usual
863 * locks. This is safe because destroy_smbtable(), smbfs_addfree(),
864 * etc. will redo the necessary checks before actually destroying
865 * any smbnodes.
866 *
867 * From NFS: nfs_subr.c:check_rtable
868 *
869 * Debugging changes here relative to NFS.
870 * Relatively harmless, so left 'em in.
871 */
872 int
873 smbfs_check_table(struct vfs *vfsp, smbnode_t *rtnp)
874 {
875 smbmntinfo_t *mi;
876 smbnode_t *np;
877 vnode_t *vp;
878 int busycnt = 0;
879
880 mi = VFTOSMI(vfsp);
881 rw_enter(&mi->smi_hash_lk, RW_READER);
882 for (np = avl_first(&mi->smi_hash_avl); np != NULL;
883 np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER)) {
884
885 if (np == rtnp)
886 continue; /* skip the root */
887 vp = SMBTOV(np);
888
889 /* Now the 'busy' checks: */
890 /* Not on the free list? */
891 if (np->r_freef == NULL) {
892 SMBVDEBUG("!r_freef: node=0x%p, rpath=%s\n",
893 (void *)np, np->n_rpath);
894 busycnt++;
895 }
896
897 /* Has dirty pages? */
898 if (vn_has_cached_data(vp) &&
899 (np->r_flags & RDIRTY)) {
900 SMBVDEBUG("is dirty: node=0x%p, rpath=%s\n",
901 (void *)np, np->n_rpath);
902 busycnt++;
903 }
904
905 /* Other refs? (not reflected in v_count) */
906 if (np->r_count > 0) {
907 SMBVDEBUG("+r_count: node=0x%p, rpath=%s\n",
908 (void *)np, np->n_rpath);
909 busycnt++;
910 }
911
912 if (busycnt && !smbfs_check_table_debug)
913 break;
914
915 }
916 rw_exit(&mi->smi_hash_lk);
917
918 return (busycnt);
919 }
920
921 /*
922 * Destroy inactive vnodes from the AVL tree which belong to this
923 * vfs. It is essential that we destroy all inactive vnodes during a
924 * forced unmount as well as during a normal unmount.
925 *
926 * Based on NFS: nfs_subr.c:destroy_rtable
927 *
928 * In here, we're normally destrying all or most of the AVL tree,
929 * so the natural choice is to use avl_destroy_nodes. However,
930 * there may be a few busy nodes that should remain in the AVL
931 * tree when we're done. The solution: use a temporary tree to
932 * hold the busy nodes until we're done destroying the old tree,
933 * then copy the temporary tree over the (now emtpy) real tree.
934 */
935 void
936 smbfs_destroy_table(struct vfs *vfsp)
937 {
938 avl_tree_t tmp_avl;
939 smbmntinfo_t *mi;
940 smbnode_t *np;
941 smbnode_t *rlist;
942 void *v;
943
944 mi = VFTOSMI(vfsp);
945 rlist = NULL;
946 smbfs_init_hash_avl(&tmp_avl);
947
948 rw_enter(&mi->smi_hash_lk, RW_WRITER);
949 v = NULL;
950 while ((np = avl_destroy_nodes(&mi->smi_hash_avl, &v)) != NULL) {
951
952 mutex_enter(&smbfreelist_lock);
953 if (np->r_freef == NULL) {
954 /*
955 * Busy node (not on the free list).
956 * Will keep in the final AVL tree.
957 */
958 mutex_exit(&smbfreelist_lock);
959 avl_add(&tmp_avl, np);
960 } else {
961 /*
962 * It's on the free list. Remove and
963 * arrange for it to be destroyed.
964 */
965 sn_rmfree(np);
966 mutex_exit(&smbfreelist_lock);
967
968 /*
969 * Last part of sn_rmhash_locked().
970 * NB: avl_destroy_nodes has already
971 * removed this from the "hash" AVL.
972 */
973 mutex_enter(&np->r_statelock);
974 np->r_flags &= ~RHASHED;
975 mutex_exit(&np->r_statelock);
976
977 /*
978 * Add to the list of nodes to destroy.
979 * Borrowing avl_child[0] for this list.
980 */
981 np->r_avl_node.avl_child[0] =
982 (struct avl_node *)rlist;
983 rlist = np;
984 }
985 }
986 avl_destroy(&mi->smi_hash_avl);
987
988 /*
989 * Replace the (now destroyed) "hash" AVL with the
990 * temporary AVL, which restores the busy nodes.
991 */
992 mi->smi_hash_avl = tmp_avl;
993 rw_exit(&mi->smi_hash_lk);
994
995 /*
996 * Now destroy the nodes on our temporary list (rlist).
997 * This call to smbfs_addfree will end up destroying the
998 * smbnode, but in a safe way with the appropriate set
999 * of checks done.
1000 */
1001 while ((np = rlist) != NULL) {
1002 rlist = (smbnode_t *)np->r_avl_node.avl_child[0];
1003 smbfs_addfree(np);
1004 }
1005 }
1006
1007 /*
1008 * This routine destroys all the resources associated with the smbnode
1009 * and then the smbnode itself. Note: sn_inactive has been called.
1010 *
1011 * From NFS: nfs_subr.c:destroy_rnode
1012 */
1013 static void
1014 sn_destroy_node(smbnode_t *np)
1015 {
1016 vnode_t *vp;
1017 vfs_t *vfsp;
1018
1019 vp = SMBTOV(np);
1020 vfsp = vp->v_vfsp;
1021
1022 ASSERT(vp->v_count == 1);
1023 ASSERT(np->r_count == 0);
1024 ASSERT(np->r_mapcnt == 0);
1025 ASSERT(np->r_secattr.vsa_aclentp == NULL);
1026 ASSERT(np->r_cred == NULL);
1027 ASSERT(np->n_rpath == NULL);
1028 ASSERT(!(np->r_flags & RHASHED));
1029 ASSERT(np->r_freef == NULL && np->r_freeb == NULL);
1030 atomic_dec_ulong((ulong_t *)&smbnodenew);
1031 vn_invalid(vp);
1032 vn_free(vp);
1033 kmem_cache_free(smbnode_cache, np);
1034 VFS_RELE(vfsp);
1035 }
1036
1037 /*
1038 * From NFS rflush()
1039 * Flush all vnodes in this (or every) vfs.
1040 * Used by smbfs_sync and by smbfs_unmount.
1041 */
1042 /*ARGSUSED*/
1043 void
1044 smbfs_rflush(struct vfs *vfsp, cred_t *cr)
1045 {
1046 smbmntinfo_t *mi;
1047 smbnode_t *np;
1048 vnode_t *vp, **vplist;
1049 long num, cnt;
1050
1051 mi = VFTOSMI(vfsp);
1052
1053 /*
1054 * Check to see whether there is anything to do.
1055 */
1056 num = avl_numnodes(&mi->smi_hash_avl);
1057 if (num == 0)
1058 return;
1059
1060 /*
1061 * Allocate a slot for all currently active rnodes on the
1062 * supposition that they all may need flushing.
1063 */
1064 vplist = kmem_alloc(num * sizeof (*vplist), KM_SLEEP);
1065 cnt = 0;
1066
1067 /*
1068 * Walk the AVL tree looking for rnodes with page
1069 * lists associated with them. Make a list of these
1070 * files.
1071 */
1072 rw_enter(&mi->smi_hash_lk, RW_READER);
1073 for (np = avl_first(&mi->smi_hash_avl); np != NULL;
1074 np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER)) {
1075 vp = SMBTOV(np);
1076 /*
1077 * Don't bother sync'ing a vp if it
1078 * is part of virtual swap device or
1079 * if VFS is read-only
1080 */
1081 if (IS_SWAPVP(vp) || vn_is_readonly(vp))
1082 continue;
1083 /*
1084 * If the vnode has pages and is marked as either
1085 * dirty or mmap'd, hold and add this vnode to the
1086 * list of vnodes to flush.
1087 */
1088 if (vn_has_cached_data(vp) &&
1089 ((np->r_flags & RDIRTY) || np->r_mapcnt > 0)) {
1090 VN_HOLD(vp);
1091 vplist[cnt++] = vp;
1092 if (cnt == num)
1093 break;
1094 }
1095 }
1096 rw_exit(&mi->smi_hash_lk);
1097
1098 /*
1099 * Flush and release all of the files on the list.
1100 */
1101 while (cnt-- > 0) {
1102 vp = vplist[cnt];
1103 (void) VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_ASYNC, cr, NULL);
1104 VN_RELE(vp);
1105 }
1106
1107 kmem_free(vplist, num * sizeof (vnode_t *));
1108 }
1109
1110 /* Here NFS has access cache stuff (nfs_subr.c) not used here */
1111
1112 /*
1113 * Set or Clear direct I/O flag
1114 * VOP_RWLOCK() is held for write access to prevent a race condition
1115 * which would occur if a process is in the middle of a write when
1116 * directio flag gets set. It is possible that all pages may not get flushed.
1117 * From nfs_common.c
1118 */
1119
1120 /* ARGSUSED */
1121 int
1122 smbfs_directio(vnode_t *vp, int cmd, cred_t *cr)
1123 {
1124 int error = 0;
1125 smbnode_t *np;
1126
1127 np = VTOSMB(vp);
1128
1129 if (cmd == DIRECTIO_ON) {
1130
1131 if (np->r_flags & RDIRECTIO)
1132 return (0);
1133
1134 /*
1135 * Flush the page cache.
1136 */
1137
1138 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1139
1140 if (np->r_flags & RDIRECTIO) {
1141 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1142 return (0);
1143 }
1144
1145 /* Here NFS also checks ->r_awcount */
1146 if (vn_has_cached_data(vp) &&
1147 (np->r_flags & RDIRTY) != 0) {
1148 error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0,
1149 B_INVAL, cr, NULL);
1150 if (error) {
1151 if (error == ENOSPC || error == EDQUOT) {
1152 mutex_enter(&np->r_statelock);
1153 if (!np->r_error)
1154 np->r_error = error;
1155 mutex_exit(&np->r_statelock);
1156 }
1157 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1158 return (error);
1159 }
1160 }
1161
1162 mutex_enter(&np->r_statelock);
1163 np->r_flags |= RDIRECTIO;
1164 mutex_exit(&np->r_statelock);
1165 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1166 return (0);
1167 }
1168
1169 if (cmd == DIRECTIO_OFF) {
1170 mutex_enter(&np->r_statelock);
1171 np->r_flags &= ~RDIRECTIO; /* disable direct mode */
1172 mutex_exit(&np->r_statelock);
1173 return (0);
1174 }
1175
1176 return (EINVAL);
1177 }
1178
1179 static kmutex_t smbfs_newnum_lock;
1180 static uint32_t smbfs_newnum_val = 0;
1181
1182 /*
1183 * Return a number 0..0xffffffff that's different from the last
1184 * 0xffffffff numbers this returned. Used for unlinked files.
1185 * From NFS nfs_subr.c newnum
1186 */
1187 uint32_t
1188 smbfs_newnum(void)
1189 {
1190 uint32_t id;
1191
1192 mutex_enter(&smbfs_newnum_lock);
1193 if (smbfs_newnum_val == 0)
1194 smbfs_newnum_val = (uint32_t)gethrestime_sec();
1195 id = smbfs_newnum_val++;
1196 mutex_exit(&smbfs_newnum_lock);
1197 return (id);
1198 }
1199
1200 /*
1201 * Fill in a temporary name at buf
1202 */
1203 int
1204 smbfs_newname(char *buf, size_t buflen)
1205 {
1206 uint_t id;
1207 int n;
1208
1209 id = smbfs_newnum();
1210 n = snprintf(buf, buflen, "~$smbfs%08X", id);
1211 return (n);
1212 }
1213
1214
1215 /*
1216 * initialize resources that are used by smbfs_subr.c
1217 * this is called from the _init() routine (by the way of smbfs_clntinit())
1218 *
1219 * From NFS: nfs_subr.c:nfs_subrinit
1220 */
1221 int
1222 smbfs_subrinit(void)
1223 {
1224 ulong_t nsmbnode_max;
1225
1226 /*
1227 * Allocate and initialize the smbnode cache
1228 */
1229 if (nsmbnode <= 0)
1230 nsmbnode = ncsize; /* dnlc.h */
1231 nsmbnode_max = (ulong_t)((kmem_maxavail() >> 2) /
1232 sizeof (struct smbnode));
1233 if (nsmbnode > nsmbnode_max || (nsmbnode == 0 && ncsize == 0)) {
1234 zcmn_err(GLOBAL_ZONEID, CE_NOTE,
1235 "setting nsmbnode to max value of %ld", nsmbnode_max);
1236 nsmbnode = nsmbnode_max;
1237 }
1238
1239 smbnode_cache = kmem_cache_create("smbnode_cache", sizeof (smbnode_t),
1240 0, NULL, NULL, smbfs_kmem_reclaim, NULL, NULL, 0);
1241
1242 /*
1243 * Initialize the various mutexes and reader/writer locks
1244 */
1245 mutex_init(&smbfreelist_lock, NULL, MUTEX_DEFAULT, NULL);
1246 mutex_init(&smbfs_minor_lock, NULL, MUTEX_DEFAULT, NULL);
1247
1248 /*
1249 * Assign unique major number for all smbfs mounts
1250 */
1251 if ((smbfs_major = getudev()) == -1) {
1252 zcmn_err(GLOBAL_ZONEID, CE_WARN,
1253 "smbfs: init: can't get unique device number");
1254 smbfs_major = 0;
1255 }
1256 smbfs_minor = 0;
1257
1258 return (0);
1259 }
1260
1261 /*
1262 * free smbfs hash table, etc.
1263 * From NFS: nfs_subr.c:nfs_subrfini
1264 */
1265 void
1266 smbfs_subrfini(void)
1267 {
1268
1269 /*
1270 * Destroy the smbnode cache
1271 */
1272 kmem_cache_destroy(smbnode_cache);
1273
1274 /*
1275 * Destroy the various mutexes and reader/writer locks
1276 */
1277 mutex_destroy(&smbfreelist_lock);
1278 mutex_destroy(&smbfs_minor_lock);
1279 }
1280
1281 /* rddir_cache ? */
1282
1283 /*
1284 * Support functions for smbfs_kmem_reclaim
1285 */
1286
1287 static void
1288 smbfs_node_reclaim(void)
1289 {
1290 smbmntinfo_t *mi;
1291 smbnode_t *np;
1292 vnode_t *vp;
1293
1294 mutex_enter(&smbfreelist_lock);
1295 while ((np = smbfreelist) != NULL) {
1296 sn_rmfree(np);
1297 mutex_exit(&smbfreelist_lock);
1298 if (np->r_flags & RHASHED) {
1299 vp = SMBTOV(np);
1300 mi = np->n_mount;
1301 rw_enter(&mi->smi_hash_lk, RW_WRITER);
1302 mutex_enter(&vp->v_lock);
1303 if (vp->v_count > 1) {
1304 VN_RELE_LOCKED(vp);
1305 mutex_exit(&vp->v_lock);
1306 rw_exit(&mi->smi_hash_lk);
1307 mutex_enter(&smbfreelist_lock);
1308 continue;
1309 }
1310 mutex_exit(&vp->v_lock);
1311 sn_rmhash_locked(np);
1312 rw_exit(&mi->smi_hash_lk);
1313 }
1314 /*
1315 * This call to smbfs_addfree will end up destroying the
1316 * smbnode, but in a safe way with the appropriate set
1317 * of checks done.
1318 */
1319 smbfs_addfree(np);
1320 mutex_enter(&smbfreelist_lock);
1321 }
1322 mutex_exit(&smbfreelist_lock);
1323 }
1324
/*
 * Reclaim callback handed to kmem_cache_create() for smbnode_cache.
 * This is how the kmem allocator asks us:
 * "Please give back some memory!"
 *
 * The argument is not used; all the work is done by
 * smbfs_node_reclaim().
 *
 * Todo: dump nodes from the free list?
 */
/*ARGSUSED*/
void
smbfs_kmem_reclaim(void *cdrarg)
{
	smbfs_node_reclaim();
}
1337
1338 /*
1339 * Here NFS has failover stuff and
1340 * nfs_rw_xxx - see smbfs_rwlock.c
1341 */