1 /*
2 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
3 * Authors: Doug Rabson <dfr@rabson.org>
4 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
29 * Copyright (c) 2012 by Delphix. All rights reserved.
30 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
31 */
32
33 /*
34 * NFS LockManager, start/stop, support functions, etc.
35 * Most of the interesting code is here.
36 *
37 * Source code derived from FreeBSD nlm_prot_impl.c
38 */
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/thread.h>
43 #include <sys/fcntl.h>
44 #include <sys/flock.h>
45 #include <sys/mount.h>
46 #include <sys/priv.h>
47 #include <sys/proc.h>
48 #include <sys/share.h>
49 #include <sys/socket.h>
50 #include <sys/syscall.h>
51 #include <sys/syslog.h>
52 #include <sys/systm.h>
53 #include <sys/class.h>
54 #include <sys/unistd.h>
55 #include <sys/vnode.h>
56 #include <sys/vfs.h>
57 #include <sys/queue.h>
58 #include <sys/bitmap.h>
59 #include <sys/sdt.h>
60 #include <netinet/in.h>
61
62 #include <rpc/rpc.h>
63 #include <rpc/xdr.h>
64 #include <rpc/pmap_prot.h>
65 #include <rpc/pmap_clnt.h>
66 #include <rpc/rpcb_prot.h>
67
68 #include <rpcsvc/nlm_prot.h>
69 #include <rpcsvc/sm_inter.h>
70 #include <rpcsvc/nsm_addr.h>
71
72 #include <nfs/nfs.h>
73 #include <nfs/nfs_clnt.h>
74 #include <nfs/export.h>
75 #include <nfs/rnode.h>
76 #include <nfs/lm.h>
77
78 #include "nlm_impl.h"
79
80 struct nlm_knc {
81 struct knetconfig n_knc;
82 const char *n_netid;
83 };
84
/*
 * How many times NLM tries to obtain the RPC binding
 * of the local statd before giving up.
 */
#define	NLM_NSM_RPCBIND_RETRIES	10

/*
 * Pause (in seconds) NLM makes before the next attempt
 * to obtain the RPC binding of the local statd.
 */
#define	NLM_NSM_RPCBIND_TIMEOUT	5

/*
 * Total number of sysids tracked by the NLM sysid bitmap.
 */
#define	NLM_BMAP_NITEMS	(LM_SYSID_MAX + 1)

/*
 * Number of ulong_t words in the bitmap used for
 * allocation of sysid numbers.
 */
#define	NLM_BMAP_WORDS	(NLM_BMAP_NITEMS / BT_NBIPUL)

/*
 * Sign of the integer x:
 *	-1 when x is negative,
 *	 0 when x is zero,
 *	 1 when x is positive.
 * The argument is evaluated twice, so it must be free of side effects.
 */
#define	SIGN(x)	(-((x) < 0) + ((x) > 0))

/*
 * Number of elements in a true array (not a pointer).
 */
#define	ARRSIZE(arr)	(sizeof (arr) / sizeof (*(arr)))

/*
 * Number of entries in the nlm_netconfigs table.
 */
#define	NLM_KNCS	ARRSIZE(nlm_netconfigs)
118
119 krwlock_t lm_lck;
120
121 /*
122 * Zero timeout for asynchronous NLM RPC operations
123 */
124 static const struct timeval nlm_rpctv_zero = { 0, 0 };
125
126 /*
127 * List of all Zone globals nlm_globals instences
128 * linked together.
129 */
130 static struct nlm_globals_list nlm_zones_list; /* (g) */
131
132 /*
133 * NLM kmem caches
134 */
135 static struct kmem_cache *nlm_hosts_cache = NULL;
136 static struct kmem_cache *nlm_vhold_cache = NULL;
137
138 /*
139 * A bitmap for allocation of new sysids.
140 * Sysid is a unique number between LM_SYSID
141 * and LM_SYSID_MAX. Sysid represents unique remote
142 * host that does file locks on the given host.
143 */
144 static ulong_t nlm_sysid_bmap[NLM_BMAP_WORDS]; /* (g) */
145 static int nlm_sysid_nidx; /* (g) */
146
147 /*
148 * RPC service registration for all transports
149 */
150 static SVC_CALLOUT nlm_svcs[] = {
151 { NLM_PROG, 4, 4, nlm_prog_4 }, /* NLM4_VERS */
152 { NLM_PROG, 1, 3, nlm_prog_3 } /* NLM_VERS - NLM_VERSX */
153 };
154
155 static SVC_CALLOUT_TABLE nlm_sct = {
156 ARRSIZE(nlm_svcs),
157 FALSE,
158 nlm_svcs
159 };
160
161 /*
162 * Static table of all netid/knetconfig network
163 * lock manager can work with. nlm_netconfigs table
164 * is used when we need to get valid knetconfig by
165 * netid and vice versa.
166 *
167 * Knetconfigs are activated either by the call from
168 * user-space lockd daemon (server side) or by taking
169 * knetconfig from NFS mountinfo (client side)
170 */
171 static struct nlm_knc nlm_netconfigs[] = { /* (g) */
172 /* UDP */
173 {
174 { NC_TPI_CLTS, NC_INET, NC_UDP, NODEV },
175 "udp",
176 },
177 /* TCP */
178 {
179 { NC_TPI_COTS_ORD, NC_INET, NC_TCP, NODEV },
180 "tcp",
181 },
182 /* UDP over IPv6 */
183 {
184 { NC_TPI_CLTS, NC_INET6, NC_UDP, NODEV },
185 "udp6",
186 },
187 /* TCP over IPv6 */
188 {
189 { NC_TPI_COTS_ORD, NC_INET6, NC_TCP, NODEV },
190 "tcp6",
191 },
192 /* ticlts (loopback over UDP) */
193 {
194 { NC_TPI_CLTS, NC_LOOPBACK, NC_NOPROTO, NODEV },
195 "ticlts",
196 },
197 /* ticotsord (loopback over TCP) */
198 {
199 { NC_TPI_COTS_ORD, NC_LOOPBACK, NC_NOPROTO, NODEV },
200 "ticotsord",
201 },
202 };
203
204 /*
205 * NLM misc. function
206 */
207 static void nlm_copy_netbuf(struct netbuf *, struct netbuf *);
208 static int nlm_netbuf_addrs_cmp(struct netbuf *, struct netbuf *);
209 static void nlm_kmem_reclaim(void *);
210 static void nlm_pool_shutdown(void);
211 static void nlm_suspend_zone(struct nlm_globals *);
212 static void nlm_resume_zone(struct nlm_globals *);
213 static void nlm_nsm_clnt_init(CLIENT *, struct nlm_nsm *);
214 static void nlm_netbuf_to_netobj(struct netbuf *, int *, netobj *);
215
216 /*
217 * NLM thread functions
218 */
219 static void nlm_gc(struct nlm_globals *);
220 static void nlm_reclaimer(struct nlm_host *);
221
222 /*
223 * NLM NSM functions
224 */
225 static int nlm_init_local_knc(struct knetconfig *);
226 static int nlm_nsm_init_local(struct nlm_nsm *);
227 static int nlm_nsm_init(struct nlm_nsm *, struct knetconfig *, struct netbuf *);
228 static void nlm_nsm_fini(struct nlm_nsm *);
229 static enum clnt_stat nlm_nsm_simu_crash(struct nlm_nsm *);
230 static enum clnt_stat nlm_nsm_stat(struct nlm_nsm *, int32_t *);
231 static enum clnt_stat nlm_nsm_mon(struct nlm_nsm *, char *, uint16_t);
232 static enum clnt_stat nlm_nsm_unmon(struct nlm_nsm *, char *);
233
234 /*
235 * NLM host functions
236 */
237 static int nlm_host_ctor(void *, void *, int);
238 static void nlm_host_dtor(void *, void *);
239 static void nlm_host_destroy(struct nlm_host *);
240 static struct nlm_host *nlm_host_create(char *, const char *,
241 struct knetconfig *, struct netbuf *);
242 static struct nlm_host *nlm_host_find_locked(struct nlm_globals *,
243 const char *, struct netbuf *, avl_index_t *);
244 static void nlm_host_unregister(struct nlm_globals *, struct nlm_host *);
245 static void nlm_host_gc_vholds(struct nlm_host *);
246 static bool_t nlm_host_has_srv_locks(struct nlm_host *);
247 static bool_t nlm_host_has_cli_locks(struct nlm_host *);
248 static bool_t nlm_host_has_locks(struct nlm_host *);
249
250 /*
251 * NLM vhold functions
252 */
253 static int nlm_vhold_ctor(void *, void *, int);
254 static void nlm_vhold_dtor(void *, void *);
255 static void nlm_vhold_destroy(struct nlm_host *,
256 struct nlm_vhold *);
257 static bool_t nlm_vhold_busy(struct nlm_host *, struct nlm_vhold *);
258 static void nlm_vhold_clean(struct nlm_vhold *, int);
259
260 /*
261 * NLM client/server sleeping locks/share reservation functions
262 */
263 struct nlm_slreq *nlm_slreq_find_locked(struct nlm_host *,
264 struct nlm_vhold *, struct flock64 *);
265 static struct nlm_shres *nlm_shres_create_item(struct shrlock *, vnode_t *);
266 static void nlm_shres_destroy_item(struct nlm_shres *);
267 static bool_t nlm_shres_equal(struct shrlock *, struct shrlock *);
268
269 /*
270 * NLM initialization functions.
271 */
272 void
273 nlm_init(void)
274 {
275 nlm_hosts_cache = kmem_cache_create("nlm_host_cache",
276 sizeof (struct nlm_host), 0, nlm_host_ctor, nlm_host_dtor,
277 nlm_kmem_reclaim, NULL, NULL, 0);
278
279 nlm_vhold_cache = kmem_cache_create("nlm_vhold_cache",
280 sizeof (struct nlm_vhold), 0, nlm_vhold_ctor, nlm_vhold_dtor,
281 NULL, NULL, NULL, 0);
282
283 nlm_rpc_init();
284 TAILQ_INIT(&nlm_zones_list);
285
286 /* initialize sysids bitmap */
287 bzero(nlm_sysid_bmap, sizeof (nlm_sysid_bmap));
288 nlm_sysid_nidx = 1;
289
290 /*
291 * Reserv the sysid #0, because it's associated
292 * with local locks only. Don't let to allocate
293 * it for remote locks.
294 */
295 BT_SET(nlm_sysid_bmap, 0);
296 }
297
298 void
299 nlm_globals_register(struct nlm_globals *g)
300 {
301 rw_enter(&lm_lck, RW_WRITER);
302 TAILQ_INSERT_TAIL(&nlm_zones_list, g, nlm_link);
303 rw_exit(&lm_lck);
304 }
305
306 void
307 nlm_globals_unregister(struct nlm_globals *g)
308 {
309 rw_enter(&lm_lck, RW_WRITER);
310 TAILQ_REMOVE(&nlm_zones_list, g, nlm_link);
311 rw_exit(&lm_lck);
312 }
313
314 /* ARGSUSED */
315 static void
316 nlm_kmem_reclaim(void *cdrarg)
317 {
318 struct nlm_globals *g;
319
320 rw_enter(&lm_lck, RW_READER);
321 TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
322 cv_broadcast(&g->nlm_gc_sched_cv);
323
324 rw_exit(&lm_lck);
325 }
326
327 /*
328 * NLM garbage collector thread (GC).
329 *
330 * NLM GC periodically checks whether there're any host objects
331 * that can be cleaned up. It also releases stale vnodes that
332 * live on the server side (under protection of vhold objects).
333 *
334 * NLM host objects are cleaned up from GC thread because
335 * operations helping us to determine whether given host has
336 * any locks can be quite expensive and it's not good to call
337 * them every time the very last reference to the host is dropped.
338 * Thus we use "lazy" approach for hosts cleanup.
339 *
340 * The work of GC is to release stale vnodes on the server side
341 * and destroy hosts that haven't any locks and any activity for
342 * some time (i.e. idle hosts).
343 */
344 static void
345 nlm_gc(struct nlm_globals *g)
346 {
347 struct nlm_host *hostp;
348 clock_t now, idle_period;
349
350 idle_period = SEC_TO_TICK(g->cn_idle_tmo);
351 mutex_enter(&g->lock);
352 for (;;) {
353 /*
354 * GC thread can be explicitly scheduled from
355 * memory reclamation function.
356 */
357 (void) cv_timedwait(&g->nlm_gc_sched_cv, &g->lock,
358 ddi_get_lbolt() + idle_period);
359
360 /*
361 * NLM is shutting down, time to die.
362 */
363 if (g->run_status == NLM_ST_STOPPING)
364 break;
365
366 now = ddi_get_lbolt();
367 DTRACE_PROBE2(gc__start, struct nlm_globals *, g,
368 clock_t, now);
369
370 /*
371 * Handle all hosts that are unused at the moment
372 * until we meet one with idle timeout in future.
373 */
374 while ((hostp = TAILQ_FIRST(&g->nlm_idle_hosts)) != NULL) {
375 bool_t has_locks = FALSE;
376
377 if (hostp->nh_idle_timeout > now)
378 break;
379
380 /*
381 * Drop global lock while doing expensive work
382 * on this host. We'll re-check any conditions
383 * that might change after retaking the global
384 * lock.
385 */
386 mutex_exit(&g->lock);
387 mutex_enter(&hostp->nh_lock);
388
389 /*
390 * nlm_globals lock was dropped earlier because
391 * garbage collecting of vholds and checking whether
392 * host has any locks/shares are expensive operations.
393 */
394 nlm_host_gc_vholds(hostp);
395 has_locks = nlm_host_has_locks(hostp);
396
397 mutex_exit(&hostp->nh_lock);
398 mutex_enter(&g->lock);
399
400 /*
401 * While we were doing expensive operations outside of
402 * nlm_globals critical section, somebody could
403 * take the host, add lock/share to one of its vnodes
404 * and release the host back. If so, host's idle timeout
405 * is renewed and our information about locks on the
406 * given host is outdated.
407 */
408 if (hostp->nh_idle_timeout > now)
409 continue;
410
411 /*
412 * If either host has locks or somebody has began to
413 * use it while we were outside the nlm_globals critical
414 * section. In both cases we have to renew host's
415 * timeout and put it to the end of LRU list.
416 */
417 if (has_locks || hostp->nh_refs > 0) {
418 TAILQ_REMOVE(&g->nlm_idle_hosts,
419 hostp, nh_link);
420 hostp->nh_idle_timeout = now + idle_period;
421 TAILQ_INSERT_TAIL(&g->nlm_idle_hosts,
422 hostp, nh_link);
423 continue;
424 }
425
426 /*
427 * We're here if all the following conditions hold:
428 * 1) Host hasn't any locks or share reservations
429 * 2) Host is unused
430 * 3) Host wasn't touched by anyone at least for
431 * g->cn_idle_tmo seconds.
432 *
433 * So, now we can destroy it.
434 */
435 nlm_host_unregister(g, hostp);
436 mutex_exit(&g->lock);
437
438 nlm_host_unmonitor(g, hostp);
439 nlm_host_destroy(hostp);
440 mutex_enter(&g->lock);
441 if (g->run_status == NLM_ST_STOPPING)
442 break;
443
444 }
445
446 DTRACE_PROBE(gc__end);
447 }
448
449 DTRACE_PROBE1(gc__exit, struct nlm_globals *, g);
450
451 /* Let others know that GC has died */
452 g->nlm_gc_thread = NULL;
453 mutex_exit(&g->lock);
454
455 cv_broadcast(&g->nlm_gc_finish_cv);
456 zthread_exit();
457 }
458
459 /*
460 * Thread reclaim locks/shares acquired by the client side
461 * on the given server represented by hostp.
462 */
463 static void
464 nlm_reclaimer(struct nlm_host *hostp)
465 {
466 struct nlm_globals *g;
467
468 mutex_enter(&hostp->nh_lock);
469 hostp->nh_reclaimer = curthread;
470 mutex_exit(&hostp->nh_lock);
471
472 g = zone_getspecific(nlm_zone_key, curzone);
473 nlm_reclaim_client(g, hostp);
474
475 mutex_enter(&hostp->nh_lock);
476 hostp->nh_flags &= ~NLM_NH_RECLAIM;
477 hostp->nh_reclaimer = NULL;
478 cv_broadcast(&hostp->nh_recl_cv);
479 mutex_exit(&hostp->nh_lock);
480
481 /*
482 * Host was explicitly referenced before
483 * nlm_reclaim() was called, release it
484 * here.
485 */
486 nlm_host_release(g, hostp);
487 zthread_exit();
488 }
489
490 /*
491 * Copy a struct netobj. (see xdr.h)
492 */
493 void
494 nlm_copy_netobj(struct netobj *dst, struct netobj *src)
495 {
496 dst->n_len = src->n_len;
497 dst->n_bytes = kmem_alloc(src->n_len, KM_SLEEP);
498 bcopy(src->n_bytes, dst->n_bytes, src->n_len);
499 }
500
501 /*
502 * An NLM specificw replacement for clnt_call().
503 * nlm_clnt_call() is used by all RPC functions generated
504 * from nlm_prot.x specification. The function is aware
505 * about some pitfalls of NLM RPC procedures and has a logic
506 * that handles them properly.
507 */
508 enum clnt_stat
509 nlm_clnt_call(CLIENT *clnt, rpcproc_t procnum, xdrproc_t xdr_args,
510 caddr_t argsp, xdrproc_t xdr_result, caddr_t resultp, struct timeval wait)
511 {
512 k_sigset_t oldmask;
513 enum clnt_stat stat;
514 bool_t sig_blocked = FALSE;
515
516 /*
517 * If NLM RPC procnum is one of the NLM _RES procedures
518 * that are used to reply to asynchronous NLM RPC
519 * (MSG calls), explicitly set RPC timeout to zero.
520 * Client doesn't send a reply to RES procedures, so
521 * we don't need to wait anything.
522 *
523 * NOTE: we ignore NLM4_*_RES procnums because they are
524 * equal to NLM_*_RES numbers.
525 */
526 if (procnum >= NLM_TEST_RES && procnum <= NLM_GRANTED_RES)
527 wait = nlm_rpctv_zero;
528
529 /*
530 * We need to block signals in case of NLM_CANCEL RPC
531 * in order to prevent interruption of network RPC
532 * calls.
533 */
534 if (procnum == NLM_CANCEL) {
535 k_sigset_t newmask;
536
537 sigfillset(&newmask);
538 sigreplace(&newmask, &oldmask);
539 sig_blocked = TRUE;
540 }
541
542 stat = clnt_call(clnt, procnum, xdr_args,
543 argsp, xdr_result, resultp, wait);
544
545 /*
546 * Restore signal mask back if signals were blocked
547 */
548 if (sig_blocked)
549 sigreplace(&oldmask, (k_sigset_t *)NULL);
550
551 return (stat);
552 }
553
554 /*
555 * Suspend NLM client/server in the given zone.
556 *
557 * During suspend operation we mark those hosts
558 * that have any locks with NLM_NH_SUSPEND flags,
559 * so that they can be checked later, when resume
560 * operation occurs.
561 */
562 static void
563 nlm_suspend_zone(struct nlm_globals *g)
564 {
565 struct nlm_host *hostp;
566 struct nlm_host_list all_hosts;
567
568 /*
569 * Note that while we're doing suspend, GC thread is active
570 * and it can destroy some hosts while we're walking through
571 * the hosts tree. To prevent that and make suspend logic
572 * a bit more simple we put all hosts to local "all_hosts"
573 * list and increment reference counter of each host.
574 * This guaranties that no hosts will be released while
575 * we're doing suspend.
576 * NOTE: reference of each host must be dropped during
577 * resume operation.
578 */
579 TAILQ_INIT(&all_hosts);
580 mutex_enter(&g->lock);
581 for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL;
582 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) {
583 /*
584 * If host is idle, remove it from idle list and
585 * clear idle flag. That is done to prevent GC
586 * from touching this host.
587 */
588 if (hostp->nh_flags & NLM_NH_INIDLE) {
589 TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
590 hostp->nh_flags &= ~NLM_NH_INIDLE;
591 }
592
593 hostp->nh_refs++;
594 TAILQ_INSERT_TAIL(&all_hosts, hostp, nh_link);
595 }
596
597 /*
598 * Now we can walk through all hosts on the system
599 * with zone globals lock released. The fact the
600 * we have taken a reference to each host guaranties
601 * that no hosts can be destroyed during that process.
602 */
603 mutex_exit(&g->lock);
604 while ((hostp = TAILQ_FIRST(&all_hosts)) != NULL) {
605 mutex_enter(&hostp->nh_lock);
606 if (nlm_host_has_locks(hostp))
607 hostp->nh_flags |= NLM_NH_SUSPEND;
608
609 mutex_exit(&hostp->nh_lock);
610 TAILQ_REMOVE(&all_hosts, hostp, nh_link);
611 }
612 }
613
614 /*
615 * Resume NLM hosts for the given zone.
616 *
617 * nlm_resume_zone() is called after hosts were suspended
618 * (see nlm_suspend_zone) and its main purpose to check
619 * whether remote locks owned by hosts are still in consistent
620 * state. If they aren't, resume function tries to reclaim
621 * reclaim locks (for client side hosts) and clean locks (for
622 * server side hosts).
623 */
624 static void
625 nlm_resume_zone(struct nlm_globals *g)
626 {
627 struct nlm_host *hostp, *h_next;
628
629 mutex_enter(&g->lock);
630 hostp = avl_first(&g->nlm_hosts_tree);
631
632 /*
633 * In nlm_suspend_zone() the reference counter of each
634 * host was incremented, so we can safely iterate through
635 * all hosts without worrying that any host we touch will
636 * be removed at the moment.
637 */
638 while (hostp != NULL) {
639 struct nlm_nsm nsm;
640 enum clnt_stat stat;
641 int32_t sm_state;
642 int error;
643 bool_t resume_failed = FALSE;
644
645 h_next = AVL_NEXT(&g->nlm_hosts_tree, hostp);
646 mutex_exit(&g->lock);
647
648 DTRACE_PROBE1(resume__host, struct nlm_host *, hostp);
649
650 /*
651 * Suspend operation marked that the host doesn't
652 * have any locks. Skip it.
653 */
654 if (!(hostp->nh_flags & NLM_NH_SUSPEND))
655 goto cycle_end;
656
657 error = nlm_nsm_init(&nsm, &hostp->nh_knc, &hostp->nh_addr);
658 if (error != 0) {
659 NLM_ERR("Resume: Failed to contact to NSM of host %s "
660 "[error=%d]\n", hostp->nh_name, error);
661 resume_failed = TRUE;
662 goto cycle_end;
663 }
664
665 stat = nlm_nsm_stat(&nsm, &sm_state);
666 if (stat != RPC_SUCCESS) {
667 NLM_ERR("Resume: Failed to call SM_STAT operation for "
668 "host %s [stat=%d]\n", hostp->nh_name, stat);
669 resume_failed = TRUE;
670 nlm_nsm_fini(&nsm);
671 goto cycle_end;
672 }
673
674 if (sm_state != hostp->nh_state) {
675 /*
676 * Current SM state of the host isn't equal
677 * to the one host had when it was suspended.
678 * Probably it was rebooted. Try to reclaim
679 * locks if the host has any on its client side.
680 * Also try to clean up its server side locks
681 * (if the host has any).
682 */
683 nlm_host_notify_client(hostp, sm_state);
684 nlm_host_notify_server(hostp, sm_state);
685 }
686
687 nlm_nsm_fini(&nsm);
688
689 cycle_end:
690 if (resume_failed) {
691 /*
692 * Resume failed for the given host.
693 * Just clean up all resources it owns.
694 */
695 nlm_host_notify_server(hostp, 0);
696 nlm_client_cancel_all(g, hostp);
697 }
698
699 hostp->nh_flags &= ~NLM_NH_SUSPEND;
700 nlm_host_release(g, hostp);
701 hostp = h_next;
702 mutex_enter(&g->lock);
703 }
704
705 mutex_exit(&g->lock);
706 }
707
708 /*
709 * NLM functions responsible for operations on NSM handle.
710 */
711
712 /*
713 * Initialize knetconfig that is used for communication
714 * with local statd via loopback interface.
715 */
716 static int
717 nlm_init_local_knc(struct knetconfig *knc)
718 {
719 int error;
720 vnode_t *vp;
721
722 bzero(knc, sizeof (*knc));
723 error = lookupname("/dev/tcp", UIO_SYSSPACE,
724 FOLLOW, NULLVPP, &vp);
725 if (error != 0)
726 return (error);
727
728 knc->knc_semantics = NC_TPI_COTS;
729 knc->knc_protofmly = NC_INET;
730 knc->knc_proto = NC_TCP;
731 knc->knc_rdev = vp->v_rdev;
732 VN_RELE(vp);
733
734
735 return (0);
736 }
737
738 /*
739 * Initialize NSM handle that will be used to talk
740 * to local statd via loopback interface.
741 */
742 static int
743 nlm_nsm_init_local(struct nlm_nsm *nsm)
744 {
745 int error;
746 struct knetconfig knc;
747 struct sockaddr_in sin;
748 struct netbuf nb;
749
750 error = nlm_init_local_knc(&knc);
751 if (error != 0)
752 return (error);
753
754 bzero(&sin, sizeof (sin));
755 sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
756 sin.sin_family = AF_INET;
757
758 nb.buf = (char *)&sin;
759 nb.len = nb.maxlen = sizeof (sin);
760
761 return (nlm_nsm_init(nsm, &knc, &nb));
762 }
763
764 /*
765 * Initialize NSM handle used for talking to statd
766 */
767 static int
768 nlm_nsm_init(struct nlm_nsm *nsm, struct knetconfig *knc, struct netbuf *nb)
769 {
770 enum clnt_stat stat;
771 int error, retries;
772
773 bzero(nsm, sizeof (*nsm));
774 nsm->ns_knc = *knc;
775 nlm_copy_netbuf(&nsm->ns_addr, nb);
776
777 /*
778 * Try several times to get the port of statd service,
779 * If rpcbind_getaddr returns RPC_PROGNOTREGISTERED,
780 * retry an attempt, but wait for NLM_NSM_RPCBIND_TIMEOUT
781 * seconds berofore.
782 */
783 for (retries = 0; retries < NLM_NSM_RPCBIND_RETRIES; retries++) {
784 stat = rpcbind_getaddr(&nsm->ns_knc, SM_PROG,
785 SM_VERS, &nsm->ns_addr);
786 if (stat != RPC_SUCCESS) {
787 if (stat == RPC_PROGNOTREGISTERED) {
788 delay(SEC_TO_TICK(NLM_NSM_RPCBIND_TIMEOUT));
789 continue;
790 }
791 }
792
793 break;
794 }
795
796 if (stat != RPC_SUCCESS) {
797 DTRACE_PROBE2(rpcbind__error, enum clnt_stat, stat,
798 int, retries);
799 error = ENOENT;
800 goto error;
801 }
802
803 /*
804 * Create an RPC handle that'll be used for communication with local
805 * statd using the status monitor protocol.
806 */
807 error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, SM_PROG, SM_VERS,
808 0, NLM_RPC_RETRIES, kcred, &nsm->ns_handle);
809 if (error != 0)
810 goto error;
811
812 /*
813 * Create an RPC handle that'll be used for communication with the
814 * local statd using the address registration protocol.
815 */
816 error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, NSM_ADDR_PROGRAM,
817 NSM_ADDR_V1, 0, NLM_RPC_RETRIES, kcred, &nsm->ns_addr_handle);
818 if (error != 0)
819 goto error;
820
821 sema_init(&nsm->ns_sem, 1, NULL, SEMA_DEFAULT, NULL);
822 return (0);
823
824 error:
825 kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen);
826 if (nsm->ns_handle)
827 CLNT_DESTROY(nsm->ns_handle);
828
829 return (error);
830 }
831
832 static void
833 nlm_nsm_fini(struct nlm_nsm *nsm)
834 {
835 kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen);
836 CLNT_DESTROY(nsm->ns_addr_handle);
837 nsm->ns_addr_handle = NULL;
838 CLNT_DESTROY(nsm->ns_handle);
839 nsm->ns_handle = NULL;
840 sema_destroy(&nsm->ns_sem);
841 }
842
843 static enum clnt_stat
844 nlm_nsm_simu_crash(struct nlm_nsm *nsm)
845 {
846 enum clnt_stat stat;
847
848 sema_p(&nsm->ns_sem);
849 nlm_nsm_clnt_init(nsm->ns_handle, nsm);
850 stat = sm_simu_crash_1(NULL, NULL, nsm->ns_handle);
851 sema_v(&nsm->ns_sem);
852
853 return (stat);
854 }
855
856 static enum clnt_stat
857 nlm_nsm_stat(struct nlm_nsm *nsm, int32_t *out_stat)
858 {
859 struct sm_name args;
860 struct sm_stat_res res;
861 enum clnt_stat stat;
862
863 args.mon_name = uts_nodename();
864 bzero(&res, sizeof (res));
865
866 sema_p(&nsm->ns_sem);
867 nlm_nsm_clnt_init(nsm->ns_handle, nsm);
868 stat = sm_stat_1(&args, &res, nsm->ns_handle);
869 sema_v(&nsm->ns_sem);
870
871 if (stat == RPC_SUCCESS)
872 *out_stat = res.state;
873
874 return (stat);
875 }
876
877 static enum clnt_stat
878 nlm_nsm_mon(struct nlm_nsm *nsm, char *hostname, uint16_t priv)
879 {
880 struct mon args;
881 struct sm_stat_res res;
882 enum clnt_stat stat;
883
884 bzero(&args, sizeof (args));
885 bzero(&res, sizeof (res));
886
887 args.mon_id.mon_name = hostname;
888 args.mon_id.my_id.my_name = uts_nodename();
889 args.mon_id.my_id.my_prog = NLM_PROG;
890 args.mon_id.my_id.my_vers = NLM_SM;
891 args.mon_id.my_id.my_proc = NLM_SM_NOTIFY1;
892 bcopy(&priv, args.priv, sizeof (priv));
893
894 sema_p(&nsm->ns_sem);
895 nlm_nsm_clnt_init(nsm->ns_handle, nsm);
896 stat = sm_mon_1(&args, &res, nsm->ns_handle);
897 sema_v(&nsm->ns_sem);
898
899 return (stat);
900 }
901
902 static enum clnt_stat
903 nlm_nsm_unmon(struct nlm_nsm *nsm, char *hostname)
904 {
905 struct mon_id args;
906 struct sm_stat res;
907 enum clnt_stat stat;
908
909 bzero(&args, sizeof (args));
910 bzero(&res, sizeof (res));
911
912 args.mon_name = hostname;
913 args.my_id.my_name = uts_nodename();
914 args.my_id.my_prog = NLM_PROG;
915 args.my_id.my_vers = NLM_SM;
916 args.my_id.my_proc = NLM_SM_NOTIFY1;
917
918 sema_p(&nsm->ns_sem);
919 nlm_nsm_clnt_init(nsm->ns_handle, nsm);
920 stat = sm_unmon_1(&args, &res, nsm->ns_handle);
921 sema_v(&nsm->ns_sem);
922
923 return (stat);
924 }
925
926 static enum clnt_stat
927 nlm_nsmaddr_reg(struct nlm_nsm *nsm, char *name, int family, netobj *address)
928 {
929 struct reg1args args = { 0 };
930 struct reg1res res = { 0 };
931 enum clnt_stat stat;
932
933 args.family = family;
934 args.name = name;
935 args.address = *address;
936
937 sema_p(&nsm->ns_sem);
938 nlm_nsm_clnt_init(nsm->ns_addr_handle, nsm);
939 stat = nsmaddrproc1_reg_1(&args, &res, nsm->ns_addr_handle);
940 sema_v(&nsm->ns_sem);
941
942 return (stat);
943 }
944
945 /*
946 * Get NLM vhold object corresponding to vnode "vp".
947 * If no such object was found, create a new one.
948 *
949 * The purpose of this function is to associate vhold
950 * object with given vnode, so that:
951 * 1) vnode is hold (VN_HOLD) while vhold object is alive.
952 * 2) host has a track of all vnodes it touched by lock
953 * or share operations. These vnodes are accessible
954 * via collection of vhold objects.
955 */
956 struct nlm_vhold *
957 nlm_vhold_get(struct nlm_host *hostp, vnode_t *vp)
958 {
959 struct nlm_vhold *nvp, *new_nvp = NULL;
960
961 mutex_enter(&hostp->nh_lock);
962 nvp = nlm_vhold_find_locked(hostp, vp);
963 if (nvp != NULL)
964 goto out;
965
966 /* nlm_vhold wasn't found, then create a new one */
967 mutex_exit(&hostp->nh_lock);
968 new_nvp = kmem_cache_alloc(nlm_vhold_cache, KM_SLEEP);
969
970 /*
971 * Check if another thread has already
972 * created the same nlm_vhold.
973 */
974 mutex_enter(&hostp->nh_lock);
975 nvp = nlm_vhold_find_locked(hostp, vp);
976 if (nvp == NULL) {
977 nvp = new_nvp;
978 new_nvp = NULL;
979
980 TAILQ_INIT(&nvp->nv_slreqs);
981 nvp->nv_vp = vp;
982 nvp->nv_refcnt = 1;
983 VN_HOLD(nvp->nv_vp);
984
985 VERIFY(mod_hash_insert(hostp->nh_vholds_by_vp,
986 (mod_hash_key_t)vp, (mod_hash_val_t)nvp) == 0);
987 TAILQ_INSERT_TAIL(&hostp->nh_vholds_list, nvp, nv_link);
988 }
989
990 out:
991 mutex_exit(&hostp->nh_lock);
992 if (new_nvp != NULL)
993 kmem_cache_free(nlm_vhold_cache, new_nvp);
994
995 return (nvp);
996 }
997
998 /*
999 * Drop a reference to vhold object nvp.
1000 */
1001 void
1002 nlm_vhold_release(struct nlm_host *hostp, struct nlm_vhold *nvp)
1003 {
1004 if (nvp == NULL)
1005 return;
1006
1007 mutex_enter(&hostp->nh_lock);
1008 ASSERT(nvp->nv_refcnt > 0);
1009 nvp->nv_refcnt--;
1010 mutex_exit(&hostp->nh_lock);
1011 }
1012
1013 /*
1014 * Clean all locks and share reservations on the
1015 * given vhold object that were acquired by the
1016 * given sysid
1017 */
1018 static void
1019 nlm_vhold_clean(struct nlm_vhold *nvp, int sysid)
1020 {
1021 cleanlocks(nvp->nv_vp, IGN_PID, sysid);
1022 cleanshares_by_sysid(nvp->nv_vp, sysid);
1023 }
1024
1025 static void
1026 nlm_vhold_destroy(struct nlm_host *hostp, struct nlm_vhold *nvp)
1027 {
1028 ASSERT(MUTEX_HELD(&hostp->nh_lock));
1029
1030 VERIFY(mod_hash_remove(hostp->nh_vholds_by_vp,
1031 (mod_hash_key_t)nvp->nv_vp,
1032 (mod_hash_val_t)&nvp) == 0);
1033
1034 TAILQ_REMOVE(&hostp->nh_vholds_list, nvp, nv_link);
1035 VN_RELE(nvp->nv_vp);
1036 nvp->nv_vp = NULL;
1037
1038 kmem_cache_free(nlm_vhold_cache, nvp);
1039 }
1040
1041 /*
1042 * Return TRUE if the given vhold is busy.
1043 * Vhold object is considered to be "busy" when
1044 * all the following conditions hold:
1045 * 1) No one uses it at the moment;
1046 * 2) It hasn't any locks;
1047 * 3) It hasn't any share reservations;
1048 */
1049 static bool_t
1050 nlm_vhold_busy(struct nlm_host *hostp, struct nlm_vhold *nvp)
1051 {
1052 vnode_t *vp;
1053 int sysid;
1054
1055 ASSERT(MUTEX_HELD(&hostp->nh_lock));
1056
1057 if (nvp->nv_refcnt > 0)
1058 return (TRUE);
1059
1060 vp = nvp->nv_vp;
1061 sysid = hostp->nh_sysid;
1062 if (flk_has_remote_locks_for_sysid(vp, sysid) ||
1063 shr_has_remote_shares(vp, sysid))
1064 return (TRUE);
1065
1066 return (FALSE);
1067 }
1068
1069 /* ARGSUSED */
1070 static int
1071 nlm_vhold_ctor(void *datap, void *cdrarg, int kmflags)
1072 {
1073 struct nlm_vhold *nvp = (struct nlm_vhold *)datap;
1074
1075 bzero(nvp, sizeof (*nvp));
1076 return (0);
1077 }
1078
1079 /* ARGSUSED */
1080 static void
1081 nlm_vhold_dtor(void *datap, void *cdrarg)
1082 {
1083 struct nlm_vhold *nvp = (struct nlm_vhold *)datap;
1084
1085 ASSERT(nvp->nv_refcnt == 0);
1086 ASSERT(TAILQ_EMPTY(&nvp->nv_slreqs));
1087 ASSERT(nvp->nv_vp == NULL);
1088 }
1089
1090 struct nlm_vhold *
1091 nlm_vhold_find_locked(struct nlm_host *hostp, const vnode_t *vp)
1092 {
1093 struct nlm_vhold *nvp = NULL;
1094
1095 ASSERT(MUTEX_HELD(&hostp->nh_lock));
1096 (void) mod_hash_find(hostp->nh_vholds_by_vp,
1097 (mod_hash_key_t)vp,
1098 (mod_hash_val_t)&nvp);
1099
1100 if (nvp != NULL)
1101 nvp->nv_refcnt++;
1102
1103 return (nvp);
1104 }
1105
1106 /*
1107 * NLM host functions
1108 */
1109 static void
1110 nlm_copy_netbuf(struct netbuf *dst, struct netbuf *src)
1111 {
1112 ASSERT(src->len <= src->maxlen);
1113
1114 dst->maxlen = src->maxlen;
1115 dst->len = src->len;
1116 dst->buf = kmem_zalloc(src->maxlen, KM_SLEEP);
1117 bcopy(src->buf, dst->buf, src->len);
1118 }
1119
1120 /* ARGSUSED */
1121 static int
1122 nlm_host_ctor(void *datap, void *cdrarg, int kmflags)
1123 {
1124 struct nlm_host *hostp = (struct nlm_host *)datap;
1125
1126 bzero(hostp, sizeof (*hostp));
1127 return (0);
1128 }
1129
1130 /* ARGSUSED */
1131 static void
1132 nlm_host_dtor(void *datap, void *cdrarg)
1133 {
1134 struct nlm_host *hostp = (struct nlm_host *)datap;
1135 ASSERT(hostp->nh_refs == 0);
1136 }
1137
1138 static void
1139 nlm_host_unregister(struct nlm_globals *g, struct nlm_host *hostp)
1140 {
1141 ASSERT(hostp->nh_refs == 0);
1142
1143 avl_remove(&g->nlm_hosts_tree, hostp);
1144 VERIFY(mod_hash_remove(g->nlm_hosts_hash,
1145 (mod_hash_key_t)(uintptr_t)hostp->nh_sysid,
1146 (mod_hash_val_t)&hostp) == 0);
1147 TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
1148 hostp->nh_flags &= ~NLM_NH_INIDLE;
1149 }
1150
1151 /*
1152 * Free resources used by a host. This is called after the reference
1153 * count has reached zero so it doesn't need to worry about locks.
1154 */
1155 static void
1156 nlm_host_destroy(struct nlm_host *hostp)
1157 {
1158 ASSERT(hostp->nh_name != NULL);
1159 ASSERT(hostp->nh_netid != NULL);
1160 ASSERT(TAILQ_EMPTY(&hostp->nh_vholds_list));
1161
1162 strfree(hostp->nh_name);
1163 strfree(hostp->nh_netid);
1164 kmem_free(hostp->nh_addr.buf, hostp->nh_addr.maxlen);
1165
1166 if (hostp->nh_sysid != LM_NOSYSID)
1167 nlm_sysid_free(hostp->nh_sysid);
1168
1169 nlm_rpc_cache_destroy(hostp);
1170
1171 ASSERT(TAILQ_EMPTY(&hostp->nh_vholds_list));
1172 mod_hash_destroy_ptrhash(hostp->nh_vholds_by_vp);
1173
1174 mutex_destroy(&hostp->nh_lock);
1175 cv_destroy(&hostp->nh_rpcb_cv);
1176 cv_destroy(&hostp->nh_recl_cv);
1177
1178 kmem_cache_free(nlm_hosts_cache, hostp);
1179 }
1180
1181 /*
1182 * Cleanup SERVER-side state after a client restarts,
1183 * or becomes unresponsive, or whatever.
1184 *
1185 * We unlock any active locks owned by the host.
1186 * When rpc.lockd is shutting down,
1187 * this function is called with newstate set to zero
1188 * which allows us to cancel any pending async locks
1189 * and clear the locking state.
1190 *
1191 * When "state" is 0, we don't update host's state,
1192 * but cleanup all remote locks on the host.
1193 * It's useful to call this function for resources
1194 * cleanup.
1195 */
1196 void
1197 nlm_host_notify_server(struct nlm_host *hostp, int32_t state)
1198 {
1199 struct nlm_vhold *nvp;
1200 struct nlm_slreq *slr;
1201 struct nlm_slreq_list slreqs2free;
1202
1203 TAILQ_INIT(&slreqs2free);
1204 mutex_enter(&hostp->nh_lock);
1205 if (state != 0)
1206 hostp->nh_state = state;
1207
1208 TAILQ_FOREACH(nvp, &hostp->nh_vholds_list, nv_link) {
1209
1210 /* cleanup sleeping requests at first */
1211 while ((slr = TAILQ_FIRST(&nvp->nv_slreqs)) != NULL) {
1212 TAILQ_REMOVE(&nvp->nv_slreqs, slr, nsr_link);
1213
1214 /*
1215 * Instead of freeing cancelled sleeping request
1216 * here, we add it to the linked list created
1217 * on the stack in order to do all frees outside
1218 * the critical section.
1219 */
1220 TAILQ_INSERT_TAIL(&slreqs2free, slr, nsr_link);
1221 }
1222
1223 nvp->nv_refcnt++;
1224 mutex_exit(&hostp->nh_lock);
1225
1226 nlm_vhold_clean(nvp, hostp->nh_sysid);
1227
1228 mutex_enter(&hostp->nh_lock);
1229 nvp->nv_refcnt--;
1230 }
1231
1232 mutex_exit(&hostp->nh_lock);
1233 while ((slr = TAILQ_FIRST(&slreqs2free)) != NULL) {
1234 TAILQ_REMOVE(&slreqs2free, slr, nsr_link);
1235 kmem_free(slr, sizeof (*slr));
1236 }
1237 }
1238
1239 /*
1240 * Cleanup CLIENT-side state after a server restarts,
1241 * or becomes unresponsive, or whatever.
1242 *
1243 * This is called by the local NFS statd when we receive a
1244 * host state change notification. (also nlm_svc_stopping)
1245 *
1246 * Deal with a server restart. If we are stopping the
1247 * NLM service, we'll have newstate == 0, and will just
1248 * cancel all our client-side lock requests. Otherwise,
1249 * start the "recovery" process to reclaim any locks
1250 * we hold on this server.
1251 */
1252 void
1253 nlm_host_notify_client(struct nlm_host *hostp, int32_t state)
1254 {
1255 mutex_enter(&hostp->nh_lock);
1256 hostp->nh_state = state;
1257 if (hostp->nh_flags & NLM_NH_RECLAIM) {
1258 /*
1259 * Either host's state is up to date or
1260 * host is already in recovery.
1261 */
1262 mutex_exit(&hostp->nh_lock);
1263 return;
1264 }
1265
1266 hostp->nh_flags |= NLM_NH_RECLAIM;
1267
1268 /*
1269 * Host will be released by the recovery thread,
1270 * thus we need to increment refcount.
1271 */
1272 hostp->nh_refs++;
1273 mutex_exit(&hostp->nh_lock);
1274
1275 (void) zthread_create(NULL, 0, nlm_reclaimer,
1276 hostp, 0, minclsyspri);
1277 }
1278
1279 /*
1280 * The function is called when NLM client detects that
1281 * server has entered in grace period and client needs
1282 * to wait until reclamation process (if any) does
1283 * its job.
1284 */
1285 int
1286 nlm_host_wait_grace(struct nlm_host *hostp)
1287 {
1288 struct nlm_globals *g;
1289 int error = 0;
1290
1291 g = zone_getspecific(nlm_zone_key, curzone);
1292 mutex_enter(&hostp->nh_lock);
1293
1294 do {
1295 int rc;
1296
1297 rc = cv_timedwait_sig(&hostp->nh_recl_cv,
1298 &hostp->nh_lock, ddi_get_lbolt() +
1299 SEC_TO_TICK(g->retrans_tmo));
1300
1301 if (rc == 0) {
1302 error = EINTR;
1303 break;
1304 }
1305 } while (hostp->nh_flags & NLM_NH_RECLAIM);
1306
1307 mutex_exit(&hostp->nh_lock);
1308 return (error);
1309 }
1310
1311 /*
1312 * Create a new NLM host.
1313 *
1314 * NOTE: The in-kernel RPC (kRPC) subsystem uses TLI/XTI,
1315 * which needs both a knetconfig and an address when creating
1316 * endpoints. Thus host object stores both knetconfig and
1317 * netid.
1318 */
1319 static struct nlm_host *
1320 nlm_host_create(char *name, const char *netid,
1321 struct knetconfig *knc, struct netbuf *naddr)
1322 {
1323 struct nlm_host *host;
1324
1325 host = kmem_cache_alloc(nlm_hosts_cache, KM_SLEEP);
1326
1327 mutex_init(&host->nh_lock, NULL, MUTEX_DEFAULT, NULL);
1328 cv_init(&host->nh_rpcb_cv, NULL, CV_DEFAULT, NULL);
1329 cv_init(&host->nh_recl_cv, NULL, CV_DEFAULT, NULL);
1330
1331 host->nh_sysid = LM_NOSYSID;
1332 host->nh_refs = 1;
1333 host->nh_name = strdup(name);
1334 host->nh_netid = strdup(netid);
1335 host->nh_knc = *knc;
1336 nlm_copy_netbuf(&host->nh_addr, naddr);
1337
1338 host->nh_state = 0;
1339 host->nh_rpcb_state = NRPCB_NEED_UPDATE;
1340 host->nh_flags = 0;
1341
1342 host->nh_vholds_by_vp = mod_hash_create_ptrhash("nlm vholds hash",
1343 32, mod_hash_null_valdtor, sizeof (vnode_t));
1344
1345 TAILQ_INIT(&host->nh_vholds_list);
1346 TAILQ_INIT(&host->nh_rpchc);
1347
1348 return (host);
1349 }
1350
1351 /*
1352 * Cancel all client side sleeping locks owned by given host.
1353 */
1354 void
1355 nlm_host_cancel_slocks(struct nlm_globals *g, struct nlm_host *hostp)
1356 {
1357 struct nlm_slock *nslp;
1358
1359 mutex_enter(&g->lock);
1360 TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) {
1361 if (nslp->nsl_host == hostp) {
1362 nslp->nsl_state = NLM_SL_CANCELLED;
1363 cv_broadcast(&nslp->nsl_cond);
1364 }
1365 }
1366
1367 mutex_exit(&g->lock);
1368 }
1369
1370 /*
1371 * Garbage collect stale vhold objects.
1372 *
1373 * In other words check whether vnodes that are
1374 * held by vhold objects still have any locks
1375 * or shares or still in use. If they aren't,
1376 * just destroy them.
1377 */
1378 static void
1379 nlm_host_gc_vholds(struct nlm_host *hostp)
1380 {
1381 struct nlm_vhold *nvp;
1382
1383 ASSERT(MUTEX_HELD(&hostp->nh_lock));
1384
1385 nvp = TAILQ_FIRST(&hostp->nh_vholds_list);
1386 while (nvp != NULL) {
1387 struct nlm_vhold *nvp_tmp;
1388
1389 if (nlm_vhold_busy(hostp, nvp)) {
1390 nvp = TAILQ_NEXT(nvp, nv_link);
1391 continue;
1392 }
1393
1394 nvp_tmp = TAILQ_NEXT(nvp, nv_link);
1395 nlm_vhold_destroy(hostp, nvp);
1396 nvp = nvp_tmp;
1397 }
1398 }
1399
1400 /*
1401 * Check whether the given host has any
1402 * server side locks or share reservations.
1403 */
1404 static bool_t
1405 nlm_host_has_srv_locks(struct nlm_host *hostp)
1406 {
1407 /*
1408 * It's cheap and simple: if server has
1409 * any locks/shares there must be vhold
1410 * object storing the affected vnode.
1411 *
1412 * NOTE: We don't need to check sleeping
1413 * locks on the server side, because if
1414 * server side sleeping lock is alive,
1415 * there must be a vhold object corresponding
1416 * to target vnode.
1417 */
1418 ASSERT(MUTEX_HELD(&hostp->nh_lock));
1419 if (!TAILQ_EMPTY(&hostp->nh_vholds_list))
1420 return (TRUE);
1421
1422 return (FALSE);
1423 }
1424
1425 /*
1426 * Check whether the given host has any client side
1427 * locks or share reservations.
1428 */
1429 static bool_t
1430 nlm_host_has_cli_locks(struct nlm_host *hostp)
1431 {
1432 ASSERT(MUTEX_HELD(&hostp->nh_lock));
1433
1434 /*
1435 * XXX: It's not the way I'd like to do the check,
1436 * because flk_sysid_has_locks() can be very
1437 * expensive by design. Unfortunatelly it iterates
1438 * through all locks on the system, doesn't matter
1439 * were they made on remote system via NLM or
1440 * on local system via reclock. To understand the
1441 * problem, consider that there're dozens of thousands
1442 * of locks that are made on some ZFS dataset. And there's
1443 * another dataset shared by NFS where NLM client had locks
1444 * some time ago, but doesn't have them now.
1445 * In this case flk_sysid_has_locks() will iterate
1446 * thrught dozens of thousands locks until it returns us
1447 * FALSE.
1448 * Oh, I hope that in shiny future somebody will make
1449 * local lock manager (os/flock.c) better, so that
1450 * it'd be more friedly to remote locks and
1451 * flk_sysid_has_locks() wouldn't be so expensive.
1452 */
1453 if (flk_sysid_has_locks(hostp->nh_sysid |
1454 LM_SYSID_CLIENT, FLK_QUERY_ACTIVE))
1455 return (TRUE);
1456
1457 /*
1458 * Check whether host has any share reservations
1459 * registered on the client side.
1460 */
1461 if (hostp->nh_shrlist != NULL)
1462 return (TRUE);
1463
1464 return (FALSE);
1465 }
1466
1467 /*
1468 * Determine whether the given host owns any
1469 * locks or share reservations.
1470 */
1471 static bool_t
1472 nlm_host_has_locks(struct nlm_host *hostp)
1473 {
1474 if (nlm_host_has_srv_locks(hostp))
1475 return (TRUE);
1476
1477 return (nlm_host_has_cli_locks(hostp));
1478 }
1479
1480 /*
1481 * This function compares only addresses of two netbufs
1482 * that belong to NC_TCP[6] or NC_UDP[6] protofamily.
1483 * Port part of netbuf is ignored.
1484 *
1485 * Return values:
1486 * -1: nb1's address is "smaller" than nb2's
1487 * 0: addresses are equal
1488 * 1: nb1's address is "greater" than nb2's
1489 */
1490 static int
1491 nlm_netbuf_addrs_cmp(struct netbuf *nb1, struct netbuf *nb2)
1492 {
1493 union nlm_addr {
1494 struct sockaddr sa;
1495 struct sockaddr_in sin;
1496 struct sockaddr_in6 sin6;
1497 } *na1, *na2;
1498 int res;
1499
1500 /* LINTED E_BAD_PTR_CAST_ALIGN */
1501 na1 = (union nlm_addr *)nb1->buf;
1502 /* LINTED E_BAD_PTR_CAST_ALIGN */
1503 na2 = (union nlm_addr *)nb2->buf;
1504
1505 if (na1->sa.sa_family < na2->sa.sa_family)
1506 return (-1);
1507 if (na1->sa.sa_family > na2->sa.sa_family)
1508 return (1);
1509
1510 switch (na1->sa.sa_family) {
1511 case AF_INET:
1512 res = memcmp(&na1->sin.sin_addr, &na2->sin.sin_addr,
1513 sizeof (na1->sin.sin_addr));
1514 break;
1515 case AF_INET6:
1516 res = memcmp(&na1->sin6.sin6_addr, &na2->sin6.sin6_addr,
1517 sizeof (na1->sin6.sin6_addr));
1518 break;
1519 default:
1520 VERIFY(0);
1521 return (0);
1522 }
1523
1524 return (SIGN(res));
1525 }
1526
1527 /*
1528 * Compare two nlm hosts.
1529 * Return values:
1530 * -1: host1 is "smaller" than host2
1531 * 0: host1 is equal to host2
1532 * 1: host1 is "greater" than host2
1533 */
1534 int
1535 nlm_host_cmp(const void *p1, const void *p2)
1536 {
1537 struct nlm_host *h1 = (struct nlm_host *)p1;
1538 struct nlm_host *h2 = (struct nlm_host *)p2;
1539 int res;
1540
1541 res = strcmp(h1->nh_netid, h2->nh_netid);
1542 if (res != 0)
1543 return (SIGN(res));
1544
1545 res = nlm_netbuf_addrs_cmp(&h1->nh_addr, &h2->nh_addr);
1546 return (res);
1547 }
1548
1549 /*
1550 * Find the host specified by... (see below)
1551 * If found, increment the ref count.
1552 */
1553 static struct nlm_host *
1554 nlm_host_find_locked(struct nlm_globals *g, const char *netid,
1555 struct netbuf *naddr, avl_index_t *wherep)
1556 {
1557 struct nlm_host *hostp, key;
1558 avl_index_t pos;
1559
1560 ASSERT(MUTEX_HELD(&g->lock));
1561
1562 key.nh_netid = (char *)netid;
1563 key.nh_addr.buf = naddr->buf;
1564 key.nh_addr.len = naddr->len;
1565 key.nh_addr.maxlen = naddr->maxlen;
1566
1567 hostp = avl_find(&g->nlm_hosts_tree, &key, &pos);
1568
1569 if (hostp != NULL) {
1570 /*
1571 * Host is inuse now. Remove it from idle
1572 * hosts list if needed.
1573 */
1574 if (hostp->nh_flags & NLM_NH_INIDLE) {
1575 TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
1576 hostp->nh_flags &= ~NLM_NH_INIDLE;
1577 }
1578
1579 hostp->nh_refs++;
1580 }
1581 if (wherep != NULL)
1582 *wherep = pos;
1583
1584 return (hostp);
1585 }
1586
1587 /*
1588 * Find NLM host for the given name and address.
1589 */
1590 struct nlm_host *
1591 nlm_host_find(struct nlm_globals *g, const char *netid,
1592 struct netbuf *addr)
1593 {
1594 struct nlm_host *hostp = NULL;
1595
1596 mutex_enter(&g->lock);
1597 if (g->run_status != NLM_ST_UP)
1598 goto out;
1599
1600 hostp = nlm_host_find_locked(g, netid, addr, NULL);
1601
1602 out:
1603 mutex_exit(&g->lock);
1604 return (hostp);
1605 }
1606
1607
1608 /*
1609 * Find or create an NLM host for the given name and address.
1610 *
1611 * The remote host is determined by all of: name, netid, address.
1612 * Note that the netid is whatever nlm_svc_add_ep() gave to
1613 * svc_tli_kcreate() for the service binding. If any of these
1614 * are different, allocate a new host (new sysid).
1615 */
1616 struct nlm_host *
1617 nlm_host_findcreate(struct nlm_globals *g, char *name,
1618 const char *netid, struct netbuf *addr)
1619 {
1620 int err;
1621 struct nlm_host *host, *newhost = NULL;
1622 struct knetconfig knc;
1623 avl_index_t where;
1624
1625 mutex_enter(&g->lock);
1626 if (g->run_status != NLM_ST_UP) {
1627 mutex_exit(&g->lock);
1628 return (NULL);
1629 }
1630
1631 host = nlm_host_find_locked(g, netid, addr, NULL);
1632 mutex_exit(&g->lock);
1633 if (host != NULL)
1634 return (host);
1635
1636 err = nlm_knc_from_netid(netid, &knc);
1637 if (err != 0)
1638 return (NULL);
1639 /*
1640 * Do allocations (etc.) outside of mutex,
1641 * and then check again before inserting.
1642 */
1643 newhost = nlm_host_create(name, netid, &knc, addr);
1644 newhost->nh_sysid = nlm_sysid_alloc();
1645 if (newhost->nh_sysid == LM_NOSYSID)
1646 goto out;
1647
1648 mutex_enter(&g->lock);
1649 host = nlm_host_find_locked(g, netid, addr, &where);
1650 if (host == NULL) {
1651 host = newhost;
1652 newhost = NULL;
1653
1654 /*
1655 * Insert host to the hosts AVL tree that is
1656 * used to lookup by <netid, address> pair.
1657 */
1658 avl_insert(&g->nlm_hosts_tree, host, where);
1659
1660 /*
1661 * Insert host to the hosts hash table that is
1662 * used to lookup host by sysid.
1663 */
1664 VERIFY(mod_hash_insert(g->nlm_hosts_hash,
1665 (mod_hash_key_t)(uintptr_t)host->nh_sysid,
1666 (mod_hash_val_t)host) == 0);
1667 }
1668
1669 mutex_exit(&g->lock);
1670
1671 out:
1672 if (newhost != NULL) {
1673 /*
1674 * We do not need the preallocated nlm_host
1675 * so decrement the reference counter
1676 * and destroy it.
1677 */
1678 newhost->nh_refs--;
1679 nlm_host_destroy(newhost);
1680 }
1681
1682 return (host);
1683 }
1684
1685 /*
1686 * Find the NLM host that matches the value of 'sysid'.
1687 * If found, return it with a new ref,
1688 * else return NULL.
1689 */
1690 struct nlm_host *
1691 nlm_host_find_by_sysid(struct nlm_globals *g, sysid_t sysid)
1692 {
1693 struct nlm_host *hostp = NULL;
1694
1695 mutex_enter(&g->lock);
1696 if (g->run_status != NLM_ST_UP)
1697 goto out;
1698
1699 (void) mod_hash_find(g->nlm_hosts_hash,
1700 (mod_hash_key_t)(uintptr_t)sysid,
1701 (mod_hash_val_t)&hostp);
1702
1703 if (hostp == NULL)
1704 goto out;
1705
1706 /*
1707 * Host is inuse now. Remove it
1708 * from idle hosts list if needed.
1709 */
1710 if (hostp->nh_flags & NLM_NH_INIDLE) {
1711 TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
1712 hostp->nh_flags &= ~NLM_NH_INIDLE;
1713 }
1714
1715 hostp->nh_refs++;
1716
1717 out:
1718 mutex_exit(&g->lock);
1719 return (hostp);
1720 }
1721
1722 /*
1723 * Release the given host.
1724 * I.e. drop a reference that was taken earlier by one of
1725 * the following functions: nlm_host_findcreate(), nlm_host_find(),
1726 * nlm_host_find_by_sysid().
1727 *
1728 * When the very last reference is dropped, host is moved to
1729 * so-called "idle state". All hosts that are in idle state
1730 * have an idle timeout. If timeout is expired, GC thread
1731 * checks whether hosts have any locks and if they heven't
1732 * any, it removes them.
1733 * NOTE: only unused hosts can be in idle state.
1734 */
1735 void
1736 nlm_host_release(struct nlm_globals *g, struct nlm_host *hostp)
1737 {
1738 if (hostp == NULL)
1739 return;
1740
1741 mutex_enter(&g->lock);
1742 ASSERT(hostp->nh_refs > 0);
1743
1744 hostp->nh_refs--;
1745 if (hostp->nh_refs != 0) {
1746 mutex_exit(&g->lock);
1747 return;
1748 }
1749
1750 /*
1751 * The very last reference to the host was dropped,
1752 * thus host is unused now. Set its idle timeout
1753 * and move it to the idle hosts LRU list.
1754 */
1755 hostp->nh_idle_timeout = ddi_get_lbolt() +
1756 SEC_TO_TICK(g->cn_idle_tmo);
1757
1758 ASSERT((hostp->nh_flags & NLM_NH_INIDLE) == 0);
1759 TAILQ_INSERT_TAIL(&g->nlm_idle_hosts, hostp, nh_link);
1760 hostp->nh_flags |= NLM_NH_INIDLE;
1761 mutex_exit(&g->lock);
1762 }
1763
1764 /*
1765 * Unregister this NLM host (NFS client) with the local statd
1766 * due to idleness (no locks held for a while).
1767 */
1768 void
1769 nlm_host_unmonitor(struct nlm_globals *g, struct nlm_host *host)
1770 {
1771 enum clnt_stat stat;
1772
1773 VERIFY(host->nh_refs == 0);
1774 if (!(host->nh_flags & NLM_NH_MONITORED))
1775 return;
1776
1777 host->nh_flags &= ~NLM_NH_MONITORED;
1778 stat = nlm_nsm_unmon(&g->nlm_nsm, host->nh_name);
1779 if (stat != RPC_SUCCESS) {
1780 NLM_WARN("NLM: Failed to contact statd, stat=%d\n", stat);
1781 return;
1782 }
1783 }
1784
1785 /*
1786 * Ask the local NFS statd to begin monitoring this host.
1787 * It will call us back when that host restarts, using the
1788 * prog,vers,proc specified below, i.e. NLM_SM_NOTIFY1,
1789 * which is handled in nlm_do_notify1().
1790 */
1791 void
1792 nlm_host_monitor(struct nlm_globals *g, struct nlm_host *host, int state)
1793 {
1794 int family;
1795 netobj obj;
1796 enum clnt_stat stat;
1797
1798 if (state != 0 && host->nh_state == 0) {
1799 /*
1800 * This is the first time we have seen an NSM state
1801 * Value for this host. We record it here to help
1802 * detect host reboots.
1803 */
1804 host->nh_state = state;
1805 }
1806
1807 mutex_enter(&host->nh_lock);
1808 if (host->nh_flags & NLM_NH_MONITORED) {
1809 mutex_exit(&host->nh_lock);
1810 return;
1811 }
1812
1813 host->nh_flags |= NLM_NH_MONITORED;
1814 mutex_exit(&host->nh_lock);
1815
1816 /*
1817 * Before we begin monitoring the host register the network address
1818 * associated with this hostname.
1819 */
1820 nlm_netbuf_to_netobj(&host->nh_addr, &family, &obj);
1821 stat = nlm_nsmaddr_reg(&g->nlm_nsm, host->nh_name, family, &obj);
1822 if (stat != RPC_SUCCESS) {
1823 NLM_WARN("Failed to register address, stat=%d\n", stat);
1824 mutex_enter(&g->lock);
1825 host->nh_flags &= ~NLM_NH_MONITORED;
1826 mutex_exit(&g->lock);
1827
1828 return;
1829 }
1830
1831 /*
1832 * Tell statd how to call us with status updates for
1833 * this host. Updates arrive via nlm_do_notify1().
1834 *
1835 * We put our assigned system ID value in the priv field to
1836 * make it simpler to find the host if we are notified of a
1837 * host restart.
1838 */
1839 stat = nlm_nsm_mon(&g->nlm_nsm, host->nh_name, host->nh_sysid);
1840 if (stat != RPC_SUCCESS) {
1841 NLM_WARN("Failed to contact local NSM, stat=%d\n", stat);
1842 mutex_enter(&g->lock);
1843 host->nh_flags &= ~NLM_NH_MONITORED;
1844 mutex_exit(&g->lock);
1845
1846 return;
1847 }
1848 }
1849
1850 int
1851 nlm_host_get_state(struct nlm_host *hostp)
1852 {
1853
1854 return (hostp->nh_state);
1855 }
1856
1857 /*
1858 * NLM client/server sleeping locks
1859 */
1860
1861 /*
1862 * Register client side sleeping lock.
1863 *
1864 * Our client code calls this to keep information
1865 * about sleeping lock somewhere. When it receives
1866 * grant callback from server or when it just
1867 * needs to remove all sleeping locks from vnode,
1868 * it uses this information for remove/apply lock
1869 * properly.
1870 */
1871 struct nlm_slock *
1872 nlm_slock_register(
1873 struct nlm_globals *g,
1874 struct nlm_host *host,
1875 struct nlm4_lock *lock,
1876 struct vnode *vp)
1877 {
1878 struct nlm_slock *nslp;
1879
1880 nslp = kmem_zalloc(sizeof (*nslp), KM_SLEEP);
1881 cv_init(&nslp->nsl_cond, NULL, CV_DEFAULT, NULL);
1882 nslp->nsl_lock = *lock;
1883 nlm_copy_netobj(&nslp->nsl_fh, &nslp->nsl_lock.fh);
1884 nslp->nsl_state = NLM_SL_BLOCKED;
1885 nslp->nsl_host = host;
1886 nslp->nsl_vp = vp;
1887
1888 mutex_enter(&g->lock);
1889 TAILQ_INSERT_TAIL(&g->nlm_slocks, nslp, nsl_link);
1890 mutex_exit(&g->lock);
1891
1892 return (nslp);
1893 }
1894
1895 /*
1896 * Remove this lock from the wait list and destroy it.
1897 */
1898 void
1899 nlm_slock_unregister(struct nlm_globals *g, struct nlm_slock *nslp)
1900 {
1901 mutex_enter(&g->lock);
1902 TAILQ_REMOVE(&g->nlm_slocks, nslp, nsl_link);
1903 mutex_exit(&g->lock);
1904
1905 kmem_free(nslp->nsl_fh.n_bytes, nslp->nsl_fh.n_len);
1906 cv_destroy(&nslp->nsl_cond);
1907 kmem_free(nslp, sizeof (*nslp));
1908 }
1909
1910 /*
1911 * Wait for a granted callback or cancellation event
1912 * for a sleeping lock.
1913 *
1914 * If a signal interrupted the wait or if the lock
1915 * was cancelled, return EINTR - the caller must arrange to send
1916 * a cancellation to the server.
1917 *
1918 * If timeout occurred, return ETIMEDOUT - the caller must
1919 * resend the lock request to the server.
1920 *
1921 * On success return 0.
1922 */
1923 int
1924 nlm_slock_wait(struct nlm_globals *g,
1925 struct nlm_slock *nslp, uint_t timeo_secs)
1926 {
1927 clock_t timeo_ticks;
1928 int cv_res, error;
1929
1930 /*
1931 * If the granted message arrived before we got here,
1932 * nslp->nsl_state will be NLM_SL_GRANTED - in that case don't sleep.
1933 */
1934 cv_res = 1;
1935 timeo_ticks = ddi_get_lbolt() + SEC_TO_TICK(timeo_secs);
1936
1937 mutex_enter(&g->lock);
1938 while (nslp->nsl_state == NLM_SL_BLOCKED && cv_res > 0) {
1939 cv_res = cv_timedwait_sig(&nslp->nsl_cond,
1940 &g->lock, timeo_ticks);
1941 }
1942
1943 /*
1944 * No matter why we wake up, if the lock was
1945 * cancelled, let the function caller to know
1946 * about it by returning EINTR.
1947 */
1948 if (nslp->nsl_state == NLM_SL_CANCELLED) {
1949 error = EINTR;
1950 goto out;
1951 }
1952
1953 if (cv_res <= 0) {
1954 /* We were woken up either by timeout or by interrupt */
1955 error = (cv_res < 0) ? ETIMEDOUT : EINTR;
1956
1957 /*
1958 * The granted message may arrive after the
1959 * interrupt/timeout but before we manage to lock the
1960 * mutex. Detect this by examining nslp.
1961 */
1962 if (nslp->nsl_state == NLM_SL_GRANTED)
1963 error = 0;
1964 } else { /* Awaken via cv_signal()/cv_broadcast() or didn't block */
1965 error = 0;
1966 VERIFY(nslp->nsl_state == NLM_SL_GRANTED);
1967 }
1968
1969 out:
1970 mutex_exit(&g->lock);
1971 return (error);
1972 }
1973
1974 /*
1975 * Mark client side sleeping lock as granted
1976 * and wake up a process blocked on the lock.
1977 * Called from server side NLM_GRANT handler.
1978 *
1979 * If sleeping lock is found return 0, otherwise
1980 * return ENOENT.
1981 */
1982 int
1983 nlm_slock_grant(struct nlm_globals *g,
1984 struct nlm_host *hostp, struct nlm4_lock *alock)
1985 {
1986 struct nlm_slock *nslp;
1987 int error = ENOENT;
1988
1989 mutex_enter(&g->lock);
1990 TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) {
1991 if ((nslp->nsl_state != NLM_SL_BLOCKED) ||
1992 (nslp->nsl_host != hostp))
1993 continue;
1994
1995 if (alock->svid == nslp->nsl_lock.svid &&
1996 alock->l_offset == nslp->nsl_lock.l_offset &&
1997 alock->l_len == nslp->nsl_lock.l_len &&
1998 alock->fh.n_len == nslp->nsl_lock.fh.n_len &&
1999 bcmp(alock->fh.n_bytes, nslp->nsl_lock.fh.n_bytes,
2000 nslp->nsl_lock.fh.n_len) == 0) {
2001 nslp->nsl_state = NLM_SL_GRANTED;
2002 cv_broadcast(&nslp->nsl_cond);
2003 error = 0;
2004 break;
2005 }
2006 }
2007
2008 mutex_exit(&g->lock);
2009 return (error);
2010 }
2011
2012 /*
2013 * Register sleeping lock request corresponding to
2014 * flp on the given vhold object.
2015 * On success function returns 0, otherwise (if
2016 * lock request with the same flp is already
2017 * registered) function returns EEXIST.
2018 */
2019 int
2020 nlm_slreq_register(struct nlm_host *hostp, struct nlm_vhold *nvp,
2021 struct flock64 *flp)
2022 {
2023 struct nlm_slreq *slr, *new_slr = NULL;
2024 int ret = EEXIST;
2025
2026 mutex_enter(&hostp->nh_lock);
2027 slr = nlm_slreq_find_locked(hostp, nvp, flp);
2028 if (slr != NULL)
2029 goto out;
2030
2031 mutex_exit(&hostp->nh_lock);
2032 new_slr = kmem_zalloc(sizeof (*slr), KM_SLEEP);
2033 bcopy(flp, &new_slr->nsr_fl, sizeof (*flp));
2034
2035 mutex_enter(&hostp->nh_lock);
2036 slr = nlm_slreq_find_locked(hostp, nvp, flp);
2037 if (slr == NULL) {
2038 slr = new_slr;
2039 new_slr = NULL;
2040 ret = 0;
2041
2042 TAILQ_INSERT_TAIL(&nvp->nv_slreqs, slr, nsr_link);
2043 }
2044
2045 out:
2046 mutex_exit(&hostp->nh_lock);
2047 if (new_slr != NULL)
2048 kmem_free(new_slr, sizeof (*new_slr));
2049
2050 return (ret);
2051 }
2052
2053 /*
2054 * Unregister sleeping lock request corresponding
2055 * to flp from the given vhold object.
2056 * On success function returns 0, otherwise (if
2057 * lock request corresponding to flp isn't found
2058 * on the given vhold) function returns ENOENT.
2059 */
2060 int
2061 nlm_slreq_unregister(struct nlm_host *hostp, struct nlm_vhold *nvp,
2062 struct flock64 *flp)
2063 {
2064 struct nlm_slreq *slr;
2065
2066 mutex_enter(&hostp->nh_lock);
2067 slr = nlm_slreq_find_locked(hostp, nvp, flp);
2068 if (slr == NULL) {
2069 mutex_exit(&hostp->nh_lock);
2070 return (ENOENT);
2071 }
2072
2073 TAILQ_REMOVE(&nvp->nv_slreqs, slr, nsr_link);
2074 mutex_exit(&hostp->nh_lock);
2075
2076 kmem_free(slr, sizeof (*slr));
2077 return (0);
2078 }
2079
2080 /*
2081 * Find sleeping lock request on the given vhold object by flp.
2082 */
2083 struct nlm_slreq *
2084 nlm_slreq_find_locked(struct nlm_host *hostp, struct nlm_vhold *nvp,
2085 struct flock64 *flp)
2086 {
2087 struct nlm_slreq *slr = NULL;
2088
2089 ASSERT(MUTEX_HELD(&hostp->nh_lock));
2090 TAILQ_FOREACH(slr, &nvp->nv_slreqs, nsr_link) {
2091 if (slr->nsr_fl.l_start == flp->l_start &&
2092 slr->nsr_fl.l_len == flp->l_len &&
2093 slr->nsr_fl.l_pid == flp->l_pid &&
2094 slr->nsr_fl.l_type == flp->l_type)
2095 break;
2096 }
2097
2098 return (slr);
2099 }
2100
/*
 * NLM tracks active share reservations made on the client side.
 * It needs to keep track of share reservations for two purposes:
 * 1) to determine if nlm_host is busy (if it has active locks and/or
 *    share reservations, it is)
 * 2) to recover active share reservations when NLM server reports
 *    that it has rebooted.
 *
 * Unfortunately the Illumos local share reservations manager (see
 * os/share.c) doesn't have the ability to look up all reservations on
 * the system by sysid (like the local lock manager does) or to get all
 * reservations by sysid. It tracks reservations per vnode and is able
 * to get/look them up on a particular vnode. That's not what NLM
 * needs, hence this ugly share reservations tracking scheme.
 */
2116
2117 void
2118 nlm_shres_track(struct nlm_host *hostp, vnode_t *vp, struct shrlock *shrp)
2119 {
2120 struct nlm_shres *nsp, *nsp_new;
2121
2122 /*
2123 * NFS code must fill the s_owner, so that
2124 * s_own_len is never 0.
2125 */
2126 ASSERT(shrp->s_own_len > 0);
2127 nsp_new = nlm_shres_create_item(shrp, vp);
2128
2129 mutex_enter(&hostp->nh_lock);
2130 for (nsp = hostp->nh_shrlist; nsp != NULL; nsp = nsp->ns_next)
2131 if (nsp->ns_vp == vp && nlm_shres_equal(shrp, nsp->ns_shr))
2132 break;
2133
2134 if (nsp != NULL) {
2135 /*
2136 * Found a duplicate. Do nothing.
2137 */
2138
2139 goto out;
2140 }
2141
2142 nsp = nsp_new;
2143 nsp_new = NULL;
2144 nsp->ns_next = hostp->nh_shrlist;
2145 hostp->nh_shrlist = nsp;
2146
2147 out:
2148 mutex_exit(&hostp->nh_lock);
2149 if (nsp_new != NULL)
2150 nlm_shres_destroy_item(nsp_new);
2151 }
2152
2153 void
2154 nlm_shres_untrack(struct nlm_host *hostp, vnode_t *vp, struct shrlock *shrp)
2155 {
2156 struct nlm_shres *nsp, *nsp_prev = NULL;
2157
2158 mutex_enter(&hostp->nh_lock);
2159 nsp = hostp->nh_shrlist;
2160 while (nsp != NULL) {
2161 if (nsp->ns_vp == vp && nlm_shres_equal(shrp, nsp->ns_shr)) {
2162 struct nlm_shres *nsp_del;
2163
2164 nsp_del = nsp;
2165 nsp = nsp->ns_next;
2166 if (nsp_prev != NULL)
2167 nsp_prev->ns_next = nsp;
2168 else
2169 hostp->nh_shrlist = nsp;
2170
2171 nlm_shres_destroy_item(nsp_del);
2172 continue;
2173 }
2174
2175 nsp_prev = nsp;
2176 nsp = nsp->ns_next;
2177 }
2178
2179 mutex_exit(&hostp->nh_lock);
2180 }
2181
2182 /*
2183 * Get a _copy_ of the list of all active share reservations
2184 * made by the given host.
2185 * NOTE: the list function returns _must_ be released using
2186 * nlm_free_shrlist().
2187 */
2188 struct nlm_shres *
2189 nlm_get_active_shres(struct nlm_host *hostp)
2190 {
2191 struct nlm_shres *nsp, *nslist = NULL;
2192
2193 mutex_enter(&hostp->nh_lock);
2194 for (nsp = hostp->nh_shrlist; nsp != NULL; nsp = nsp->ns_next) {
2195 struct nlm_shres *nsp_new;
2196
2197 nsp_new = nlm_shres_create_item(nsp->ns_shr, nsp->ns_vp);
2198 nsp_new->ns_next = nslist;
2199 nslist = nsp_new;
2200 }
2201
2202 mutex_exit(&hostp->nh_lock);
2203 return (nslist);
2204 }
2205
2206 /*
2207 * Free memory allocated for the active share reservations
2208 * list created by nlm_get_active_shres() function.
2209 */
2210 void
2211 nlm_free_shrlist(struct nlm_shres *nslist)
2212 {
2213 struct nlm_shres *nsp;
2214
2215 while (nslist != NULL) {
2216 nsp = nslist;
2217 nslist = nslist->ns_next;
2218
2219 nlm_shres_destroy_item(nsp);
2220 }
2221 }
2222
2223 static bool_t
2224 nlm_shres_equal(struct shrlock *shrp1, struct shrlock *shrp2)
2225 {
2226 if (shrp1->s_sysid == shrp2->s_sysid &&
2227 shrp1->s_pid == shrp2->s_pid &&
2228 shrp1->s_own_len == shrp2->s_own_len &&
2229 bcmp(shrp1->s_owner, shrp2->s_owner,
2230 shrp1->s_own_len) == 0)
2231 return (TRUE);
2232
2233 return (FALSE);
2234 }
2235
2236 static struct nlm_shres *
2237 nlm_shres_create_item(struct shrlock *shrp, vnode_t *vp)
2238 {
2239 struct nlm_shres *nsp;
2240
2241 nsp = kmem_alloc(sizeof (*nsp), KM_SLEEP);
2242 nsp->ns_shr = kmem_alloc(sizeof (*shrp), KM_SLEEP);
2243 bcopy(shrp, nsp->ns_shr, sizeof (*shrp));
2244 nsp->ns_shr->s_owner = kmem_alloc(shrp->s_own_len, KM_SLEEP);
2245 bcopy(shrp->s_owner, nsp->ns_shr->s_owner, shrp->s_own_len);
2246 nsp->ns_vp = vp;
2247
2248 return (nsp);
2249 }
2250
2251 static void
2252 nlm_shres_destroy_item(struct nlm_shres *nsp)
2253 {
2254 kmem_free(nsp->ns_shr->s_owner,
2255 nsp->ns_shr->s_own_len);
2256 kmem_free(nsp->ns_shr, sizeof (struct shrlock));
2257 kmem_free(nsp, sizeof (*nsp));
2258 }
2259
2260 /*
2261 * Called by klmmod.c when lockd adds a network endpoint
2262 * on which we should begin RPC services.
2263 */
2264 int
2265 nlm_svc_add_ep(struct file *fp, const char *netid, struct knetconfig *knc)
2266 {
2267 SVCMASTERXPRT *xprt = NULL;
2268 int error;
2269
2270 error = svc_tli_kcreate(fp, 0, (char *)netid, NULL, &xprt,
2271 &nlm_sct, NULL, NLM_SVCPOOL_ID, FALSE);
2272 if (error != 0)
2273 return (error);
2274
2275 (void) nlm_knc_to_netid(knc);
2276 return (0);
2277 }
2278
2279 /*
2280 * Start NLM service.
2281 */
2282 int
2283 nlm_svc_starting(struct nlm_globals *g, struct file *fp,
2284 const char *netid, struct knetconfig *knc)
2285 {
2286 int error;
2287 enum clnt_stat stat;
2288
2289 VERIFY(g->run_status == NLM_ST_STARTING);
2290 VERIFY(g->nlm_gc_thread == NULL);
2291
2292 error = nlm_nsm_init_local(&g->nlm_nsm);
2293 if (error != 0) {
2294 NLM_ERR("Failed to initialize NSM handler "
2295 "(error=%d)\n", error);
2296 g->run_status = NLM_ST_DOWN;
2297 return (error);
2298 }
2299
2300 error = EIO;
2301
2302 /*
2303 * Create an NLM garbage collector thread that will
2304 * clean up stale vholds and hosts objects.
2305 */
2306 g->nlm_gc_thread = zthread_create(NULL, 0, nlm_gc,
2307 g, 0, minclsyspri);
2308
2309 /*
2310 * Send SIMU_CRASH to local statd to report that
2311 * NLM started, so that statd can report other hosts
2312 * about NLM state change.
2313 */
2314
2315 stat = nlm_nsm_simu_crash(&g->nlm_nsm);
2316 if (stat != RPC_SUCCESS) {
2317 NLM_ERR("Failed to connect to local statd "
2318 "(rpcerr=%d)\n", stat);
2319 goto shutdown_lm;
2320 }
2321
2322 stat = nlm_nsm_stat(&g->nlm_nsm, &g->nsm_state);
2323 if (stat != RPC_SUCCESS) {
2324 NLM_ERR("Failed to get the status of local statd "
2325 "(rpcerr=%d)\n", stat);
2326 goto shutdown_lm;
2327 }
2328
2329 g->grace_threshold = ddi_get_lbolt() +
2330 SEC_TO_TICK(g->grace_period);
2331
2332 /* Register endpoint used for communications with local NLM */
2333 error = nlm_svc_add_ep(fp, netid, knc);
2334 if (error != 0)
2335 goto shutdown_lm;
2336
2337 (void) svc_pool_control(NLM_SVCPOOL_ID,
2338 SVCPSET_SHUTDOWN_PROC, (void *)nlm_pool_shutdown);
2339 g->run_status = NLM_ST_UP;
2340 return (0);
2341
2342 shutdown_lm:
2343 mutex_enter(&g->lock);
2344 g->run_status = NLM_ST_STOPPING;
2345 mutex_exit(&g->lock);
2346
2347 nlm_svc_stopping(g);
2348 return (error);
2349 }
2350
/*
 * Server pool shutdown callback (registered by nlm_svc_starting()
 * via SVCPSET_SHUTDOWN_PROC).
 *
 * It runs when the server pool is destroyed, i.e. when all
 * transports are closed and no server threads exist any more.
 * All that is left to do is to shut NLM down properly through
 * lm_shutdown(); its result is ignored.
 */
static void
nlm_pool_shutdown(void)
{
	(void) lm_shutdown();
}
2363
2364 /*
2365 * Stop NLM service, cleanup all resources
2366 * NLM owns at the moment.
2367 *
2368 * NOTE: NFS code can call NLM while it's
2369 * stopping or even if it's shut down. Any attempt
2370 * to lock file either on client or on the server
2371 * will fail if NLM isn't in NLM_ST_UP state.
2372 */
2373 void
2374 nlm_svc_stopping(struct nlm_globals *g)
2375 {
2376 mutex_enter(&g->lock);
2377 ASSERT(g->run_status == NLM_ST_STOPPING);
2378
2379 /*
2380 * Ask NLM GC thread to exit and wait until it dies.
2381 */
2382 cv_signal(&g->nlm_gc_sched_cv);
2383 while (g->nlm_gc_thread != NULL)
2384 cv_wait(&g->nlm_gc_finish_cv, &g->lock);
2385
2386 mutex_exit(&g->lock);
2387
2388 /*
2389 * Cleanup locks owned by NLM hosts.
2390 * NOTE: New hosts won't be created while
2391 * NLM is stopping.
2392 */
2393 while (!avl_is_empty(&g->nlm_hosts_tree)) {
2394 struct nlm_host *hostp;
2395 int busy_hosts = 0;
2396
2397 /*
2398 * Iterate through all NLM hosts in the system
2399 * and drop the locks they own by force.
2400 */
2401 hostp = avl_first(&g->nlm_hosts_tree);
2402 while (hostp != NULL) {
2403 /* Cleanup all client and server side locks */
2404 nlm_client_cancel_all(g, hostp);
2405 nlm_host_notify_server(hostp, 0);
2406
2407 mutex_enter(&hostp->nh_lock);
2408 nlm_host_gc_vholds(hostp);
2409 if (hostp->nh_refs > 0 || nlm_host_has_locks(hostp)) {
2410 /*
2411 * Oh, it seems the host is still busy, let
2412 * it some time to release and go to the
2413 * next one.
2414 */
2415
2416 mutex_exit(&hostp->nh_lock);
2417 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2418 busy_hosts++;
2419 continue;
2420 }
2421
2422 mutex_exit(&hostp->nh_lock);
2423 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2424 }
2425
2426 /*
2427 * All hosts go to nlm_idle_hosts list after
2428 * all locks they own are cleaned up and last refereces
2429 * were dropped. Just destroy all hosts in nlm_idle_hosts
2430 * list, they can not be removed from there while we're
2431 * in stopping state.
2432 */
2433 while ((hostp = TAILQ_FIRST(&g->nlm_idle_hosts)) != NULL) {
2434 nlm_host_unregister(g, hostp);
2435 nlm_host_destroy(hostp);
2436 }
2437
2438 if (busy_hosts > 0) {
2439 /*
2440 * There're some hosts that weren't cleaned
2441 * up. Probably they're in resource cleanup
2442 * process. Give them some time to do drop
2443 * references.
2444 */
2445 delay(MSEC_TO_TICK(500));
2446 }
2447 }
2448
2449 ASSERT(TAILQ_EMPTY(&g->nlm_slocks));
2450
2451 nlm_nsm_fini(&g->nlm_nsm);
2452 g->lockd_pid = 0;
2453 g->run_status = NLM_ST_DOWN;
2454 }
2455
2456 /*
2457 * Returns TRUE if the given vnode has
2458 * any active or sleeping locks.
2459 */
2460 int
2461 nlm_vp_active(const vnode_t *vp)
2462 {
2463 struct nlm_globals *g;
2464 struct nlm_host *hostp;
2465 struct nlm_vhold *nvp;
2466 int active = 0;
2467
2468 g = zone_getspecific(nlm_zone_key, curzone);
2469
2470 /*
2471 * Server side NLM has locks on the given vnode
2472 * if there exist a vhold object that holds
2473 * the given vnode "vp" in one of NLM hosts.
2474 */
2475 mutex_enter(&g->lock);
2476 hostp = avl_first(&g->nlm_hosts_tree);
2477 while (hostp != NULL) {
2478 mutex_enter(&hostp->nh_lock);
2479 nvp = nlm_vhold_find_locked(hostp, vp);
2480 mutex_exit(&hostp->nh_lock);
2481 if (nvp != NULL) {
2482 active = 1;
2483 break;
2484 }
2485
2486 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2487 }
2488
2489 mutex_exit(&g->lock);
2490 return (active);
2491 }
2492
2493 /*
2494 * Called right before NFS export is going to
2495 * dissapear. The function finds all vnodes
2496 * belonging to the given export and cleans
2497 * all remote locks and share reservations
2498 * on them.
2499 */
2500 void
2501 nlm_unexport(struct exportinfo *exi)
2502 {
2503 struct nlm_globals *g;
2504 struct nlm_host *hostp;
2505
2506 g = zone_getspecific(nlm_zone_key, curzone);
2507
2508 mutex_enter(&g->lock);
2509 hostp = avl_first(&g->nlm_hosts_tree);
2510 while (hostp != NULL) {
2511 struct nlm_vhold *nvp;
2512
2513 mutex_enter(&hostp->nh_lock);
2514 TAILQ_FOREACH(nvp, &hostp->nh_vholds_list, nv_link) {
2515 vnode_t *vp;
2516
2517 nvp->nv_refcnt++;
2518 mutex_exit(&hostp->nh_lock);
2519
2520 vp = nvp->nv_vp;
2521
2522 if (!EQFSID(&exi->exi_fsid, &vp->v_vfsp->vfs_fsid))
2523 goto next_iter;
2524
2525 /*
2526 * Ok, it we found out that vnode vp is under
2527 * control by the exportinfo exi, now we need
2528 * to drop all locks from this vnode, let's
2529 * do it.
2530 */
2531 nlm_vhold_clean(nvp, hostp->nh_sysid);
2532
2533 next_iter:
2534 mutex_enter(&hostp->nh_lock);
2535 nvp->nv_refcnt--;
2536 }
2537
2538 mutex_exit(&hostp->nh_lock);
2539 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2540 }
2541
2542 mutex_exit(&g->lock);
2543 }
2544
2545 /*
2546 * Allocate new unique sysid.
2547 * In case of failure (no available sysids)
2548 * return LM_NOSYSID.
2549 */
2550 sysid_t
2551 nlm_sysid_alloc(void)
2552 {
2553 sysid_t ret_sysid = LM_NOSYSID;
2554
2555 rw_enter(&lm_lck, RW_WRITER);
2556 if (nlm_sysid_nidx > LM_SYSID_MAX)
2557 nlm_sysid_nidx = LM_SYSID;
2558
2559 if (!BT_TEST(nlm_sysid_bmap, nlm_sysid_nidx)) {
2560 BT_SET(nlm_sysid_bmap, nlm_sysid_nidx);
2561 ret_sysid = nlm_sysid_nidx++;
2562 } else {
2563 index_t id;
2564
2565 id = bt_availbit(nlm_sysid_bmap, NLM_BMAP_NITEMS);
2566 if (id > 0) {
2567 nlm_sysid_nidx = id + 1;
2568 ret_sysid = id;
2569 BT_SET(nlm_sysid_bmap, id);
2570 }
2571 }
2572
2573 rw_exit(&lm_lck);
2574 return (ret_sysid);
2575 }
2576
2577 void
2578 nlm_sysid_free(sysid_t sysid)
2579 {
2580 ASSERT(sysid >= LM_SYSID && sysid <= LM_SYSID_MAX);
2581
2582 rw_enter(&lm_lck, RW_WRITER);
2583 ASSERT(BT_TEST(nlm_sysid_bmap, sysid));
2584 BT_CLEAR(nlm_sysid_bmap, sysid);
2585 rw_exit(&lm_lck);
2586 }
2587
2588 /*
2589 * Return true if the request came from a local caller.
2590 * By necessity, this "knows" the netid names invented
2591 * in lm_svc() and nlm_netid_from_knetconfig().
2592 */
2593 bool_t
2594 nlm_caller_is_local(SVCXPRT *transp)
2595 {
2596 char *netid;
2597 struct netbuf *rtaddr;
2598
2599 netid = svc_getnetid(transp);
2600 rtaddr = svc_getrpccaller(transp);
2601
2602 if (netid == NULL)
2603 return (FALSE);
2604
2605 if (strcmp(netid, "ticlts") == 0 ||
2606 strcmp(netid, "ticotsord") == 0)
2607 return (TRUE);
2608
2609 if (strcmp(netid, "tcp") == 0 || strcmp(netid, "udp") == 0) {
2610 struct sockaddr_in *sin = (void *)rtaddr->buf;
2611 if (sin->sin_addr.s_addr == htonl(INADDR_LOOPBACK))
2612 return (TRUE);
2613 }
2614 if (strcmp(netid, "tcp6") == 0 || strcmp(netid, "udp6") == 0) {
2615 struct sockaddr_in6 *sin6 = (void *)rtaddr->buf;
2616 if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))
2617 return (TRUE);
2618 }
2619
2620 return (FALSE); /* unknown transport */
2621 }
2622
2623 /*
2624 * Get netid string correspondig to the given knetconfig.
2625 * If not done already, save knc->knc_rdev in our table.
2626 */
2627 const char *
2628 nlm_knc_to_netid(struct knetconfig *knc)
2629 {
2630 int i;
2631 dev_t rdev;
2632 struct nlm_knc *nc;
2633 const char *netid = NULL;
2634
2635 rw_enter(&lm_lck, RW_READER);
2636 for (i = 0; i < NLM_KNCS; i++) {
2637 nc = &nlm_netconfigs[i];
2638
2639 if (nc->n_knc.knc_semantics == knc->knc_semantics &&
2640 strcmp(nc->n_knc.knc_protofmly,
2641 knc->knc_protofmly) == 0) {
2642 netid = nc->n_netid;
2643 rdev = nc->n_knc.knc_rdev;
2644 break;
2645 }
2646 }
2647 rw_exit(&lm_lck);
2648
2649 if (netid != NULL && rdev == NODEV) {
2650 rw_enter(&lm_lck, RW_WRITER);
2651 if (nc->n_knc.knc_rdev == NODEV)
2652 nc->n_knc.knc_rdev = knc->knc_rdev;
2653 rw_exit(&lm_lck);
2654 }
2655
2656 return (netid);
2657 }
2658
2659 /*
2660 * Get a knetconfig corresponding to the given netid.
2661 * If there's no knetconfig for this netid, ENOENT
2662 * is returned.
2663 */
2664 int
2665 nlm_knc_from_netid(const char *netid, struct knetconfig *knc)
2666 {
2667 int i, ret;
2668
2669 ret = ENOENT;
2670 for (i = 0; i < NLM_KNCS; i++) {
2671 struct nlm_knc *nknc;
2672
2673 nknc = &nlm_netconfigs[i];
2674 if (strcmp(netid, nknc->n_netid) == 0 &&
2675 nknc->n_knc.knc_rdev != NODEV) {
2676 *knc = nknc->n_knc;
2677 ret = 0;
2678 break;
2679 }
2680 }
2681
2682 return (ret);
2683 }
2684
2685 void
2686 nlm_cprsuspend(void)
2687 {
2688 struct nlm_globals *g;
2689
2690 rw_enter(&lm_lck, RW_READER);
2691 TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
2692 nlm_suspend_zone(g);
2693
2694 rw_exit(&lm_lck);
2695 }
2696
2697 void
2698 nlm_cprresume(void)
2699 {
2700 struct nlm_globals *g;
2701
2702 rw_enter(&lm_lck, RW_READER);
2703 TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
2704 nlm_resume_zone(g);
2705
2706 rw_exit(&lm_lck);
2707 }
2708
2709 static void
2710 nlm_nsm_clnt_init(CLIENT *clnt, struct nlm_nsm *nsm)
2711 {
2712 (void) clnt_tli_kinit(clnt, &nsm->ns_knc, &nsm->ns_addr, 0,
2713 NLM_RPC_RETRIES, kcred);
2714 }
2715
2716 static void
2717 nlm_netbuf_to_netobj(struct netbuf *addr, int *family, netobj *obj)
2718 {
2719 /* LINTED pointer alignment */
2720 struct sockaddr *sa = (struct sockaddr *)addr->buf;
2721
2722 *family = sa->sa_family;
2723
2724 switch (sa->sa_family) {
2725 case AF_INET: {
2726 /* LINTED pointer alignment */
2727 struct sockaddr_in *sin = (struct sockaddr_in *)sa;
2728
2729 obj->n_len = sizeof (sin->sin_addr);
2730 obj->n_bytes = (char *)&sin->sin_addr;
2731 break;
2732 }
2733
2734 case AF_INET6: {
2735 /* LINTED pointer alignment */
2736 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
2737
2738 obj->n_len = sizeof (sin6->sin6_addr);
2739 obj->n_bytes = (char *)&sin6->sin6_addr;
2740 break;
2741 }
2742
2743 default:
2744 VERIFY(0);
2745 break;
2746 }
2747 }