1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy is of the CDDL is also available via the Internet
   9  * at http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  14  * Copyright (c) 2012 by Delphix. All rights reserved.
  15  */
  16 
  17 /*
  18  * NFS Lock Manager, server-side and common.
  19  *
  20  * This file contains all the external entry points of klmmod.
  21  * Basically, this is the "glue" to the BSD nlm code.
  22  */
  23 
  24 #include <sys/types.h>
  25 #include <sys/errno.h>
  26 #include <sys/modctl.h>
  27 #include <sys/flock.h>
  28 
  29 #include <nfs/nfs.h>
  30 #include <nfs/nfssys.h>
  31 #include <nfs/lm.h>
  32 #include <rpcsvc/nlm_prot.h>
  33 #include "nlm_impl.h"
  34 
  35 static struct modlmisc modlmisc = {
  36         &mod_miscops, "lock mgr common module"
  37 };
  38 
  39 static struct modlinkage modlinkage = {
  40         MODREV_1, &modlmisc, NULL
  41 };
  42 
  43 /*
  44  * Cluster node ID.  Zero unless we're part of a cluster.
  45  * Set by lm_set_nlmid_flk.  Pass to lm_set_nlm_status.
  46  * We're not yet doing "clustered" NLM stuff.
  47  */
  48 int lm_global_nlmid = 0;
  49 
  50 /*
  51  * Call-back hook for clusters: Set lock manager status.
  52  * If this hook is set, call this instead of the ususal
  53  * flk_set_lockmgr_status(FLK_LOCKMGR_UP / DOWN);
  54  */
  55 void (*lm_set_nlm_status)(int nlm_id, flk_nlm_status_t) = NULL;
  56 
  57 /*
  58  * Call-back hook for clusters: Delete all locks held by sysid.
  59  * Call from code that drops all client locks (for which we're
  60  * the server) i.e. after the SM tells us a client has crashed.
  61  */
  62 void (*lm_remove_file_locks)(int) = NULL;
  63 
  64 krwlock_t               lm_lck;
  65 zone_key_t              nlm_zone_key;
  66 
  67 /*
  68  * Init/fini per-zone stuff for klm
  69  */
  70 /* ARGSUSED */
  71 void *
  72 lm_zone_init(zoneid_t zoneid)
  73 {
  74         struct nlm_globals *g;
  75 
  76         g = kmem_zalloc(sizeof (*g), KM_SLEEP);
  77 
  78         avl_create(&g->nlm_hosts_tree, nlm_host_cmp,
  79             sizeof (struct nlm_host),
  80             offsetof(struct nlm_host, nh_by_addr));
  81 
  82         g->nlm_hosts_hash = mod_hash_create_idhash("nlm_host_by_sysid",
  83             64, mod_hash_null_valdtor);
  84 
  85         TAILQ_INIT(&g->nlm_idle_hosts);
  86         TAILQ_INIT(&g->nlm_slocks);
  87 
  88         mutex_init(&g->lock, NULL, MUTEX_DEFAULT, NULL);
  89         cv_init(&g->nlm_gc_sched_cv, NULL, CV_DEFAULT, NULL);
  90         cv_init(&g->nlm_gc_finish_cv, NULL, CV_DEFAULT, NULL);
  91         mutex_init(&g->clean_lock, NULL, MUTEX_DEFAULT, NULL);
  92 
  93         g->lockd_pid = 0;
  94         g->run_status = NLM_ST_DOWN;
  95 
  96         nlm_globals_register(g);
  97         return (g);
  98 }
  99 
 100 /* ARGSUSED */
 101 void
 102 lm_zone_fini(zoneid_t zoneid, void *data)
 103 {
 104         struct nlm_globals *g = data;
 105 
 106         ASSERT(avl_is_empty(&g->nlm_hosts_tree));
 107         avl_destroy(&g->nlm_hosts_tree);
 108         mod_hash_destroy_idhash(g->nlm_hosts_hash);
 109 
 110         ASSERT(g->nlm_gc_thread == NULL);
 111         mutex_destroy(&g->lock);
 112         cv_destroy(&g->nlm_gc_sched_cv);
 113         cv_destroy(&g->nlm_gc_finish_cv);
 114         mutex_destroy(&g->clean_lock);
 115 
 116         nlm_globals_unregister(g);
 117         kmem_free(g, sizeof (*g));
 118 }
 119 
 120 
 121 
 122 /*
 123  * ****************************************************************
 124  * module init, fini, info
 125  */
 126 int
 127 _init()
 128 {
 129         int retval;
 130 
 131         rw_init(&lm_lck, NULL, RW_DEFAULT, NULL);
 132         nlm_init();
 133 
 134         zone_key_create(&nlm_zone_key, lm_zone_init, NULL, lm_zone_fini);
 135         /* Per-zone lockmgr data.  See: os/flock.c */
 136         zone_key_create(&flock_zone_key, flk_zone_init, NULL, flk_zone_fini);
 137 
 138         retval = mod_install(&modlinkage);
 139         if (retval == 0)
 140                 return (0);
 141 
 142         /*
 143          * mod_install failed! undo above, reverse order
 144          */
 145 
 146         (void) zone_key_delete(flock_zone_key);
 147         flock_zone_key = ZONE_KEY_UNINITIALIZED;
 148         (void) zone_key_delete(nlm_zone_key);
 149         rw_destroy(&lm_lck);
 150 
 151         return (retval);
 152 }
 153 
 154 int
 155 _fini()
 156 {
 157         /* Don't unload. */
 158         return (EBUSY);
 159 }
 160 
 161 int
 162 _info(struct modinfo *modinfop)
 163 {
 164         return (mod_info(&modlinkage, modinfop));
 165 }
 166 
 167 
 168 
 169 /*
 170  * ****************************************************************
 171  * Stubs listed in modstubs.s
 172  */
 173 
 174 /*
 175  * klm system calls.  Start service on some endpoint.
 176  * Called by nfssys() LM_SVC, from lockd.
 177  */
 178 int
 179 lm_svc(struct lm_svc_args *args)
 180 {
 181         struct knetconfig knc;
 182         const char *netid;
 183         struct nlm_globals *g;
 184         struct file *fp = NULL;
 185         int err = 0;
 186 
 187         /* Get our "globals" */
 188         g = zone_getspecific(nlm_zone_key, curzone);
 189 
 190         /*
 191          * Check version of lockd calling.
 192          */
 193         if (args->version != LM_SVC_CUR_VERS) {
 194                 NLM_ERR("lm_svc: Version mismatch "
 195                     "(given 0x%x, expected 0x%x)\n",
 196                     args->version, LM_SVC_CUR_VERS);
 197                 return (EINVAL);
 198         }
 199 
 200         /*
 201          * Build knetconfig, checking arg values.
 202          * Also come up with the "netid" string.
 203          * (With some knowledge of /etc/netconfig)
 204          */
 205         bzero(&knc, sizeof (knc));
 206         switch (args->n_proto) {
 207         case LM_TCP:
 208                 knc.knc_semantics = NC_TPI_COTS_ORD;
 209                 knc.knc_proto = NC_TCP;
 210                 break;
 211         case LM_UDP:
 212                 knc.knc_semantics = NC_TPI_CLTS;
 213                 knc.knc_proto = NC_UDP;
 214                 break;
 215         default:
 216                 NLM_ERR("nlm_build_knetconfig: Unknown "
 217                     "lm_proto=0x%x\n", args->n_proto);
 218                 return (EINVAL);
 219         }
 220 
 221         switch (args->n_fmly) {
 222         case LM_INET:
 223                 knc.knc_protofmly = NC_INET;
 224                 break;
 225         case LM_INET6:
 226                 knc.knc_protofmly = NC_INET6;
 227                 break;
 228         case LM_LOOPBACK:
 229                 knc.knc_protofmly = NC_LOOPBACK;
 230                 /* Override what we set above. */
 231                 knc.knc_proto = NC_NOPROTO;
 232                 break;
 233         default:
 234                 NLM_ERR("nlm_build_knetconfig: Unknown "
 235                     "lm_fmly=0x%x\n", args->n_fmly);
 236                 return (EINVAL);
 237         }
 238 
 239         knc.knc_rdev = args->n_rdev;
 240         netid = nlm_knc_to_netid(&knc);
 241         if (!netid)
 242                 return (EINVAL);
 243 
 244         /*
 245          * Setup service on the passed transport.
 246          * NB: must releasef(fp) after this.
 247          */
 248         if ((fp = getf(args->fd)) == NULL)
 249                 return (EBADF);
 250 
 251         mutex_enter(&g->lock);
 252         /*
 253          * Don't try to start while still shutting down,
 254          * or lots of things will fail...
 255          */
 256         if (g->run_status == NLM_ST_STOPPING) {
 257                 err = EAGAIN;
 258                 goto out;
 259         }
 260 
 261         /*
 262          * There is no separate "initialize" sub-call for nfssys,
 263          * and we want to do some one-time work when the first
 264          * binding comes in from lockd.
 265          */
 266         if (g->run_status == NLM_ST_DOWN) {
 267                 g->run_status = NLM_ST_STARTING;
 268                 g->lockd_pid = curproc->p_pid;
 269 
 270                 /* Save the options. */
 271                 g->cn_idle_tmo = args->timout;
 272                 g->grace_period = args->grace;
 273                 g->retrans_tmo = args->retransmittimeout;
 274 
 275                 /* See nfs_sys.c (not yet per-zone) */
 276                 if (INGLOBALZONE(curproc)) {
 277                         rfs4_grace_period = args->grace;
 278                         rfs4_lease_time   = args->grace;
 279                 }
 280 
 281                 mutex_exit(&g->lock);
 282                 err = nlm_svc_starting(g, fp, netid, &knc);
 283                 mutex_enter(&g->lock);
 284         } else {
 285                 /*
 286                  * If KLM is not started and the very first endpoint lockd
 287                  * tries to add is not a loopback device, report an error.
 288                  */
 289                 if (g->run_status != NLM_ST_UP) {
 290                         err = ENOTACTIVE;
 291                         goto out;
 292                 }
 293                 if (g->lockd_pid != curproc->p_pid) {
 294                         /* Check if caller has the same PID lockd does */
 295                         err = EPERM;
 296                         goto out;
 297                 }
 298 
 299                 err = nlm_svc_add_ep(fp, netid, &knc);
 300         }
 301 
 302 out:
 303         mutex_exit(&g->lock);
 304         if (fp != NULL)
 305                 releasef(args->fd);
 306 
 307         return (err);
 308 }
 309 
 310 /*
 311  * klm system calls.  Kill the lock manager.
 312  * Called by nfssys() KILL_LOCKMGR,
 313  * liblm:lm_shutdown() <- unused?
 314  */
 315 int
 316 lm_shutdown(void)
 317 {
 318         struct nlm_globals *g;
 319         proc_t *p;
 320         pid_t pid;
 321 
 322         /* Get our "globals" */
 323         g = zone_getspecific(nlm_zone_key, curzone);
 324 
 325         mutex_enter(&g->lock);
 326         if (g->run_status != NLM_ST_UP) {
 327                 mutex_exit(&g->lock);
 328                 return (EBUSY);
 329         }
 330 
 331         g->run_status = NLM_ST_STOPPING;
 332         pid = g->lockd_pid;
 333         mutex_exit(&g->lock);
 334         nlm_svc_stopping(g);
 335 
 336         mutex_enter(&pidlock);
 337         p = prfind(pid);
 338         if (p != NULL)
 339                 psignal(p, SIGTERM);
 340 
 341         mutex_exit(&pidlock);
 342         return (0);
 343 }
 344 
 345 /*
 346  * Cleanup remote locks on FS un-export.
 347  *
 348  * NOTE: called from nfs_export.c:unexport()
 349  * right before the share is going to
 350  * be unexported.
 351  */
 352 void
 353 lm_unexport(struct exportinfo *exi)
 354 {
 355         nlm_unexport(exi);
 356 }
 357 
 358 /*
 359  * CPR suspend/resume hooks.
 360  * See:cpr_suspend, cpr_resume
 361  *
 362  * Before suspend, get current state from "statd" on
 363  * all remote systems for which we have locks.
 364  *
 365  * After resume, check with those systems again,
 366  * and either reclaim locks, or do SIGLOST.
 367  */
 368 void
 369 lm_cprsuspend(void)
 370 {
 371         nlm_cprsuspend();
 372 }
 373 
 374 void
 375 lm_cprresume(void)
 376 {
 377         nlm_cprresume();
 378 }
 379 
 380 /*
 381  * Add the nlm_id bits to the sysid (by ref).
 382  */
 383 void
 384 lm_set_nlmid_flk(int *new_sysid)
 385 {
 386         if (lm_global_nlmid != 0)
 387                 *new_sysid |= (lm_global_nlmid << BITS_IN_SYSID);
 388 }
 389 
 390 /*
 391  * It seems that closed source klmmod used
 392  * this function to release knetconfig stored
 393  * in mntinfo structure (see mntinfo's mi_klmconfig
 394  * field).
 395  * We store knetconfigs differently, thus we don't
 396  * need this function.
 397  */
 398 void
 399 lm_free_config(struct knetconfig *knc)
 400 {
 401         _NOTE(ARGUNUSED(knc));
 402 }
 403 
 404 /*
 405  * Called by NFS4 delegation code to check if there are any
 406  * NFSv2/v3 locks for the file, so it should not delegate.
 407  *
 408  * NOTE: called from NFSv4 code
 409  * (see nfs4_srv_deleg.c:rfs4_bgrant_delegation())
 410  */
 411 int
 412 lm_vp_active(const vnode_t *vp)
 413 {
 414         return (nlm_vp_active(vp));
 415 }
 416 
 417 /*
 418  * Find or create a "sysid" for given knc+addr.
 419  * name is optional.  Sets nc_changed if the
 420  * found knc_proto is different from passed.
 421  * Increments the reference count.
 422  *
 423  * Called internally, and in nfs4_find_sysid()
 424  */
 425 struct lm_sysid *
 426 lm_get_sysid(struct knetconfig *knc, struct netbuf *addr,
 427     char *name, bool_t *nc_changed)
 428 {
 429         struct nlm_globals *g;
 430         const char *netid;
 431         struct nlm_host *hostp;
 432 
 433         _NOTE(ARGUNUSED(nc_changed));
 434         netid = nlm_knc_to_netid(knc);
 435         if (netid == NULL)
 436                 return (NULL);
 437 
 438         g = zone_getspecific(nlm_zone_key, curzone);
 439 
 440         hostp = nlm_host_findcreate(g, name, netid, addr);
 441         if (hostp == NULL)
 442                 return (NULL);
 443 
 444         return ((struct lm_sysid *)hostp);
 445 }
 446 
 447 /*
 448  * Release a reference on a "sysid".
 449  */
 450 void
 451 lm_rel_sysid(struct lm_sysid *sysid)
 452 {
 453         struct nlm_globals *g;
 454 
 455         g = zone_getspecific(nlm_zone_key, curzone);
 456         nlm_host_release(g, (struct nlm_host *)sysid);
 457 }
 458 
 459 /*
 460  * Alloc/free a sysid_t (a unique number between
 461  * LM_SYSID and LM_SYSID_MAX).
 462  *
 463  * Used by NFSv4 rfs4_op_lockt and smbsrv/smb_fsop_frlock,
 464  * both to represent non-local locks outside of klm.
 465  *
 466  * NOTE: called from NFSv4 and SMBFS to allocate unique
 467  * sysid.
 468  */
 469 sysid_t
 470 lm_alloc_sysidt(void)
 471 {
 472         return (nlm_sysid_alloc());
 473 }
 474 
 475 void
 476 lm_free_sysidt(sysid_t sysid)
 477 {
 478         nlm_sysid_free(sysid);
 479 }
 480 
 481 /* Access private member lms->sysid */
 482 sysid_t
 483 lm_sysidt(struct lm_sysid *lms)
 484 {
 485         return (((struct nlm_host *)lms)->nh_sysid);
 486 }
 487 
 488 /*
 489  * Called by nfs_frlock to check lock constraints.
 490  * Return non-zero if the lock request is "safe", i.e.
 491  * the range is not mapped, not MANDLOCK, etc.
 492  *
 493  * NOTE: callde from NFSv3/NFSv2 frlock() functions to
 494  * determine whether it's safe to add new lock.
 495  */
 496 int
 497 lm_safelock(vnode_t *vp, const struct flock64 *fl, cred_t *cr)
 498 {
 499         return (nlm_safelock(vp, fl, cr));
 500 }
 501 
 502 /*
 503  * Called by nfs_lockcompletion to check whether it's "safe"
 504  * to map the file (and cache it's data).  Walks the list of
 505  * file locks looking for any that are not "whole file".
 506  *
 507  * NOTE: called from nfs_client.c:nfs_lockcompletion()
 508  */
 509 int
 510 lm_safemap(const vnode_t *vp)
 511 {
 512         return (nlm_safemap(vp));
 513 }
 514 
 515 /*
 516  * Called by nfs_map() for the MANDLOCK case.
 517  * Return non-zero if the file has any locks with a
 518  * blocked request (sleep).
 519  *
 520  * NOTE: called from NFSv3/NFSv2 map() functions in
 521  * order to determine whether it's safe to add new
 522  * mapping.
 523  */
 524 int
 525 lm_has_sleep(const vnode_t *vp)
 526 {
 527         return (nlm_has_sleep(vp));
 528 }
 529 
 530 /*
 531  * ****************************************************************
 532  * Stuff needed by klmops?
 533  */