illumos-gate Wdiff usr/src/uts/common/klm/nlm_impl.c

Print this page

4965 nlm null rpc returns RPC_TIMEDOUT with shorter timeout value

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/klm/nlm_impl.c
          +++ new/usr/src/uts/common/klm/nlm_impl.c

   1    1  /*
   2    2   * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
   3    3   * Authors: Doug Rabson <dfr@rabson.org>
   4    4   * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
   5    5   *
   6    6   * Redistribution and use in source and binary forms, with or without
   7    7   * modification, are permitted provided that the following conditions
   8    8   * are met:
   9    9   * 1. Redistributions of source code must retain the above copyright
  10   10   *    notice, this list of conditions and the following disclaimer.
  11   11   * 2. Redistributions in binary form must reproduce the above copyright
  12   12   *    notice, this list of conditions and the following disclaimer in the
  13   13   *    documentation and/or other materials provided with the distribution.
  14   14   *
  15   15   * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  16   16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17   17   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18   18   * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  19   19   * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20   20   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  21   21   * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  22   22   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  23   23   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  24   24   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  25   25   * SUCH DAMAGE.
  26   26   */
  27   27  
  28   28  /*
  29   29   * Copyright (c) 2012 by Delphix. All rights reserved.
  30   30   * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  31   31   */
  32   32  
  33   33  /*
  34   34   * NFS LockManager, start/stop, support functions, etc.
  35   35   * Most of the interesting code is here.
  36   36   *
  37   37   * Source code derived from FreeBSD nlm_prot_impl.c
  38   38   */
  39   39  
  40   40  #include <sys/param.h>
  41   41  #include <sys/systm.h>
  42   42  #include <sys/thread.h>
  43   43  #include <sys/fcntl.h>
  44   44  #include <sys/flock.h>
  45   45  #include <sys/mount.h>
  46   46  #include <sys/priv.h>
  47   47  #include <sys/proc.h>
  48   48  #include <sys/share.h>
  49   49  #include <sys/socket.h>
  50   50  #include <sys/syscall.h>
  51   51  #include <sys/syslog.h>
  52   52  #include <sys/systm.h>
  53   53  #include <sys/class.h>
  54   54  #include <sys/unistd.h>
  55   55  #include <sys/vnode.h>
  56   56  #include <sys/vfs.h>
  57   57  #include <sys/queue.h>
  58   58  #include <sys/bitmap.h>
  59   59  #include <sys/sdt.h>
  60   60  #include <netinet/in.h>
  61   61  
  62   62  #include <rpc/rpc.h>
  63   63  #include <rpc/xdr.h>
  64   64  #include <rpc/pmap_prot.h>
  65   65  #include <rpc/pmap_clnt.h>
  66   66  #include <rpc/rpcb_prot.h>
  67   67  
  68   68  #include <rpcsvc/nlm_prot.h>
  69   69  #include <rpcsvc/sm_inter.h>
  70   70  #include <rpcsvc/nsm_addr.h>
  71   71  
  72   72  #include <nfs/nfs.h>
  73   73  #include <nfs/nfs_clnt.h>
  74   74  #include <nfs/export.h>
  75   75  #include <nfs/rnode.h>
  76   76  #include <nfs/lm.h>
  77   77  
  78   78  #include "nlm_impl.h"
  79   79  
  80   80  struct nlm_knc {     /* one entry of the nlm_netconfigs table */
  81   81          struct knetconfig       n_knc;       /* transport configuration */
  82   82          const char              *n_netid;    /* netid string, e.g. "udp" */
  83   83  };
  84   84  
  85   85  /*
  86   86   * Number of attempts NLM tries to obtain RPC binding
  87   87   * of local statd.
  88   88   */
  89   89  #define NLM_NSM_RPCBIND_RETRIES 10
  90   90  
  91   91  /*
  92   92   * Timeout (in seconds) NLM waits before making another
  93   93   * attempt to obtain RPC binding of local statd.
  94   94   */
  95   95  #define NLM_NSM_RPCBIND_TIMEOUT 5
  96   96  
  97   97  /*
  98   98   * Total number of sysids in NLM sysid bitmap
  99   99   */
 100  100  #define NLM_BMAP_NITEMS (LM_SYSID_MAX + 1)
 101  101  
 102  102  /*
 103  103   * Number of ulong_t words in bitmap that is used
 104  104   * for allocation of sysid numbers.
 105  105   */
 106  106  #define NLM_BMAP_WORDS  (NLM_BMAP_NITEMS / BT_NBIPUL)
 107  107  
 108  108  /*
 109  109   * Given an integer x, the macro returns
 110  110   * -1 if x is negative,
 111  111   *  0 if x is zero
 112  112   *  1 if x is positive
 113  113   */
 114  114  #define SIGN(x) (((x) > 0) - ((x) < 0))
 115  115  
 116  116  #define ARRSIZE(arr)    (sizeof (arr) / sizeof ((arr)[0]))

↓ open down ↓

116 lines elided

↑ open up ↑

 117  117  #define NLM_KNCS        ARRSIZE(nlm_netconfigs)
 118  118  
 119  119  krwlock_t lm_lck;
 120  120  
 121  121  /*
 122  122   * Zero timeout for asynchronous NLM RPC operations
 123  123   */
 124  124  static const struct timeval nlm_rpctv_zero = { 0,  0 };
 125  125  
 126  126  /*
      127 + * Initial timeout for NLM NULL RPC
      128 + */
      129 +static volatile struct timeval nlm_nullrpc_wait = { 0, 200000 };
      130 +
      131 +/*
 127  132   * List of all per-zone nlm_globals instances
 128  133   * linked together.
 129  134   */
 130  135  static struct nlm_globals_list nlm_zones_list; /* (g) */
 131  136  
 132  137  /*
 133  138   * NLM kmem caches
 134  139   */
 135  140  static struct kmem_cache *nlm_hosts_cache = NULL;
 136  141  static struct kmem_cache *nlm_vhold_cache = NULL;

 137  142  
 138  143  /*
 139  144   * A bitmap for allocation of new sysids.
 140  145   * Sysid is a unique number between LM_SYSID
 141  146   * and LM_SYSID_MAX. Sysid represents unique remote
 142  147   * host that does file locks on the given host.
 143  148   */
 144  149  static ulong_t  nlm_sysid_bmap[NLM_BMAP_WORDS]; /* (g) */
 145  150  static int      nlm_sysid_nidx;                 /* (g) */
 146  151  
 147  152  /*
 148  153   * RPC service registration for all transports
 149  154   */
 150  155  static SVC_CALLOUT nlm_svcs[] = {
 151  156          { NLM_PROG, 4, 4, nlm_prog_4 }, /* NLM4_VERS */
 152  157          { NLM_PROG, 1, 3, nlm_prog_3 }  /* NLM_VERS - NLM_VERSX */
 153  158  };
 154  159  
 155  160  static SVC_CALLOUT_TABLE nlm_sct = {
 156  161          ARRSIZE(nlm_svcs),
 157  162          FALSE,
 158  163          nlm_svcs
 159  164  };
 160  165  
 161  166  /*
 162  167   * Static table of all netid/knetconfig network
 163  168   * lock manager can work with. nlm_netconfigs table
 164  169   * is used when we need to get valid knetconfig by
 165  170   * netid and vice versa.
 166  171   *
 167  172   * Knetconfigs are activated either by the call from
 168  173   * user-space lockd daemon (server side) or by taking
 169  174   * knetconfig from NFS mountinfo (client side)
 170  175   */
 171  176  static struct nlm_knc nlm_netconfigs[] = { /* (g) */
 172  177          /* UDP */
 173  178          {
 174  179                  { NC_TPI_CLTS, NC_INET, NC_UDP, NODEV },
 175  180                  "udp",
 176  181          },
 177  182          /* TCP */
 178  183          {
 179  184                  { NC_TPI_COTS_ORD, NC_INET, NC_TCP, NODEV },
 180  185                  "tcp",
 181  186          },
 182  187          /* UDP over IPv6 */
 183  188          {
 184  189                  { NC_TPI_CLTS, NC_INET6, NC_UDP, NODEV },
 185  190                  "udp6",
 186  191          },
 187  192          /* TCP over IPv6 */
 188  193          {
 189  194                  { NC_TPI_COTS_ORD, NC_INET6, NC_TCP, NODEV },
 190  195                  "tcp6",
 191  196          },
 192  197          /* ticlts (connectionless loopback transport) */
 193  198          {
 194  199                  { NC_TPI_CLTS, NC_LOOPBACK, NC_NOPROTO, NODEV },
 195  200                  "ticlts",
 196  201          },
 197  202          /* ticotsord (connection-oriented loopback transport) */
 198  203          {
 199  204                  { NC_TPI_COTS_ORD, NC_LOOPBACK, NC_NOPROTO, NODEV },
 200  205                  "ticotsord",
 201  206          },
 202  207  };
 203  208  
 204  209  /*
 205  210   * NLM misc. function
 206  211   */
 207  212  static void nlm_copy_netbuf(struct netbuf *, struct netbuf *);
 208  213  static int nlm_netbuf_addrs_cmp(struct netbuf *, struct netbuf *);
 209  214  static void nlm_kmem_reclaim(void *);
 210  215  static void nlm_pool_shutdown(void);
 211  216  static void nlm_suspend_zone(struct nlm_globals *);
 212  217  static void nlm_resume_zone(struct nlm_globals *);
 213  218  static void nlm_nsm_clnt_init(CLIENT *, struct nlm_nsm *);
 214  219  static void nlm_netbuf_to_netobj(struct netbuf *, int *, netobj *);
 215  220  
 216  221  /*
 217  222   * NLM thread functions
 218  223   */
 219  224  static void nlm_gc(struct nlm_globals *);
 220  225  static void nlm_reclaimer(struct nlm_host *);
 221  226  
 222  227  /*
 223  228   * NLM NSM functions
 224  229   */
 225  230  static int nlm_init_local_knc(struct knetconfig *);
 226  231  static int nlm_nsm_init_local(struct nlm_nsm *);
 227  232  static int nlm_nsm_init(struct nlm_nsm *, struct knetconfig *, struct netbuf *);
 228  233  static void nlm_nsm_fini(struct nlm_nsm *);
 229  234  static enum clnt_stat nlm_nsm_simu_crash(struct nlm_nsm *);
 230  235  static enum clnt_stat nlm_nsm_stat(struct nlm_nsm *, int32_t *);
 231  236  static enum clnt_stat nlm_nsm_mon(struct nlm_nsm *, char *, uint16_t);
 232  237  static enum clnt_stat nlm_nsm_unmon(struct nlm_nsm *, char *);
 233  238  
 234  239  /*
 235  240   * NLM host functions
 236  241   */
 237  242  static int nlm_host_ctor(void *, void *, int);
 238  243  static void nlm_host_dtor(void *, void *);
 239  244  static void nlm_host_destroy(struct nlm_host *);
 240  245  static struct nlm_host *nlm_host_create(char *, const char *,
 241  246      struct knetconfig *, struct netbuf *);
 242  247  static struct nlm_host *nlm_host_find_locked(struct nlm_globals *,
 243  248      const char *, struct netbuf *, avl_index_t *);
 244  249  static void nlm_host_unregister(struct nlm_globals *, struct nlm_host *);
 245  250  static void nlm_host_gc_vholds(struct nlm_host *);
 246  251  static bool_t nlm_host_has_srv_locks(struct nlm_host *);
 247  252  static bool_t nlm_host_has_cli_locks(struct nlm_host *);
 248  253  static bool_t nlm_host_has_locks(struct nlm_host *);
 249  254  
 250  255  /*
 251  256   * NLM vhold functions
 252  257   */
 253  258  static int nlm_vhold_ctor(void *, void *, int);
 254  259  static void nlm_vhold_dtor(void *, void *);
 255  260  static void nlm_vhold_destroy(struct nlm_host *,
 256  261      struct nlm_vhold *);
 257  262  static bool_t nlm_vhold_busy(struct nlm_host *, struct nlm_vhold *);
 258  263  static void nlm_vhold_clean(struct nlm_vhold *, int);
 259  264  
 260  265  /*
 261  266   * NLM client/server sleeping locks/share reservation functions
 262  267   */
 263  268  struct nlm_slreq *nlm_slreq_find_locked(struct nlm_host *,
 264  269      struct nlm_vhold *, struct flock64 *);
 265  270  static struct nlm_shres *nlm_shres_create_item(struct shrlock *, vnode_t *);
 266  271  static void nlm_shres_destroy_item(struct nlm_shres *);
 267  272  static bool_t nlm_shres_equal(struct shrlock *, struct shrlock *);
 268  273  
 269  274  /*
 270  275   * NLM initialization functions.
 271  276   */
 272  277  void
 273  278  nlm_init(void)
 274  279  {
 275  280          nlm_hosts_cache = kmem_cache_create("nlm_host_cache",
 276  281              sizeof (struct nlm_host), 0, nlm_host_ctor, nlm_host_dtor,
 277  282              nlm_kmem_reclaim, NULL, NULL, 0);
 278  283  
 279  284          nlm_vhold_cache = kmem_cache_create("nlm_vhold_cache",
 280  285              sizeof (struct nlm_vhold), 0, nlm_vhold_ctor, nlm_vhold_dtor,
 281  286              NULL, NULL, NULL, 0);
 282  287  
 283  288          nlm_rpc_init();
 284  289          TAILQ_INIT(&nlm_zones_list);
 285  290  
 286  291          /* initialize sysids bitmap */
 287  292          bzero(nlm_sysid_bmap, sizeof (nlm_sysid_bmap));
 288  293          nlm_sysid_nidx = 1;
 289  294  
 290  295          /*
 291  296           * Reserv the sysid #0, because it's associated
 292  297           * with local locks only. Don't let to allocate
 293  298           * it for remote locks.
 294  299           */
 295  300          BT_SET(nlm_sysid_bmap, 0);
 296  301  }
 297  302  
 298  303  void
 299  304  nlm_globals_register(struct nlm_globals *g)
 300  305  {
 301  306          rw_enter(&lm_lck, RW_WRITER);
 302  307          TAILQ_INSERT_TAIL(&nlm_zones_list, g, nlm_link);
 303  308          rw_exit(&lm_lck);
 304  309  }
 305  310  
 306  311  void
 307  312  nlm_globals_unregister(struct nlm_globals *g)
 308  313  {
 309  314          rw_enter(&lm_lck, RW_WRITER);
 310  315          TAILQ_REMOVE(&nlm_zones_list, g, nlm_link);
 311  316          rw_exit(&lm_lck);
 312  317  }
 313  318  
 314  319  /* ARGSUSED */
 315  320  static void
 316  321  nlm_kmem_reclaim(void *cdrarg)
 317  322  {
 318  323          struct nlm_globals *g;
 319  324  
 320  325          rw_enter(&lm_lck, RW_READER);
 321  326          TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
 322  327                  cv_broadcast(&g->nlm_gc_sched_cv);
 323  328  
 324  329          rw_exit(&lm_lck);
 325  330  }
 326  331  
 327  332  /*
 328  333   * NLM garbage collector thread (GC).
 329  334   *
 330  335   * NLM GC periodically checks whether there're any host objects
 331  336   * that can be cleaned up. It also releases stale vnodes that
 332  337   * live on the server side (under protection of vhold objects).
 333  338   *
 334  339   * NLM host objects are cleaned up from GC thread because
 335  340   * operations helping us to determine whether given host has
 336  341   * any locks can be quite expensive and it's not good to call
 337  342   * them every time the very last reference to the host is dropped.
 338  343   * Thus we use "lazy" approach for hosts cleanup.
 339  344   *
 340  345   * The work of GC is to release stale vnodes on the server side
 341  346   * and destroy hosts that haven't any locks and any activity for
 342  347   * some time (i.e. idle hosts).
 343  348   */
 344  349  static void
 345  350  nlm_gc(struct nlm_globals *g)
 346  351  {
 347  352          struct nlm_host *hostp;
 348  353          clock_t now, idle_period;
 349  354  
 350  355          idle_period = SEC_TO_TICK(g->cn_idle_tmo);
 351  356          mutex_enter(&g->lock);
 352  357          for (;;) {
 353  358                  /*
 354  359                   * GC thread can be explicitly scheduled from
 355  360                   * memory reclamation function.
 356  361                   */
 357  362                  (void) cv_timedwait(&g->nlm_gc_sched_cv, &g->lock,
 358  363                      ddi_get_lbolt() + idle_period);
 359  364  
 360  365                  /*
 361  366                   * NLM is shutting down, time to die.
 362  367                   */
 363  368                  if (g->run_status == NLM_ST_STOPPING)
 364  369                          break;
 365  370  
 366  371                  now = ddi_get_lbolt();
 367  372                  DTRACE_PROBE2(gc__start, struct nlm_globals *, g,
 368  373                      clock_t, now);
 369  374  
 370  375                  /*
 371  376                   * Handle all hosts that are unused at the moment
 372  377                   * until we meet one with idle timeout in future.
 373  378                   */
 374  379                  while ((hostp = TAILQ_FIRST(&g->nlm_idle_hosts)) != NULL) {
 375  380                          bool_t has_locks = FALSE;
 376  381  
 377  382                          if (hostp->nh_idle_timeout > now)
 378  383                                  break;
 379  384  
 380  385                          /*
 381  386                           * Drop global lock while doing expensive work
 382  387                           * on this host. We'll re-check any conditions
 383  388                           * that might change after retaking the global
 384  389                           * lock.
 385  390                           */
 386  391                          mutex_exit(&g->lock);
 387  392                          mutex_enter(&hostp->nh_lock);
 388  393  
 389  394                          /*
 390  395                           * nlm_globals lock was dropped earlier because
 391  396                           * garbage collecting of vholds and checking whether
 392  397                           * host has any locks/shares are expensive operations.
 393  398                           */
 394  399                          nlm_host_gc_vholds(hostp);
 395  400                          has_locks = nlm_host_has_locks(hostp);
 396  401  
 397  402                          mutex_exit(&hostp->nh_lock);
 398  403                          mutex_enter(&g->lock);
 399  404  
 400  405                          /*
 401  406                           * While we were doing expensive operations outside of
 402  407                           * nlm_globals critical section, somebody could
 403  408                           * take the host, add lock/share to one of its vnodes
 404  409                           * and release the host back. If so, host's idle timeout
 405  410                           * is renewed and our information about locks on the
 406  411                           * given host is outdated.
 407  412                           */
 408  413                          if (hostp->nh_idle_timeout > now)
 409  414                                  continue;
 410  415  
 411  416                          /*
 412  417                           * If either host has locks or somebody has began to
 413  418                           * use it while we were outside the nlm_globals critical
 414  419                           * section. In both cases we have to renew host's
 415  420                           * timeout and put it to the end of LRU list.
 416  421                           */
 417  422                          if (has_locks || hostp->nh_refs > 0) {
 418  423                                  TAILQ_REMOVE(&g->nlm_idle_hosts,
 419  424                                      hostp, nh_link);
 420  425                                  hostp->nh_idle_timeout = now + idle_period;
 421  426                                  TAILQ_INSERT_TAIL(&g->nlm_idle_hosts,
 422  427                                      hostp, nh_link);
 423  428                                  continue;
 424  429                          }
 425  430  
 426  431                          /*
 427  432                           * We're here if all the following conditions hold:
 428  433                           * 1) Host hasn't any locks or share reservations
 429  434                           * 2) Host is unused
 430  435                           * 3) Host wasn't touched by anyone at least for
 431  436                           *    g->cn_idle_tmo seconds.
 432  437                           *
 433  438                           * So, now we can destroy it.
 434  439                           */
 435  440                          nlm_host_unregister(g, hostp);
 436  441                          mutex_exit(&g->lock);
 437  442  
 438  443                          nlm_host_unmonitor(g, hostp);
 439  444                          nlm_host_destroy(hostp);
 440  445                          mutex_enter(&g->lock);
 441  446                          if (g->run_status == NLM_ST_STOPPING)
 442  447                                  break;
 443  448  
 444  449                  }
 445  450  
 446  451                  DTRACE_PROBE(gc__end);
 447  452          }
 448  453  
 449  454          DTRACE_PROBE1(gc__exit, struct nlm_globals *, g);
 450  455  
 451  456          /* Let others know that GC has died */
 452  457          g->nlm_gc_thread = NULL;
 453  458          mutex_exit(&g->lock);
 454  459  
 455  460          cv_broadcast(&g->nlm_gc_finish_cv);
 456  461          zthread_exit();
 457  462  }
 458  463  
 459  464  /*
 460  465   * Thread reclaim locks/shares acquired by the client side
 461  466   * on the given server represented by hostp.
 462  467   */
 463  468  static void
 464  469  nlm_reclaimer(struct nlm_host *hostp)
 465  470  {
 466  471          struct nlm_globals *g;
 467  472  
 468  473          mutex_enter(&hostp->nh_lock);
 469  474          hostp->nh_reclaimer = curthread;
 470  475          mutex_exit(&hostp->nh_lock);
 471  476  
 472  477          g = zone_getspecific(nlm_zone_key, curzone);
 473  478          nlm_reclaim_client(g, hostp);
 474  479  
 475  480          mutex_enter(&hostp->nh_lock);
 476  481          hostp->nh_flags &= ~NLM_NH_RECLAIM;
 477  482          hostp->nh_reclaimer = NULL;
 478  483          cv_broadcast(&hostp->nh_recl_cv);
 479  484          mutex_exit(&hostp->nh_lock);
 480  485  
 481  486          /*
 482  487           * Host was explicitly referenced before
 483  488           * nlm_reclaim() was called, release it
 484  489           * here.
 485  490           */
 486  491          nlm_host_release(g, hostp);
 487  492          zthread_exit();
 488  493  }
 489  494  
 490  495  /*
 491  496   * Copy a struct netobj.  (see xdr.h)
 492  497   */
 493  498  void
 494  499  nlm_copy_netobj(struct netobj *dst, struct netobj *src)
 495  500  {
 496  501          dst->n_len = src->n_len;
 497  502          dst->n_bytes = kmem_alloc(src->n_len, KM_SLEEP);
 498  503          bcopy(src->n_bytes, dst->n_bytes, src->n_len);
 499  504  }
 500  505  
 501  506  /*
 502  507   * An NLM specificw replacement for clnt_call().
 503  508   * nlm_clnt_call() is used by all RPC functions generated
 504  509   * from nlm_prot.x specification. The function is aware
 505  510   * about some pitfalls of NLM RPC procedures and has a logic
 506  511   * that handles them properly.
 507  512   */
 508  513  enum clnt_stat
 509  514  nlm_clnt_call(CLIENT *clnt, rpcproc_t procnum, xdrproc_t xdr_args,
 510  515      caddr_t argsp, xdrproc_t xdr_result, caddr_t resultp, struct timeval wait)
 511  516  {
 512  517          k_sigset_t oldmask;
 513  518          enum clnt_stat stat;
 514  519          bool_t sig_blocked = FALSE;
 515  520  
 516  521          /*
 517  522           * If NLM RPC procnum is one of the NLM _RES procedures
 518  523           * that are used to reply to asynchronous NLM RPC
 519  524           * (MSG calls), explicitly set RPC timeout to zero.

↓ open down ↓

383 lines elided

↑ open up ↑

 520  525           * Client doesn't send a reply to RES procedures, so
 521  526           * we don't need to wait anything.
 522  527           *
 523  528           * NOTE: we ignore NLM4_*_RES procnums because they are
 524  529           * equal to NLM_*_RES numbers.
 525  530           */
 526  531          if (procnum >= NLM_TEST_RES && procnum <= NLM_GRANTED_RES)
 527  532                  wait = nlm_rpctv_zero;
 528  533  
 529  534          /*
      535 +         * Default timeout value of 25 seconds can take
      536 +         * nlm_null_rpc() 150 seconds to return RPC_TIMEDOUT 
      537 +         * if it uses UDP and the destination port is 
      538 +         * unreachable.
      539 +         *
      540 +         * A shorter timeout value, e.g. 200 milliseconds,
      541 +         * will cause nlm_null_rpc() to time out after
      542 +         * 200 * (1 + 2 + 4 + 8 + 16 + 32) = 12.6 seconds
      543 +         * (with retries set to 5)
      544 +         */
      545 +        if (procnum == NLM_NULL)
      546 +                wait = nlm_nullrpc_wait;
      547 +
      548 +        /*
 530  549           * We need to block signals in case of NLM_CANCEL RPC
 531  550           * in order to prevent interruption of network RPC
 532  551           * calls.
 533  552           */
 534  553          if (procnum == NLM_CANCEL) {
 535  554                  k_sigset_t newmask;
 536  555  
 537  556                  sigfillset(&newmask);
 538  557                  sigreplace(&newmask, &oldmask);
 539  558                  sig_blocked = TRUE;

 540  559          }
 541  560  
 542  561          stat = clnt_call(clnt, procnum, xdr_args,
 543  562              argsp, xdr_result, resultp, wait);
 544  563  
 545  564          /*
 546  565           * Restore signal mask back if signals were blocked
 547  566           */
 548  567          if (sig_blocked)
 549  568                  sigreplace(&oldmask, (k_sigset_t *)NULL);
 550  569  
 551  570          return (stat);
 552  571  }
 553  572  
 554  573  /*
 555  574   * Suspend NLM client/server in the given zone.
 556  575   *
 557  576   * During suspend operation we mark those hosts
 558  577   * that have any locks with NLM_NH_SUSPEND flags,
 559  578   * so that they can be checked later, when resume
 560  579   * operation occurs.
 561  580   */
 562  581  static void
 563  582  nlm_suspend_zone(struct nlm_globals *g)
 564  583  {
 565  584          struct nlm_host *hostp;
 566  585          struct nlm_host_list all_hosts;
 567  586  
 568  587          /*
 569  588           * Note that while we're doing suspend, GC thread is active
 570  589           * and it can destroy some hosts while we're walking through
 571  590           * the hosts tree. To prevent that and make suspend logic
 572  591           * a bit more simple we put all hosts to local "all_hosts"
 573  592           * list and increment reference counter of each host.
 574  593           * This guaranties that no hosts will be released while
 575  594           * we're doing suspend.
 576  595           * NOTE: reference of each host must be dropped during
 577  596           * resume operation.
 578  597           */
 579  598          TAILQ_INIT(&all_hosts);
 580  599          mutex_enter(&g->lock);
 581  600          for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL;
 582  601              hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) {
 583  602                  /*
 584  603                   * If host is idle, remove it from idle list and
 585  604                   * clear idle flag. That is done to prevent GC
 586  605                   * from touching this host.
 587  606                   */
 588  607                  if (hostp->nh_flags & NLM_NH_INIDLE) {
 589  608                          TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
 590  609                          hostp->nh_flags &= ~NLM_NH_INIDLE;
 591  610                  }
 592  611  
 593  612                  hostp->nh_refs++;
 594  613                  TAILQ_INSERT_TAIL(&all_hosts, hostp, nh_link);
 595  614          }
 596  615  
 597  616          /*
 598  617           * Now we can walk through all hosts on the system
 599  618           * with zone globals lock released. The fact the
 600  619           * we have taken a reference to each host guaranties
 601  620           * that no hosts can be destroyed during that process.
 602  621           */
 603  622          mutex_exit(&g->lock);
 604  623          while ((hostp = TAILQ_FIRST(&all_hosts)) != NULL) {
 605  624                  mutex_enter(&hostp->nh_lock);
 606  625                  if (nlm_host_has_locks(hostp))
 607  626                          hostp->nh_flags |= NLM_NH_SUSPEND;
 608  627  
 609  628                  mutex_exit(&hostp->nh_lock);
 610  629                  TAILQ_REMOVE(&all_hosts, hostp, nh_link);
 611  630          }
 612  631  }
 613  632  
 614  633  /*
 615  634   * Resume NLM hosts for the given zone.
 616  635   *
 617  636   * nlm_resume_zone() is called after hosts were suspended
 618  637   * (see nlm_suspend_zone) and its main purpose is to check
 619  638   * whether remote locks owned by hosts are still in consistent
 620  639   * state. If they aren't, resume function tries to reclaim
 621  640   * locks (for client side hosts) and clean locks (for
 622  641   * server side hosts).
            *
            * Every host found in g->nlm_hosts_tree gets its
            * NLM_NH_SUSPEND flag cleared and is passed to
            * nlm_host_release(), whether or not the check succeeded.
 623  642   */
 624  643  static void
 625  644  nlm_resume_zone(struct nlm_globals *g)
 626  645  {
 627  646          struct nlm_host *hostp, *h_next;
 628  647  
 629  648          mutex_enter(&g->lock);
 630  649          hostp = avl_first(&g->nlm_hosts_tree);
 631  650  
 632  651          /*
 633  652           * In nlm_suspend_zone() the reference counter of each
 634  653           * host was incremented, so we can safely iterate through
 635  654           * all hosts without worrying that any host we touch will
 636  655           * be removed at the moment.
 637  656           */
 638  657          while (hostp != NULL) {
 639  658                  struct nlm_nsm nsm;
 640  659                  enum clnt_stat stat;
 641  660                  int32_t sm_state;
 642  661                  int error;
 643  662                  bool_t resume_failed = FALSE;
 644  663  
                           /*
                            * Pick up the successor while g->lock is still held;
                            * the rest of the iteration runs without it.
                            */
 645  664                  h_next = AVL_NEXT(&g->nlm_hosts_tree, hostp);
 646  665                  mutex_exit(&g->lock);
 647  666  
 648  667                  DTRACE_PROBE1(resume__host, struct nlm_host *, hostp);
 649  668  
 650  669                  /*
 651  670                   * Suspend operation marked that the host doesn't
 652  671                   * have any locks. Skip it.
                            *
                            * NOTE(review): nh_flags is read here and modified at
                            * cycle_end with neither g->lock nor nh_lock held,
                            * while nlm_suspend_zone() sets NLM_NH_SUSPEND under
                            * nh_lock. Confirm this is safe.
 653  672                   */
 654  673                  if (!(hostp->nh_flags & NLM_NH_SUSPEND))
 655  674                          goto cycle_end;
 656  675  
 657  676                  error = nlm_nsm_init(&nsm, &hostp->nh_knc, &hostp->nh_addr);
 658  677                  if (error != 0) {
 659  678                          NLM_ERR("Resume: Failed to contact to NSM of host %s "
 660  679                              "[error=%d]\n", hostp->nh_name, error);
 661  680                          resume_failed = TRUE;
 662  681                          goto cycle_end;
 663  682                  }
 664  683  
 665  684                  stat = nlm_nsm_stat(&nsm, &sm_state);
 666  685                  if (stat != RPC_SUCCESS) {
 667  686                          NLM_ERR("Resume: Failed to call SM_STAT operation for "
 668  687                              "host %s [stat=%d]\n", hostp->nh_name, stat);
 669  688                          resume_failed = TRUE;
                                   /* The handle was initialized above; tear it down. */
 670  689                          nlm_nsm_fini(&nsm);
 671  690                          goto cycle_end;
 672  691                  }
 673  692  
 674  693                  if (sm_state != hostp->nh_state) {
 675  694                          /*
 676  695                           * Current SM state of the host isn't equal
 677  696                           * to the one host had when it was suspended.
 678  697                           * Probably it was rebooted. Try to reclaim
 679  698                           * locks if the host has any on its client side.
 680  699                           * Also try to clean up its server side locks
 681  700                           * (if the host has any).
 682  701                           */
 683  702                          nlm_host_notify_client(hostp, sm_state);
 684  703                          nlm_host_notify_server(hostp, sm_state);
 685  704                  }
 686  705  
 687  706                  nlm_nsm_fini(&nsm);
 688  707  
 689  708  cycle_end:
 690  709                  if (resume_failed) {
 691  710                          /*
 692  711                           * Resume failed for the given host.
 693  712                           * Just clean up all resources it owns.
 694  713                           */
 695  714                          nlm_host_notify_server(hostp, 0);
 696  715                          nlm_client_cancel_all(g, hostp);
 697  716                  }
 698  717  
                           /*
                            * Clear the suspend mark and release the host, then
                            * retake g->lock before the next loop test.
                            */
 699  718                  hostp->nh_flags &= ~NLM_NH_SUSPEND;
 700  719                  nlm_host_release(g, hostp);
 701  720                  hostp = h_next;
 702  721                  mutex_enter(&g->lock);
 703  722          }
 704  723  
 705  724          mutex_exit(&g->lock);
 706  725  }
 707  726  
 708  727  /*
 709  728   * NLM functions responsible for operations on NSM handle.
 710  729   */
 711  730  
 712  731  /*
 713  732   * Initialize knetconfig that is used for communication
 714  733   * with local statd via loopback interface.
 715  734   */
 716  735  static int
 717  736  nlm_init_local_knc(struct knetconfig *knc)
 718  737  {
 719  738          int error;
 720  739          vnode_t *vp;
 721  740  
 722  741          bzero(knc, sizeof (*knc));
 723  742          error = lookupname("/dev/tcp", UIO_SYSSPACE,
 724  743              FOLLOW, NULLVPP, &vp);
 725  744          if (error != 0)
 726  745                  return (error);
 727  746  
 728  747          knc->knc_semantics = NC_TPI_COTS;
 729  748          knc->knc_protofmly = NC_INET;
 730  749          knc->knc_proto = NC_TCP;
 731  750          knc->knc_rdev = vp->v_rdev;
 732  751          VN_RELE(vp);
 733  752  
 734  753  
 735  754          return (0);
 736  755  }
 737  756  
 738  757  /*
 739  758   * Initialize NSM handle that will be used to talk
 740  759   * to local statd via loopback interface.
 741  760   */
 742  761  static int
 743  762  nlm_nsm_init_local(struct nlm_nsm *nsm)
 744  763  {
 745  764          int error;
 746  765          struct knetconfig knc;
 747  766          struct sockaddr_in sin;
 748  767          struct netbuf nb;
 749  768  
 750  769          error = nlm_init_local_knc(&knc);
 751  770          if (error != 0)
 752  771                  return (error);
 753  772  
 754  773          bzero(&sin, sizeof (sin));
 755  774          sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
 756  775          sin.sin_family = AF_INET;
 757  776  
 758  777          nb.buf = (char *)&sin;
 759  778          nb.len = nb.maxlen = sizeof (sin);
 760  779  
 761  780          return (nlm_nsm_init(nsm, &knc, &nb));
 762  781  }
 763  782  
 764  783  /*
 765  784   * Initialize NSM handle used for talking to statd
 766  785   */
 767  786  static int
 768  787  nlm_nsm_init(struct nlm_nsm *nsm, struct knetconfig *knc, struct netbuf *nb)
 769  788  {
 770  789          enum clnt_stat stat;
 771  790          int error, retries;
 772  791  
 773  792          bzero(nsm, sizeof (*nsm));
 774  793          nsm->ns_knc = *knc;
 775  794          nlm_copy_netbuf(&nsm->ns_addr, nb);
 776  795  
 777  796          /*
 778  797           * Try several times to get the port of statd service,
 779  798           * If rpcbind_getaddr returns  RPC_PROGNOTREGISTERED,
 780  799           * retry an attempt, but wait for NLM_NSM_RPCBIND_TIMEOUT
 781  800           * seconds berofore.
 782  801           */
 783  802          for (retries = 0; retries < NLM_NSM_RPCBIND_RETRIES; retries++) {
 784  803                  stat = rpcbind_getaddr(&nsm->ns_knc, SM_PROG,
 785  804                      SM_VERS, &nsm->ns_addr);
 786  805                  if (stat != RPC_SUCCESS) {
 787  806                          if (stat == RPC_PROGNOTREGISTERED) {
 788  807                                  delay(SEC_TO_TICK(NLM_NSM_RPCBIND_TIMEOUT));
 789  808                                  continue;
 790  809                          }
 791  810                  }
 792  811  
 793  812                  break;
 794  813          }
 795  814  
 796  815          if (stat != RPC_SUCCESS) {
 797  816                  DTRACE_PROBE2(rpcbind__error, enum clnt_stat, stat,
 798  817                      int, retries);
 799  818                  error = ENOENT;
 800  819                  goto error;
 801  820          }
 802  821  
 803  822          /*
 804  823           * Create an RPC handle that'll be used for communication with local
 805  824           * statd using the status monitor protocol.
 806  825           */
 807  826          error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, SM_PROG, SM_VERS,
 808  827              0, NLM_RPC_RETRIES, kcred, &nsm->ns_handle);
 809  828          if (error != 0)
 810  829                  goto error;
 811  830  
 812  831          /*
 813  832           * Create an RPC handle that'll be used for communication with the
 814  833           * local statd using the address registration protocol.
 815  834           */
 816  835          error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, NSM_ADDR_PROGRAM,
 817  836              NSM_ADDR_V1, 0, NLM_RPC_RETRIES, kcred, &nsm->ns_addr_handle);
 818  837          if (error != 0)
 819  838                  goto error;
 820  839  
 821  840          sema_init(&nsm->ns_sem, 1, NULL, SEMA_DEFAULT, NULL);
 822  841          return (0);
 823  842  
 824  843  error:
 825  844          kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen);
 826  845          if (nsm->ns_handle)
 827  846                  CLNT_DESTROY(nsm->ns_handle);
 828  847  
 829  848          return (error);
 830  849  }
 831  850  
 832  851  static void
 833  852  nlm_nsm_fini(struct nlm_nsm *nsm)
 834  853  {
 835  854          kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen);
 836  855          CLNT_DESTROY(nsm->ns_addr_handle);
 837  856          nsm->ns_addr_handle = NULL;
 838  857          CLNT_DESTROY(nsm->ns_handle);
 839  858          nsm->ns_handle = NULL;
 840  859          sema_destroy(&nsm->ns_sem);
 841  860  }
 842  861  
 843  862  static enum clnt_stat
 844  863  nlm_nsm_simu_crash(struct nlm_nsm *nsm)
 845  864  {
 846  865          enum clnt_stat stat;
 847  866  
 848  867          sema_p(&nsm->ns_sem);
 849  868          nlm_nsm_clnt_init(nsm->ns_handle, nsm);
 850  869          stat = sm_simu_crash_1(NULL, NULL, nsm->ns_handle);
 851  870          sema_v(&nsm->ns_sem);
 852  871  
 853  872          return (stat);
 854  873  }
 855  874  
 856  875  static enum clnt_stat
 857  876  nlm_nsm_stat(struct nlm_nsm *nsm, int32_t *out_stat)
 858  877  {
 859  878          struct sm_name args;
 860  879          struct sm_stat_res res;
 861  880          enum clnt_stat stat;
 862  881  
 863  882          args.mon_name = uts_nodename();
 864  883          bzero(&res, sizeof (res));
 865  884  
 866  885          sema_p(&nsm->ns_sem);
 867  886          nlm_nsm_clnt_init(nsm->ns_handle, nsm);
 868  887          stat = sm_stat_1(&args, &res, nsm->ns_handle);
 869  888          sema_v(&nsm->ns_sem);
 870  889  
 871  890          if (stat == RPC_SUCCESS)
 872  891                  *out_stat = res.state;
 873  892  
 874  893          return (stat);
 875  894  }
 876  895  
 877  896  static enum clnt_stat
 878  897  nlm_nsm_mon(struct nlm_nsm *nsm, char *hostname, uint16_t priv)
 879  898  {
 880  899          struct mon args;
 881  900          struct sm_stat_res res;
 882  901          enum clnt_stat stat;
 883  902  
 884  903          bzero(&args, sizeof (args));
 885  904          bzero(&res, sizeof (res));
 886  905  
 887  906          args.mon_id.mon_name = hostname;
 888  907          args.mon_id.my_id.my_name = uts_nodename();
 889  908          args.mon_id.my_id.my_prog = NLM_PROG;
 890  909          args.mon_id.my_id.my_vers = NLM_SM;
 891  910          args.mon_id.my_id.my_proc = NLM_SM_NOTIFY1;
 892  911          bcopy(&priv, args.priv, sizeof (priv));
 893  912  
 894  913          sema_p(&nsm->ns_sem);
 895  914          nlm_nsm_clnt_init(nsm->ns_handle, nsm);
 896  915          stat = sm_mon_1(&args, &res, nsm->ns_handle);
 897  916          sema_v(&nsm->ns_sem);
 898  917  
 899  918          return (stat);
 900  919  }
 901  920  
 902  921  static enum clnt_stat
 903  922  nlm_nsm_unmon(struct nlm_nsm *nsm, char *hostname)
 904  923  {
 905  924          struct mon_id args;
 906  925          struct sm_stat res;
 907  926          enum clnt_stat stat;
 908  927  
 909  928          bzero(&args, sizeof (args));
 910  929          bzero(&res, sizeof (res));
 911  930  
 912  931          args.mon_name = hostname;
 913  932          args.my_id.my_name = uts_nodename();
 914  933          args.my_id.my_prog = NLM_PROG;
 915  934          args.my_id.my_vers = NLM_SM;
 916  935          args.my_id.my_proc = NLM_SM_NOTIFY1;
 917  936  
 918  937          sema_p(&nsm->ns_sem);
 919  938          nlm_nsm_clnt_init(nsm->ns_handle, nsm);
 920  939          stat = sm_unmon_1(&args, &res, nsm->ns_handle);
 921  940          sema_v(&nsm->ns_sem);
 922  941  
 923  942          return (stat);
 924  943  }
 925  944  
 926  945  static enum clnt_stat
 927  946  nlm_nsmaddr_reg(struct nlm_nsm *nsm, char *name, int family, netobj *address)
 928  947  {
 929  948          struct reg1args args = { 0 };
 930  949          struct reg1res res = { 0 };
 931  950          enum clnt_stat stat;
 932  951  
 933  952          args.family = family;
 934  953          args.name = name;
 935  954          args.address = *address;
 936  955  
 937  956          sema_p(&nsm->ns_sem);
 938  957          nlm_nsm_clnt_init(nsm->ns_addr_handle, nsm);
 939  958          stat = nsmaddrproc1_reg_1(&args, &res, nsm->ns_addr_handle);
 940  959          sema_v(&nsm->ns_sem);
 941  960  
 942  961          return (stat);
 943  962  }
 944  963  
 945  964  /*
 946  965   * Get NLM vhold object corresponding to vnode "vp".
 947  966   * If no such object was found, create a new one.
 948  967   *
 949  968   * The purpose of this function is to associate vhold
 950  969   * object with given vnode, so that:
 951  970   * 1) vnode is hold (VN_HOLD) while vhold object is alive.
 952  971   * 2) host has a track of all vnodes it touched by lock
 953  972   *    or share operations. These vnodes are accessible
 954  973   *    via collection of vhold objects.
 955  974   */
 956  975  struct nlm_vhold *
 957  976  nlm_vhold_get(struct nlm_host *hostp, vnode_t *vp)
 958  977  {
 959  978          struct nlm_vhold *nvp, *new_nvp = NULL;
 960  979  
 961  980          mutex_enter(&hostp->nh_lock);
 962  981          nvp = nlm_vhold_find_locked(hostp, vp);
 963  982          if (nvp != NULL)
 964  983                  goto out;
 965  984  
 966  985          /* nlm_vhold wasn't found, then create a new one */
 967  986          mutex_exit(&hostp->nh_lock);
 968  987          new_nvp = kmem_cache_alloc(nlm_vhold_cache, KM_SLEEP);
 969  988  
 970  989          /*
 971  990           * Check if another thread has already
 972  991           * created the same nlm_vhold.
 973  992           */
 974  993          mutex_enter(&hostp->nh_lock);
 975  994          nvp = nlm_vhold_find_locked(hostp, vp);
 976  995          if (nvp == NULL) {
 977  996                  nvp = new_nvp;
 978  997                  new_nvp = NULL;
 979  998  
 980  999                  TAILQ_INIT(&nvp->nv_slreqs);
 981 1000                  nvp->nv_vp = vp;
 982 1001                  nvp->nv_refcnt = 1;
 983 1002                  VN_HOLD(nvp->nv_vp);
 984 1003  
 985 1004                  VERIFY(mod_hash_insert(hostp->nh_vholds_by_vp,
 986 1005                      (mod_hash_key_t)vp, (mod_hash_val_t)nvp) == 0);
 987 1006                  TAILQ_INSERT_TAIL(&hostp->nh_vholds_list, nvp, nv_link);
 988 1007          }
 989 1008  
 990 1009  out:
 991 1010          mutex_exit(&hostp->nh_lock);
 992 1011          if (new_nvp != NULL)
 993 1012                  kmem_cache_free(nlm_vhold_cache, new_nvp);
 994 1013  
 995 1014          return (nvp);
 996 1015  }
 997 1016  
 998 1017  /*
 999 1018   * Drop a reference to vhold object nvp.
1000 1019   */
1001 1020  void
1002 1021  nlm_vhold_release(struct nlm_host *hostp, struct nlm_vhold *nvp)
1003 1022  {
1004 1023          if (nvp == NULL)
1005 1024                  return;
1006 1025  
1007 1026          mutex_enter(&hostp->nh_lock);
1008 1027          ASSERT(nvp->nv_refcnt > 0);
1009 1028          nvp->nv_refcnt--;
1010 1029          mutex_exit(&hostp->nh_lock);
1011 1030  }
1012 1031  
1013 1032  /*
1014 1033   * Clean all locks and share reservations on the
1015 1034   * given vhold object that were acquired by the
1016 1035   * given sysid
1017 1036   */
1018 1037  static void
1019 1038  nlm_vhold_clean(struct nlm_vhold *nvp, int sysid)
1020 1039  {
1021 1040          cleanlocks(nvp->nv_vp, IGN_PID, sysid);
1022 1041          cleanshares_by_sysid(nvp->nv_vp, sysid);
1023 1042  }
1024 1043  
1025 1044  static void
1026 1045  nlm_vhold_destroy(struct nlm_host *hostp, struct nlm_vhold *nvp)
1027 1046  {
1028 1047          ASSERT(MUTEX_HELD(&hostp->nh_lock));
1029 1048  
1030 1049          VERIFY(mod_hash_remove(hostp->nh_vholds_by_vp,
1031 1050              (mod_hash_key_t)nvp->nv_vp,
1032 1051              (mod_hash_val_t)&nvp) == 0);
1033 1052  
1034 1053          TAILQ_REMOVE(&hostp->nh_vholds_list, nvp, nv_link);
1035 1054          VN_RELE(nvp->nv_vp);
1036 1055          nvp->nv_vp = NULL;
1037 1056  
1038 1057          kmem_cache_free(nlm_vhold_cache, nvp);
1039 1058  }
1040 1059  
1041 1060  /*
1042 1061   * Return TRUE if the given vhold is busy.
1043 1062   * Vhold object is considered to be "busy" when
1044 1063   * all the following conditions hold:
1045 1064   * 1) No one uses it at the moment;
1046 1065   * 2) It hasn't any locks;
1047 1066   * 3) It hasn't any share reservations;
1048 1067   */
1049 1068  static bool_t
1050 1069  nlm_vhold_busy(struct nlm_host *hostp, struct nlm_vhold *nvp)
1051 1070  {
1052 1071          vnode_t *vp;
1053 1072          int sysid;
1054 1073  
1055 1074          ASSERT(MUTEX_HELD(&hostp->nh_lock));
1056 1075  
1057 1076          if (nvp->nv_refcnt > 0)
1058 1077                  return (TRUE);
1059 1078  
1060 1079          vp = nvp->nv_vp;
1061 1080          sysid = hostp->nh_sysid;
1062 1081          if (flk_has_remote_locks_for_sysid(vp, sysid) ||
1063 1082              shr_has_remote_shares(vp, sysid))
1064 1083                  return (TRUE);
1065 1084  
1066 1085          return (FALSE);
1067 1086  }
1068 1087  
1069 1088  /* ARGSUSED */
1070 1089  static int
1071 1090  nlm_vhold_ctor(void *datap, void *cdrarg, int kmflags)
1072 1091  {
1073 1092          struct nlm_vhold *nvp = (struct nlm_vhold *)datap;
1074 1093  
1075 1094          bzero(nvp, sizeof (*nvp));
1076 1095          return (0);
1077 1096  }
1078 1097  
1079 1098  /* ARGSUSED */
1080 1099  static void
1081 1100  nlm_vhold_dtor(void *datap, void *cdrarg)
1082 1101  {
1083 1102          struct nlm_vhold *nvp = (struct nlm_vhold *)datap;
1084 1103  
1085 1104          ASSERT(nvp->nv_refcnt == 0);
1086 1105          ASSERT(TAILQ_EMPTY(&nvp->nv_slreqs));
1087 1106          ASSERT(nvp->nv_vp == NULL);
1088 1107  }
1089 1108  
1090 1109  struct nlm_vhold *
1091 1110  nlm_vhold_find_locked(struct nlm_host *hostp, const vnode_t *vp)
1092 1111  {
1093 1112          struct nlm_vhold *nvp = NULL;
1094 1113  
1095 1114          ASSERT(MUTEX_HELD(&hostp->nh_lock));
1096 1115          (void) mod_hash_find(hostp->nh_vholds_by_vp,
1097 1116              (mod_hash_key_t)vp,
1098 1117              (mod_hash_val_t)&nvp);
1099 1118  
1100 1119          if (nvp != NULL)
1101 1120                  nvp->nv_refcnt++;
1102 1121  
1103 1122          return (nvp);
1104 1123  }
1105 1124  
1106 1125  /*
1107 1126   * NLM host functions
1108 1127   */
1109 1128  static void
1110 1129  nlm_copy_netbuf(struct netbuf *dst, struct netbuf *src)
1111 1130  {
1112 1131          ASSERT(src->len <= src->maxlen);
1113 1132  
1114 1133          dst->maxlen = src->maxlen;
1115 1134          dst->len = src->len;
1116 1135          dst->buf = kmem_zalloc(src->maxlen, KM_SLEEP);
1117 1136          bcopy(src->buf, dst->buf, src->len);
1118 1137  }
1119 1138  
1120 1139  /* ARGSUSED */
1121 1140  static int
1122 1141  nlm_host_ctor(void *datap, void *cdrarg, int kmflags)
1123 1142  {
1124 1143          struct nlm_host *hostp = (struct nlm_host *)datap;
1125 1144  
1126 1145          bzero(hostp, sizeof (*hostp));
1127 1146          return (0);
1128 1147  }
1129 1148  
1130 1149  /* ARGSUSED */
1131 1150  static void
1132 1151  nlm_host_dtor(void *datap, void *cdrarg)
1133 1152  {
1134 1153          struct nlm_host *hostp = (struct nlm_host *)datap;
1135 1154          ASSERT(hostp->nh_refs == 0);
1136 1155  }
1137 1156  
1138 1157  static void
1139 1158  nlm_host_unregister(struct nlm_globals *g, struct nlm_host *hostp)
1140 1159  {
1141 1160          ASSERT(hostp->nh_refs == 0);
1142 1161  
1143 1162          avl_remove(&g->nlm_hosts_tree, hostp);
1144 1163          VERIFY(mod_hash_remove(g->nlm_hosts_hash,
1145 1164              (mod_hash_key_t)(uintptr_t)hostp->nh_sysid,
1146 1165              (mod_hash_val_t)&hostp) == 0);
1147 1166          TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
1148 1167          hostp->nh_flags &= ~NLM_NH_INIDLE;
1149 1168  }
1150 1169  
1151 1170  /*
1152 1171   * Free resources used by a host. This is called after the reference
1153 1172   * count has reached zero so it doesn't need to worry about locks.
1154 1173   */
1155 1174  static void
1156 1175  nlm_host_destroy(struct nlm_host *hostp)
1157 1176  {
1158 1177          ASSERT(hostp->nh_name != NULL);
1159 1178          ASSERT(hostp->nh_netid != NULL);
1160 1179          ASSERT(TAILQ_EMPTY(&hostp->nh_vholds_list));
1161 1180  
1162 1181          strfree(hostp->nh_name);
1163 1182          strfree(hostp->nh_netid);
1164 1183          kmem_free(hostp->nh_addr.buf, hostp->nh_addr.maxlen);
1165 1184  
1166 1185          if (hostp->nh_sysid != LM_NOSYSID)
1167 1186                  nlm_sysid_free(hostp->nh_sysid);
1168 1187  
1169 1188          nlm_rpc_cache_destroy(hostp);
1170 1189  
1171 1190          ASSERT(TAILQ_EMPTY(&hostp->nh_vholds_list));
1172 1191          mod_hash_destroy_ptrhash(hostp->nh_vholds_by_vp);
1173 1192  
1174 1193          mutex_destroy(&hostp->nh_lock);
1175 1194          cv_destroy(&hostp->nh_rpcb_cv);
1176 1195          cv_destroy(&hostp->nh_recl_cv);
1177 1196  
1178 1197          kmem_cache_free(nlm_hosts_cache, hostp);
1179 1198  }
1180 1199  
1181 1200  /*
1182 1201   * Cleanup SERVER-side state after a client restarts,
1183 1202   * or becomes unresponsive, or whatever.
1184 1203   *
1185 1204   * We unlock any active locks owned by the host.
1186 1205   * When rpc.lockd is shutting down,
1187 1206   * this function is called with newstate set to zero
1188 1207   * which allows us to cancel any pending async locks
1189 1208   * and clear the locking state.
1190 1209   *
1191 1210   * When "state" is 0, we don't update host's state,
1192 1211   * but cleanup all remote locks on the host.
1193 1212   * It's useful to call this function for resources
1194 1213   * cleanup.
1195 1214   */
1196 1215  void
1197 1216  nlm_host_notify_server(struct nlm_host *hostp, int32_t state)
1198 1217  {
1199 1218          struct nlm_vhold *nvp;
1200 1219          struct nlm_slreq *slr;
1201 1220          struct nlm_slreq_list slreqs2free;
1202 1221  
1203 1222          TAILQ_INIT(&slreqs2free);
1204 1223          mutex_enter(&hostp->nh_lock);
1205 1224          if (state != 0)
1206 1225                  hostp->nh_state = state;
1207 1226  
1208 1227          TAILQ_FOREACH(nvp, &hostp->nh_vholds_list, nv_link) {
1209 1228  
1210 1229                  /* cleanup sleeping requests at first */
1211 1230                  while ((slr = TAILQ_FIRST(&nvp->nv_slreqs)) != NULL) {
1212 1231                          TAILQ_REMOVE(&nvp->nv_slreqs, slr, nsr_link);
1213 1232  
1214 1233                          /*
1215 1234                           * Instead of freeing cancelled sleeping request
1216 1235                           * here, we add it to the linked list created
1217 1236                           * on the stack in order to do all frees outside
1218 1237                           * the critical section.
1219 1238                           */
1220 1239                          TAILQ_INSERT_TAIL(&slreqs2free, slr, nsr_link);
1221 1240                  }
1222 1241  
1223 1242                  nvp->nv_refcnt++;
1224 1243                  mutex_exit(&hostp->nh_lock);
1225 1244  
1226 1245                  nlm_vhold_clean(nvp, hostp->nh_sysid);
1227 1246  
1228 1247                  mutex_enter(&hostp->nh_lock);
1229 1248                  nvp->nv_refcnt--;
1230 1249          }
1231 1250  
1232 1251          mutex_exit(&hostp->nh_lock);
1233 1252          while ((slr = TAILQ_FIRST(&slreqs2free)) != NULL) {
1234 1253                  TAILQ_REMOVE(&slreqs2free, slr, nsr_link);
1235 1254                  kmem_free(slr, sizeof (*slr));
1236 1255          }
1237 1256  }
1238 1257  
1239 1258  /*
1240 1259   * Cleanup CLIENT-side state after a server restarts,
1241 1260   * or becomes unresponsive, or whatever.
1242 1261   *
1243 1262   * This is called by the local NFS statd when we receive a
1244 1263   * host state change notification.  (also nlm_svc_stopping)
1245 1264   *
1246 1265   * Deal with a server restart.  If we are stopping the
1247 1266   * NLM service, we'll have newstate == 0, and will just
1248 1267   * cancel all our client-side lock requests.  Otherwise,
1249 1268   * start the "recovery" process to reclaim any locks
1250 1269   * we hold on this server.
1251 1270   */
1252 1271  void
1253 1272  nlm_host_notify_client(struct nlm_host *hostp, int32_t state)
1254 1273  {
1255 1274          mutex_enter(&hostp->nh_lock);
1256 1275          hostp->nh_state = state;
1257 1276          if (hostp->nh_flags & NLM_NH_RECLAIM) {
1258 1277                  /*
1259 1278                   * Either host's state is up to date or
1260 1279                   * host is already in recovery.
1261 1280                   */
1262 1281                  mutex_exit(&hostp->nh_lock);
1263 1282                  return;
1264 1283          }
1265 1284  
1266 1285          hostp->nh_flags |= NLM_NH_RECLAIM;
1267 1286  
1268 1287          /*
1269 1288           * Host will be released by the recovery thread,
1270 1289           * thus we need to increment refcount.
1271 1290           */
1272 1291          hostp->nh_refs++;
1273 1292          mutex_exit(&hostp->nh_lock);
1274 1293  
1275 1294          (void) zthread_create(NULL, 0, nlm_reclaimer,
1276 1295              hostp, 0, minclsyspri);
1277 1296  }
1278 1297  
1279 1298  /*
1280 1299   * The function is called when NLM client detects that
1281 1300   * server has entered in grace period and client needs
1282 1301   * to wait until reclamation process (if any) does
1283 1302   * its job.
1284 1303   */
1285 1304  int
1286 1305  nlm_host_wait_grace(struct nlm_host *hostp)
1287 1306  {
1288 1307          struct nlm_globals *g;
1289 1308          int error = 0;
1290 1309  
1291 1310          g = zone_getspecific(nlm_zone_key, curzone);
1292 1311          mutex_enter(&hostp->nh_lock);
1293 1312  
1294 1313          do {
1295 1314                  int rc;
1296 1315  
1297 1316                  rc = cv_timedwait_sig(&hostp->nh_recl_cv,
1298 1317                      &hostp->nh_lock, ddi_get_lbolt() +
1299 1318                      SEC_TO_TICK(g->retrans_tmo));
1300 1319  
1301 1320                  if (rc == 0) {
1302 1321                          error = EINTR;
1303 1322                          break;
1304 1323                  }
1305 1324          } while (hostp->nh_flags & NLM_NH_RECLAIM);
1306 1325  
1307 1326          mutex_exit(&hostp->nh_lock);
1308 1327          return (error);
1309 1328  }
1310 1329  
1311 1330  /*
1312 1331   * Create a new NLM host.
1313 1332   *
1314 1333   * NOTE: The in-kernel RPC (kRPC) subsystem uses TLI/XTI,
1315 1334   * which needs both a knetconfig and an address when creating
1316 1335   * endpoints. Thus host object stores both knetconfig and
1317 1336   * netid.
1318 1337   */
1319 1338  static struct nlm_host *
1320 1339  nlm_host_create(char *name, const char *netid,
1321 1340      struct knetconfig *knc, struct netbuf *naddr)
1322 1341  {
1323 1342          struct nlm_host *host;
1324 1343  
1325 1344          host = kmem_cache_alloc(nlm_hosts_cache, KM_SLEEP);
1326 1345  
1327 1346          mutex_init(&host->nh_lock, NULL, MUTEX_DEFAULT, NULL);
1328 1347          cv_init(&host->nh_rpcb_cv, NULL, CV_DEFAULT, NULL);
1329 1348          cv_init(&host->nh_recl_cv, NULL, CV_DEFAULT, NULL);
1330 1349  
1331 1350          host->nh_sysid = LM_NOSYSID;
1332 1351          host->nh_refs = 1;
1333 1352          host->nh_name = strdup(name);
1334 1353          host->nh_netid = strdup(netid);
1335 1354          host->nh_knc = *knc;
1336 1355          nlm_copy_netbuf(&host->nh_addr, naddr);
1337 1356  
1338 1357          host->nh_state = 0;
1339 1358          host->nh_rpcb_state = NRPCB_NEED_UPDATE;
1340 1359          host->nh_flags = 0;
1341 1360  
1342 1361          host->nh_vholds_by_vp = mod_hash_create_ptrhash("nlm vholds hash",
1343 1362              32, mod_hash_null_valdtor, sizeof (vnode_t));
1344 1363  
1345 1364          TAILQ_INIT(&host->nh_vholds_list);
1346 1365          TAILQ_INIT(&host->nh_rpchc);
1347 1366  
1348 1367          return (host);
1349 1368  }
1350 1369  
1351 1370  /*
1352 1371   * Cancel all client side sleeping locks owned by given host.
1353 1372   */
1354 1373  void
1355 1374  nlm_host_cancel_slocks(struct nlm_globals *g, struct nlm_host *hostp)
1356 1375  {
1357 1376          struct nlm_slock *nslp;
1358 1377  
1359 1378          mutex_enter(&g->lock);
1360 1379          TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) {
1361 1380                  if (nslp->nsl_host == hostp) {
1362 1381                          nslp->nsl_state = NLM_SL_CANCELLED;
1363 1382                          cv_broadcast(&nslp->nsl_cond);
1364 1383                  }
1365 1384          }
1366 1385  
1367 1386          mutex_exit(&g->lock);
1368 1387  }
1369 1388  
1370 1389  /*
1371 1390   * Garbage collect stale vhold objects.
1372 1391   *
1373 1392   * In other words check whether vnodes that are
1374 1393   * held by vhold objects still have any locks
1375 1394   * or shares or still in use. If they aren't,
1376 1395   * just destroy them.
1377 1396   */
1378 1397  static void
1379 1398  nlm_host_gc_vholds(struct nlm_host *hostp)
1380 1399  {
1381 1400          struct nlm_vhold *nvp;
1382 1401  
1383 1402          ASSERT(MUTEX_HELD(&hostp->nh_lock));
1384 1403  
1385 1404          nvp = TAILQ_FIRST(&hostp->nh_vholds_list);
1386 1405          while (nvp != NULL) {
1387 1406                  struct nlm_vhold *nvp_tmp;
1388 1407  
1389 1408                  if (nlm_vhold_busy(hostp, nvp)) {
1390 1409                          nvp = TAILQ_NEXT(nvp, nv_link);
1391 1410                          continue;
1392 1411                  }
1393 1412  
1394 1413                  nvp_tmp = TAILQ_NEXT(nvp, nv_link);
1395 1414                  nlm_vhold_destroy(hostp, nvp);
1396 1415                  nvp = nvp_tmp;
1397 1416          }
1398 1417  }
1399 1418  
1400 1419  /*
1401 1420   * Check whether the given host has any
1402 1421   * server side locks or share reservations.
1403 1422   */
1404 1423  static bool_t
1405 1424  nlm_host_has_srv_locks(struct nlm_host *hostp)
1406 1425  {
1407 1426          /*
1408 1427           * It's cheap and simple: if server has
1409 1428           * any locks/shares there must be vhold
1410 1429           * object storing the affected vnode.
1411 1430           *
1412 1431           * NOTE: We don't need to check sleeping
1413 1432           * locks on the server side, because if
1414 1433           * server side sleeping lock is alive,
1415 1434           * there must be a vhold object corresponding
1416 1435           * to target vnode.
1417 1436           */
1418 1437          ASSERT(MUTEX_HELD(&hostp->nh_lock));
1419 1438          if (!TAILQ_EMPTY(&hostp->nh_vholds_list))
1420 1439                  return (TRUE);
1421 1440  
1422 1441          return (FALSE);
1423 1442  }
1424 1443  
1425 1444  /*
1426 1445   * Check whether the given host has any client side
1427 1446   * locks or share reservations.
1428 1447   */
1429 1448  static bool_t
1430 1449  nlm_host_has_cli_locks(struct nlm_host *hostp)
1431 1450  {
1432 1451          ASSERT(MUTEX_HELD(&hostp->nh_lock));
1433 1452  
1434 1453          /*
1435 1454           * XXX: It's not the way I'd like to do the check,
1436 1455           * because flk_sysid_has_locks() can be very
1437 1456           * expensive by design. Unfortunatelly it iterates
1438 1457           * through all locks on the system, doesn't matter
1439 1458           * were they made on remote system via NLM or
1440 1459           * on local system via reclock. To understand the
1441 1460           * problem, consider that there're dozens of thousands
1442 1461           * of locks that are made on some ZFS dataset. And there's
1443 1462           * another dataset shared by NFS where NLM client had locks
1444 1463           * some time ago, but doesn't have them now.
1445 1464           * In this case flk_sysid_has_locks() will iterate
1446 1465           * thrught dozens of thousands locks until it returns us
1447 1466           * FALSE.
1448 1467           * Oh, I hope that in shiny future somebody will make
1449 1468           * local lock manager (os/flock.c) better, so that
1450 1469           * it'd be more friedly to remote locks and
1451 1470           * flk_sysid_has_locks() wouldn't be so expensive.
1452 1471           */
1453 1472          if (flk_sysid_has_locks(hostp->nh_sysid |
1454 1473              LM_SYSID_CLIENT, FLK_QUERY_ACTIVE))
1455 1474                  return (TRUE);
1456 1475  
1457 1476          /*
1458 1477           * Check whether host has any share reservations
1459 1478           * registered on the client side.
1460 1479           */
1461 1480          if (hostp->nh_shrlist != NULL)
1462 1481                  return (TRUE);
1463 1482  
1464 1483          return (FALSE);
1465 1484  }
1466 1485  
1467 1486  /*
1468 1487   * Determine whether the given host owns any
1469 1488   * locks or share reservations.
1470 1489   */
1471 1490  static bool_t
1472 1491  nlm_host_has_locks(struct nlm_host *hostp)
1473 1492  {
1474 1493          if (nlm_host_has_srv_locks(hostp))
1475 1494                  return (TRUE);
1476 1495  
1477 1496          return (nlm_host_has_cli_locks(hostp));
1478 1497  }
1479 1498  
1480 1499  /*
1481 1500   * This function compares only addresses of two netbufs
1482 1501   * that belong to NC_TCP[6] or NC_UDP[6] protofamily.
1483 1502   * Port part of netbuf is ignored.
1484 1503   *
1485 1504   * Return values:
1486 1505   *  -1: nb1's address is "smaller" than nb2's
1487 1506   *   0: addresses are equal
1488 1507   *   1: nb1's address is "greater" than nb2's
1489 1508   */
1490 1509  static int
1491 1510  nlm_netbuf_addrs_cmp(struct netbuf *nb1, struct netbuf *nb2)
1492 1511  {
1493 1512          union nlm_addr {
1494 1513                  struct sockaddr sa;
1495 1514                  struct sockaddr_in sin;
1496 1515                  struct sockaddr_in6 sin6;
1497 1516          } *na1, *na2;
1498 1517          int res;
1499 1518  
1500 1519          /* LINTED E_BAD_PTR_CAST_ALIGN */
1501 1520          na1 = (union nlm_addr *)nb1->buf;
1502 1521          /* LINTED E_BAD_PTR_CAST_ALIGN */
1503 1522          na2 = (union nlm_addr *)nb2->buf;
1504 1523  
1505 1524          if (na1->sa.sa_family < na2->sa.sa_family)
1506 1525                  return (-1);
1507 1526          if (na1->sa.sa_family > na2->sa.sa_family)
1508 1527                  return (1);
1509 1528  
1510 1529          switch (na1->sa.sa_family) {
1511 1530          case AF_INET:
1512 1531                  res = memcmp(&na1->sin.sin_addr, &na2->sin.sin_addr,
1513 1532                      sizeof (na1->sin.sin_addr));
1514 1533                  break;
1515 1534          case AF_INET6:
1516 1535                  res = memcmp(&na1->sin6.sin6_addr, &na2->sin6.sin6_addr,
1517 1536                      sizeof (na1->sin6.sin6_addr));
1518 1537                  break;
1519 1538          default:
1520 1539                  VERIFY(0);
1521 1540                  return (0);
1522 1541          }
1523 1542  
1524 1543          return (SIGN(res));
1525 1544  }
1526 1545  
1527 1546  /*
1528 1547   * Compare two nlm hosts.
1529 1548   * Return values:
1530 1549   * -1: host1 is "smaller" than host2
1531 1550   *  0: host1 is equal to host2
1532 1551   *  1: host1 is "greater" than host2
1533 1552   */
1534 1553  int
1535 1554  nlm_host_cmp(const void *p1, const void *p2)
1536 1555  {
1537 1556          struct nlm_host *h1 = (struct nlm_host *)p1;
1538 1557          struct nlm_host *h2 = (struct nlm_host *)p2;
1539 1558          int res;
1540 1559  
1541 1560          res = strcmp(h1->nh_netid, h2->nh_netid);
1542 1561          if (res != 0)
1543 1562                  return (SIGN(res));
1544 1563  
1545 1564          res = nlm_netbuf_addrs_cmp(&h1->nh_addr, &h2->nh_addr);
1546 1565          return (res);
1547 1566  }
1548 1567  
1549 1568  /*
1550 1569   * Find the host specified by...  (see below)
1551 1570   * If found, increment the ref count.
1552 1571   */
1553 1572  static struct nlm_host *
1554 1573  nlm_host_find_locked(struct nlm_globals *g, const char *netid,
1555 1574      struct netbuf *naddr, avl_index_t *wherep)
1556 1575  {
1557 1576          struct nlm_host *hostp, key;
1558 1577          avl_index_t pos;
1559 1578  
1560 1579          ASSERT(MUTEX_HELD(&g->lock));
1561 1580  
1562 1581          key.nh_netid = (char *)netid;
1563 1582          key.nh_addr.buf = naddr->buf;
1564 1583          key.nh_addr.len = naddr->len;
1565 1584          key.nh_addr.maxlen = naddr->maxlen;
1566 1585  
1567 1586          hostp = avl_find(&g->nlm_hosts_tree, &key, &pos);
1568 1587  
1569 1588          if (hostp != NULL) {
1570 1589                  /*
1571 1590                   * Host is inuse now. Remove it from idle
1572 1591                   * hosts list if needed.
1573 1592                   */
1574 1593                  if (hostp->nh_flags & NLM_NH_INIDLE) {
1575 1594                          TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
1576 1595                          hostp->nh_flags &= ~NLM_NH_INIDLE;
1577 1596                  }
1578 1597  
1579 1598                  hostp->nh_refs++;
1580 1599          }
1581 1600          if (wherep != NULL)
1582 1601                  *wherep = pos;
1583 1602  
1584 1603          return (hostp);
1585 1604  }
1586 1605  
1587 1606  /*
1588 1607   * Find NLM host for the given name and address.
1589 1608   */
1590 1609  struct nlm_host *
1591 1610  nlm_host_find(struct nlm_globals *g, const char *netid,
1592 1611      struct netbuf *addr)
1593 1612  {
1594 1613          struct nlm_host *hostp = NULL;
1595 1614  
1596 1615          mutex_enter(&g->lock);
1597 1616          if (g->run_status != NLM_ST_UP)
1598 1617                  goto out;
1599 1618  
1600 1619          hostp = nlm_host_find_locked(g, netid, addr, NULL);
1601 1620  
1602 1621  out:
1603 1622          mutex_exit(&g->lock);
1604 1623          return (hostp);
1605 1624  }
1606 1625  
1607 1626  
1608 1627  /*
1609 1628   * Find or create an NLM host for the given name and address.
1610 1629   *
1611 1630   * The remote host is determined by all of: name, netid, address.
1612 1631   * Note that the netid is whatever nlm_svc_add_ep() gave to
1613 1632   * svc_tli_kcreate() for the service binding.  If any of these
1614 1633   * are different, allocate a new host (new sysid).
1615 1634   */
1616 1635  struct nlm_host *
1617 1636  nlm_host_findcreate(struct nlm_globals *g, char *name,
1618 1637      const char *netid, struct netbuf *addr)
1619 1638  {
1620 1639          int err;
1621 1640          struct nlm_host *host, *newhost = NULL;
1622 1641          struct knetconfig knc;
1623 1642          avl_index_t where;
1624 1643  
1625 1644          mutex_enter(&g->lock);
1626 1645          if (g->run_status != NLM_ST_UP) {
1627 1646                  mutex_exit(&g->lock);
1628 1647                  return (NULL);
1629 1648          }
1630 1649  
1631 1650          host = nlm_host_find_locked(g, netid, addr, NULL);
1632 1651          mutex_exit(&g->lock);
1633 1652          if (host != NULL)
1634 1653                  return (host);
1635 1654  
1636 1655          err = nlm_knc_from_netid(netid, &knc);
1637 1656          if (err != 0)
1638 1657                  return (NULL);
1639 1658          /*
1640 1659           * Do allocations (etc.) outside of mutex,
1641 1660           * and then check again before inserting.
1642 1661           */
1643 1662          newhost = nlm_host_create(name, netid, &knc, addr);
1644 1663          newhost->nh_sysid = nlm_sysid_alloc();
1645 1664          if (newhost->nh_sysid == LM_NOSYSID)
1646 1665                  goto out;
1647 1666  
1648 1667          mutex_enter(&g->lock);
1649 1668          host = nlm_host_find_locked(g, netid, addr, &where);
1650 1669          if (host == NULL) {
1651 1670                  host = newhost;
1652 1671                  newhost = NULL;
1653 1672  
1654 1673                  /*
1655 1674                   * Insert host to the hosts AVL tree that is
1656 1675                   * used to lookup by <netid, address> pair.
1657 1676                   */
1658 1677                  avl_insert(&g->nlm_hosts_tree, host, where);
1659 1678  
1660 1679                  /*
1661 1680                   * Insert host to the hosts hash table that is
1662 1681                   * used to lookup host by sysid.
1663 1682                   */
1664 1683                  VERIFY(mod_hash_insert(g->nlm_hosts_hash,
1665 1684                      (mod_hash_key_t)(uintptr_t)host->nh_sysid,
1666 1685                      (mod_hash_val_t)host) == 0);
1667 1686          }
1668 1687  
1669 1688          mutex_exit(&g->lock);
1670 1689  
1671 1690  out:
1672 1691          if (newhost != NULL) {
1673 1692                  /*
1674 1693                   * We do not need the preallocated nlm_host
1675 1694                   * so decrement the reference counter
1676 1695                   * and destroy it.
1677 1696                   */
1678 1697                  newhost->nh_refs--;
1679 1698                  nlm_host_destroy(newhost);
1680 1699          }
1681 1700  
1682 1701          return (host);
1683 1702  }
1684 1703  
1685 1704  /*
1686 1705   * Find the NLM host that matches the value of 'sysid'.
1687 1706   * If found, return it with a new ref,
1688 1707   * else return NULL.
1689 1708   */
1690 1709  struct nlm_host *
1691 1710  nlm_host_find_by_sysid(struct nlm_globals *g, sysid_t sysid)
1692 1711  {
1693 1712          struct nlm_host *hostp = NULL;
1694 1713  
1695 1714          mutex_enter(&g->lock);
1696 1715          if (g->run_status != NLM_ST_UP)
1697 1716                  goto out;
1698 1717  
1699 1718          (void) mod_hash_find(g->nlm_hosts_hash,
1700 1719              (mod_hash_key_t)(uintptr_t)sysid,
1701 1720              (mod_hash_val_t)&hostp);
1702 1721  
1703 1722          if (hostp == NULL)
1704 1723                  goto out;
1705 1724  
1706 1725          /*
1707 1726           * Host is inuse now. Remove it
1708 1727           * from idle hosts list if needed.
1709 1728           */
1710 1729          if (hostp->nh_flags & NLM_NH_INIDLE) {
1711 1730                  TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
1712 1731                  hostp->nh_flags &= ~NLM_NH_INIDLE;
1713 1732          }
1714 1733  
1715 1734          hostp->nh_refs++;
1716 1735  
1717 1736  out:
1718 1737          mutex_exit(&g->lock);
1719 1738          return (hostp);
1720 1739  }
1721 1740  
1722 1741  /*
1723 1742   * Release the given host.
1724 1743   * I.e. drop a reference that was taken earlier by one of
1725 1744   * the following functions: nlm_host_findcreate(), nlm_host_find(),
1726 1745   * nlm_host_find_by_sysid().
1727 1746   *
1728 1747   * When the very last reference is dropped, host is moved to
1729 1748   * so-called "idle state". All hosts that are in idle state
1730 1749   * have an idle timeout. If timeout is expired, GC thread
1731 1750   * checks whether hosts have any locks and if they heven't
1732 1751   * any, it removes them.
1733 1752   * NOTE: only unused hosts can be in idle state.
1734 1753   */
1735 1754  void
1736 1755  nlm_host_release(struct nlm_globals *g, struct nlm_host *hostp)
1737 1756  {
1738 1757          if (hostp == NULL)
1739 1758                  return;
1740 1759  
1741 1760          mutex_enter(&g->lock);
1742 1761          ASSERT(hostp->nh_refs > 0);
1743 1762  
1744 1763          hostp->nh_refs--;
1745 1764          if (hostp->nh_refs != 0) {
1746 1765                  mutex_exit(&g->lock);
1747 1766                  return;
1748 1767          }
1749 1768  
1750 1769          /*
1751 1770           * The very last reference to the host was dropped,
1752 1771           * thus host is unused now. Set its idle timeout
1753 1772           * and move it to the idle hosts LRU list.
1754 1773           */
1755 1774          hostp->nh_idle_timeout = ddi_get_lbolt() +
1756 1775              SEC_TO_TICK(g->cn_idle_tmo);
1757 1776  
1758 1777          ASSERT((hostp->nh_flags & NLM_NH_INIDLE) == 0);
1759 1778          TAILQ_INSERT_TAIL(&g->nlm_idle_hosts, hostp, nh_link);
1760 1779          hostp->nh_flags |= NLM_NH_INIDLE;
1761 1780          mutex_exit(&g->lock);
1762 1781  }
1763 1782  
1764 1783  /*
1765 1784   * Unregister this NLM host (NFS client) with the local statd
1766 1785   * due to idleness (no locks held for a while).
1767 1786   */
1768 1787  void
1769 1788  nlm_host_unmonitor(struct nlm_globals *g, struct nlm_host *host)
1770 1789  {
1771 1790          enum clnt_stat stat;
1772 1791  
1773 1792          VERIFY(host->nh_refs == 0);
1774 1793          if (!(host->nh_flags & NLM_NH_MONITORED))
1775 1794                  return;
1776 1795  
1777 1796          host->nh_flags &= ~NLM_NH_MONITORED;
1778 1797          stat = nlm_nsm_unmon(&g->nlm_nsm, host->nh_name);
1779 1798          if (stat != RPC_SUCCESS) {
1780 1799                  NLM_WARN("NLM: Failed to contact statd, stat=%d\n", stat);
1781 1800                  return;
1782 1801          }
1783 1802  }
1784 1803  
1785 1804  /*
1786 1805   * Ask the local NFS statd to begin monitoring this host.
1787 1806   * It will call us back when that host restarts, using the
1788 1807   * prog,vers,proc specified below, i.e. NLM_SM_NOTIFY1,
1789 1808   * which is handled in nlm_do_notify1().
1790 1809   */
1791 1810  void
1792 1811  nlm_host_monitor(struct nlm_globals *g, struct nlm_host *host, int state)
1793 1812  {
1794 1813          int family;
1795 1814          netobj obj;
1796 1815          enum clnt_stat stat;
1797 1816  
1798 1817          if (state != 0 && host->nh_state == 0) {
1799 1818                  /*
1800 1819                   * This is the first time we have seen an NSM state
1801 1820                   * Value for this host. We record it here to help
1802 1821                   * detect host reboots.
1803 1822                   */
1804 1823                  host->nh_state = state;
1805 1824          }
1806 1825  
1807 1826          mutex_enter(&host->nh_lock);
1808 1827          if (host->nh_flags & NLM_NH_MONITORED) {
1809 1828                  mutex_exit(&host->nh_lock);
1810 1829                  return;
1811 1830          }
1812 1831  
1813 1832          host->nh_flags |= NLM_NH_MONITORED;
1814 1833          mutex_exit(&host->nh_lock);
1815 1834  
1816 1835          /*
1817 1836           * Before we begin monitoring the host register the network address
1818 1837           * associated with this hostname.
1819 1838           */
1820 1839          nlm_netbuf_to_netobj(&host->nh_addr, &family, &obj);
1821 1840          stat = nlm_nsmaddr_reg(&g->nlm_nsm, host->nh_name, family, &obj);
1822 1841          if (stat != RPC_SUCCESS) {
1823 1842                  NLM_WARN("Failed to register address, stat=%d\n", stat);
1824 1843                  mutex_enter(&g->lock);
1825 1844                  host->nh_flags &= ~NLM_NH_MONITORED;
1826 1845                  mutex_exit(&g->lock);
1827 1846  
1828 1847                  return;
1829 1848          }
1830 1849  
1831 1850          /*
1832 1851           * Tell statd how to call us with status updates for
1833 1852           * this host. Updates arrive via nlm_do_notify1().
1834 1853           *
1835 1854           * We put our assigned system ID value in the priv field to
1836 1855           * make it simpler to find the host if we are notified of a
1837 1856           * host restart.
1838 1857           */
1839 1858          stat = nlm_nsm_mon(&g->nlm_nsm, host->nh_name, host->nh_sysid);
1840 1859          if (stat != RPC_SUCCESS) {
1841 1860                  NLM_WARN("Failed to contact local NSM, stat=%d\n", stat);
1842 1861                  mutex_enter(&g->lock);
1843 1862                  host->nh_flags &= ~NLM_NH_MONITORED;
1844 1863                  mutex_exit(&g->lock);
1845 1864  
1846 1865                  return;
1847 1866          }
1848 1867  }
1849 1868  
1850 1869  int
1851 1870  nlm_host_get_state(struct nlm_host *hostp)
1852 1871  {
1853 1872  
1854 1873          return (hostp->nh_state);
1855 1874  }
1856 1875  
1857 1876  /*
1858 1877   * NLM client/server sleeping locks
1859 1878   */
1860 1879  
1861 1880  /*
1862 1881   * Register client side sleeping lock.
1863 1882   *
1864 1883   * Our client code calls this to keep information
1865 1884   * about sleeping lock somewhere. When it receives
1866 1885   * grant callback from server or when it just
1867 1886   * needs to remove all sleeping locks from vnode,
1868 1887   * it uses this information for remove/apply lock
1869 1888   * properly.
1870 1889   */
1871 1890  struct nlm_slock *
1872 1891  nlm_slock_register(
1873 1892          struct nlm_globals *g,
1874 1893          struct nlm_host *host,
1875 1894          struct nlm4_lock *lock,
1876 1895          struct vnode *vp)
1877 1896  {
1878 1897          struct nlm_slock *nslp;
1879 1898  
1880 1899          nslp = kmem_zalloc(sizeof (*nslp), KM_SLEEP);
1881 1900          cv_init(&nslp->nsl_cond, NULL, CV_DEFAULT, NULL);
1882 1901          nslp->nsl_lock = *lock;
1883 1902          nlm_copy_netobj(&nslp->nsl_fh, &nslp->nsl_lock.fh);
1884 1903          nslp->nsl_state = NLM_SL_BLOCKED;
1885 1904          nslp->nsl_host = host;
1886 1905          nslp->nsl_vp = vp;
1887 1906  
1888 1907          mutex_enter(&g->lock);
1889 1908          TAILQ_INSERT_TAIL(&g->nlm_slocks, nslp, nsl_link);
1890 1909          mutex_exit(&g->lock);
1891 1910  
1892 1911          return (nslp);
1893 1912  }
1894 1913  
1895 1914  /*
1896 1915   * Remove this lock from the wait list and destroy it.
1897 1916   */
1898 1917  void
1899 1918  nlm_slock_unregister(struct nlm_globals *g, struct nlm_slock *nslp)
1900 1919  {
1901 1920          mutex_enter(&g->lock);
1902 1921          TAILQ_REMOVE(&g->nlm_slocks, nslp, nsl_link);
1903 1922          mutex_exit(&g->lock);
1904 1923  
1905 1924          kmem_free(nslp->nsl_fh.n_bytes, nslp->nsl_fh.n_len);
1906 1925          cv_destroy(&nslp->nsl_cond);
1907 1926          kmem_free(nslp, sizeof (*nslp));
1908 1927  }
1909 1928  
1910 1929  /*
1911 1930   * Wait for a granted callback or cancellation event
1912 1931   * for a sleeping lock.
1913 1932   *
1914 1933   * If a signal interrupted the wait or if the lock
1915 1934   * was cancelled, return EINTR - the caller must arrange to send
1916 1935   * a cancellation to the server.
1917 1936   *
1918 1937   * If timeout occurred, return ETIMEDOUT - the caller must
1919 1938   * resend the lock request to the server.
1920 1939   *
1921 1940   * On success return 0.
1922 1941   */
1923 1942  int
1924 1943  nlm_slock_wait(struct nlm_globals *g,
1925 1944      struct nlm_slock *nslp, uint_t timeo_secs)
1926 1945  {
1927 1946          clock_t timeo_ticks;
1928 1947          int cv_res, error;
1929 1948  
1930 1949          /*
1931 1950           * If the granted message arrived before we got here,
1932 1951           * nslp->nsl_state will be NLM_SL_GRANTED - in that case don't sleep.
1933 1952           */
1934 1953          cv_res = 1;
1935 1954          timeo_ticks = ddi_get_lbolt() + SEC_TO_TICK(timeo_secs);
1936 1955  
1937 1956          mutex_enter(&g->lock);
1938 1957          while (nslp->nsl_state == NLM_SL_BLOCKED && cv_res > 0) {
1939 1958                  cv_res = cv_timedwait_sig(&nslp->nsl_cond,
1940 1959                      &g->lock, timeo_ticks);
1941 1960          }
1942 1961  
1943 1962          /*
1944 1963           * No matter why we wake up, if the lock was
1945 1964           * cancelled, let the function caller to know
1946 1965           * about it by returning EINTR.
1947 1966           */
1948 1967          if (nslp->nsl_state == NLM_SL_CANCELLED) {
1949 1968                  error = EINTR;
1950 1969                  goto out;
1951 1970          }
1952 1971  
1953 1972          if (cv_res <= 0) {
1954 1973                  /* We were woken up either by timeout or by interrupt */
1955 1974                  error = (cv_res < 0) ? ETIMEDOUT : EINTR;
1956 1975  
1957 1976                  /*
1958 1977                   * The granted message may arrive after the
1959 1978                   * interrupt/timeout but before we manage to lock the
1960 1979                   * mutex. Detect this by examining nslp.
1961 1980                   */
1962 1981                  if (nslp->nsl_state == NLM_SL_GRANTED)
1963 1982                          error = 0;
1964 1983          } else { /* Awaken via cv_signal()/cv_broadcast() or didn't block */
1965 1984                  error = 0;
1966 1985                  VERIFY(nslp->nsl_state == NLM_SL_GRANTED);
1967 1986          }
1968 1987  
1969 1988  out:
1970 1989          mutex_exit(&g->lock);
1971 1990          return (error);
1972 1991  }
1973 1992  
1974 1993  /*
1975 1994   * Mark client side sleeping lock as granted
1976 1995   * and wake up a process blocked on the lock.
1977 1996   * Called from server side NLM_GRANT handler.
1978 1997   *
1979 1998   * If sleeping lock is found return 0, otherwise
1980 1999   * return ENOENT.
1981 2000   */
1982 2001  int
1983 2002  nlm_slock_grant(struct nlm_globals *g,
1984 2003      struct nlm_host *hostp, struct nlm4_lock *alock)
1985 2004  {
1986 2005          struct nlm_slock *nslp;
1987 2006          int error = ENOENT;
1988 2007  
1989 2008          mutex_enter(&g->lock);
1990 2009          TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) {
1991 2010                  if ((nslp->nsl_state != NLM_SL_BLOCKED) ||
1992 2011                      (nslp->nsl_host != hostp))
1993 2012                          continue;
1994 2013  
1995 2014                  if (alock->svid         == nslp->nsl_lock.svid &&
1996 2015                      alock->l_offset     == nslp->nsl_lock.l_offset &&
1997 2016                      alock->l_len        == nslp->nsl_lock.l_len &&
1998 2017                      alock->fh.n_len     == nslp->nsl_lock.fh.n_len &&
1999 2018                      bcmp(alock->fh.n_bytes, nslp->nsl_lock.fh.n_bytes,
2000 2019                      nslp->nsl_lock.fh.n_len) == 0) {
2001 2020                          nslp->nsl_state = NLM_SL_GRANTED;
2002 2021                          cv_broadcast(&nslp->nsl_cond);
2003 2022                          error = 0;
2004 2023                          break;
2005 2024                  }
2006 2025          }
2007 2026  
2008 2027          mutex_exit(&g->lock);
2009 2028          return (error);
2010 2029  }
2011 2030  
2012 2031  /*
2013 2032   * Register sleeping lock request corresponding to
2014 2033   * flp on the given vhold object.
2015 2034   * On success function returns 0, otherwise (if
2016 2035   * lock request with the same flp is already
2017 2036   * registered) function returns EEXIST.
2018 2037   */
2019 2038  int
2020 2039  nlm_slreq_register(struct nlm_host *hostp, struct nlm_vhold *nvp,
2021 2040          struct flock64 *flp)
2022 2041  {
2023 2042          struct nlm_slreq *slr, *new_slr = NULL;
2024 2043          int ret = EEXIST;
2025 2044  
2026 2045          mutex_enter(&hostp->nh_lock);
2027 2046          slr = nlm_slreq_find_locked(hostp, nvp, flp);
2028 2047          if (slr != NULL)
2029 2048                  goto out;
2030 2049  
2031 2050          mutex_exit(&hostp->nh_lock);
2032 2051          new_slr = kmem_zalloc(sizeof (*slr), KM_SLEEP);
2033 2052          bcopy(flp, &new_slr->nsr_fl, sizeof (*flp));
2034 2053  
2035 2054          mutex_enter(&hostp->nh_lock);
2036 2055          slr = nlm_slreq_find_locked(hostp, nvp, flp);
2037 2056          if (slr == NULL) {
2038 2057                  slr = new_slr;
2039 2058                  new_slr = NULL;
2040 2059                  ret = 0;
2041 2060  
2042 2061                  TAILQ_INSERT_TAIL(&nvp->nv_slreqs, slr, nsr_link);
2043 2062          }
2044 2063  
2045 2064  out:
2046 2065          mutex_exit(&hostp->nh_lock);
2047 2066          if (new_slr != NULL)
2048 2067                  kmem_free(new_slr, sizeof (*new_slr));
2049 2068  
2050 2069          return (ret);
2051 2070  }
2052 2071  
2053 2072  /*
2054 2073   * Unregister sleeping lock request corresponding
2055 2074   * to flp from the given vhold object.
2056 2075   * On success function returns 0, otherwise (if
2057 2076   * lock request corresponding to flp isn't found
2058 2077   * on the given vhold) function returns ENOENT.
2059 2078   */
2060 2079  int
2061 2080  nlm_slreq_unregister(struct nlm_host *hostp, struct nlm_vhold *nvp,
2062 2081          struct flock64 *flp)
2063 2082  {
2064 2083          struct nlm_slreq *slr;
2065 2084  
2066 2085          mutex_enter(&hostp->nh_lock);
2067 2086          slr = nlm_slreq_find_locked(hostp, nvp, flp);
2068 2087          if (slr == NULL) {
2069 2088                  mutex_exit(&hostp->nh_lock);
2070 2089                  return (ENOENT);
2071 2090          }
2072 2091  
2073 2092          TAILQ_REMOVE(&nvp->nv_slreqs, slr, nsr_link);
2074 2093          mutex_exit(&hostp->nh_lock);
2075 2094  
2076 2095          kmem_free(slr, sizeof (*slr));
2077 2096          return (0);
2078 2097  }
2079 2098  
2080 2099  /*
2081 2100   * Find sleeping lock request on the given vhold object by flp.
2082 2101   */
2083 2102  struct nlm_slreq *
2084 2103  nlm_slreq_find_locked(struct nlm_host *hostp, struct nlm_vhold *nvp,
2085 2104      struct flock64 *flp)
2086 2105  {
2087 2106          struct nlm_slreq *slr = NULL;
2088 2107  
2089 2108          ASSERT(MUTEX_HELD(&hostp->nh_lock));
2090 2109          TAILQ_FOREACH(slr, &nvp->nv_slreqs, nsr_link) {
2091 2110                  if (slr->nsr_fl.l_start         == flp->l_start &&
2092 2111                      slr->nsr_fl.l_len           == flp->l_len   &&
2093 2112                      slr->nsr_fl.l_pid           == flp->l_pid   &&
2094 2113                      slr->nsr_fl.l_type          == flp->l_type)
2095 2114                          break;
2096 2115          }
2097 2116  
2098 2117          return (slr);
2099 2118  }
2100 2119  
2101 2120  /*
2102 2121   * NLM tracks active share reservations made on the client side.
2103 2122   * It needs to have a track of share reservations for two purposes
2104 2123   * 1) to determine if nlm_host is busy (if it has active locks and/or
2105 2124   *    share reservations, it is)
2106 2125   * 2) to recover active share reservations when NLM server reports
2107 2126   *    that it has rebooted.
2108 2127   *
2109 2128   * Unfortunately Illumos local share reservations manager (see os/share.c)
2110 2129   * doesn't have an ability to lookup all reservations on the system
2111 2130   * by sysid (like local lock manager) or get all reservations by sysid.
2112 2131   * It tracks reservations per vnode and is able to get/lookup them
2113 2132   * on particular vnode. It's not what NLM needs. Thus it has that ugly
2114 2133   * share reservations tracking scheme.
2115 2134   */
2116 2135  
2117 2136  void
2118 2137  nlm_shres_track(struct nlm_host *hostp, vnode_t *vp, struct shrlock *shrp)
2119 2138  {
2120 2139          struct nlm_shres *nsp, *nsp_new;
2121 2140  
2122 2141          /*
2123 2142           * NFS code must fill the s_owner, so that
2124 2143           * s_own_len is never 0.
2125 2144           */
2126 2145          ASSERT(shrp->s_own_len > 0);
2127 2146          nsp_new = nlm_shres_create_item(shrp, vp);
2128 2147  
2129 2148          mutex_enter(&hostp->nh_lock);
2130 2149          for (nsp = hostp->nh_shrlist; nsp != NULL; nsp = nsp->ns_next)
2131 2150                  if (nsp->ns_vp == vp && nlm_shres_equal(shrp, nsp->ns_shr))
2132 2151                          break;
2133 2152  
2134 2153          if (nsp != NULL) {
2135 2154                  /*
2136 2155                   * Found a duplicate. Do nothing.
2137 2156                   */
2138 2157  
2139 2158                  goto out;
2140 2159          }
2141 2160  
2142 2161          nsp = nsp_new;
2143 2162          nsp_new = NULL;
2144 2163          nsp->ns_next = hostp->nh_shrlist;
2145 2164          hostp->nh_shrlist = nsp;
2146 2165  
2147 2166  out:
2148 2167          mutex_exit(&hostp->nh_lock);
2149 2168          if (nsp_new != NULL)
2150 2169                  nlm_shres_destroy_item(nsp_new);
2151 2170  }
2152 2171  
2153 2172  void
2154 2173  nlm_shres_untrack(struct nlm_host *hostp, vnode_t *vp, struct shrlock *shrp)
2155 2174  {
2156 2175          struct nlm_shres *nsp, *nsp_prev = NULL;
2157 2176  
2158 2177          mutex_enter(&hostp->nh_lock);
2159 2178          nsp = hostp->nh_shrlist;
2160 2179          while (nsp != NULL) {
2161 2180                  if (nsp->ns_vp == vp && nlm_shres_equal(shrp, nsp->ns_shr)) {
2162 2181                          struct nlm_shres *nsp_del;
2163 2182  
2164 2183                          nsp_del = nsp;
2165 2184                          nsp = nsp->ns_next;
2166 2185                          if (nsp_prev != NULL)
2167 2186                                  nsp_prev->ns_next = nsp;
2168 2187                          else
2169 2188                                  hostp->nh_shrlist = nsp;
2170 2189  
2171 2190                          nlm_shres_destroy_item(nsp_del);
2172 2191                          continue;
2173 2192                  }
2174 2193  
2175 2194                  nsp_prev = nsp;
2176 2195                  nsp = nsp->ns_next;
2177 2196          }
2178 2197  
2179 2198          mutex_exit(&hostp->nh_lock);
2180 2199  }
2181 2200  
2182 2201  /*
2183 2202   * Get a _copy_ of the list of all active share reservations
2184 2203   * made by the given host.
2185 2204   * NOTE: the list function returns _must_ be released using
2186 2205   *       nlm_free_shrlist().
2187 2206   */
2188 2207  struct nlm_shres *
2189 2208  nlm_get_active_shres(struct nlm_host *hostp)
2190 2209  {
2191 2210          struct nlm_shres *nsp, *nslist = NULL;
2192 2211  
2193 2212          mutex_enter(&hostp->nh_lock);
2194 2213          for (nsp = hostp->nh_shrlist; nsp != NULL; nsp = nsp->ns_next) {
2195 2214                  struct nlm_shres *nsp_new;
2196 2215  
2197 2216                  nsp_new = nlm_shres_create_item(nsp->ns_shr, nsp->ns_vp);
2198 2217                  nsp_new->ns_next = nslist;
2199 2218                  nslist = nsp_new;
2200 2219          }
2201 2220  
2202 2221          mutex_exit(&hostp->nh_lock);
2203 2222          return (nslist);
2204 2223  }
2205 2224  
2206 2225  /*
2207 2226   * Free memory allocated for the active share reservations
2208 2227   * list created by nlm_get_active_shres() function.
2209 2228   */
2210 2229  void
2211 2230  nlm_free_shrlist(struct nlm_shres *nslist)
2212 2231  {
2213 2232          struct nlm_shres *nsp;
2214 2233  
2215 2234          while (nslist != NULL) {
2216 2235                  nsp =  nslist;
2217 2236                  nslist = nslist->ns_next;
2218 2237  
2219 2238                  nlm_shres_destroy_item(nsp);
2220 2239          }
2221 2240  }
2222 2241  
2223 2242  static bool_t
2224 2243  nlm_shres_equal(struct shrlock *shrp1, struct shrlock *shrp2)
2225 2244  {
2226 2245          if (shrp1->s_sysid      == shrp2->s_sysid       &&
2227 2246              shrp1->s_pid        == shrp2->s_pid         &&
2228 2247              shrp1->s_own_len    == shrp2->s_own_len     &&
2229 2248              bcmp(shrp1->s_owner, shrp2->s_owner,
2230 2249              shrp1->s_own_len) == 0)
2231 2250                  return (TRUE);
2232 2251  
2233 2252          return (FALSE);
2234 2253  }
2235 2254  
2236 2255  static struct nlm_shres *
2237 2256  nlm_shres_create_item(struct shrlock *shrp, vnode_t *vp)
2238 2257  {
2239 2258          struct nlm_shres *nsp;
2240 2259  
2241 2260          nsp = kmem_alloc(sizeof (*nsp), KM_SLEEP);
2242 2261          nsp->ns_shr = kmem_alloc(sizeof (*shrp), KM_SLEEP);
2243 2262          bcopy(shrp, nsp->ns_shr, sizeof (*shrp));
2244 2263          nsp->ns_shr->s_owner = kmem_alloc(shrp->s_own_len, KM_SLEEP);
2245 2264          bcopy(shrp->s_owner, nsp->ns_shr->s_owner, shrp->s_own_len);
2246 2265          nsp->ns_vp = vp;
2247 2266  
2248 2267          return (nsp);
2249 2268  }
2250 2269  
2251 2270  static void
2252 2271  nlm_shres_destroy_item(struct nlm_shres *nsp)
2253 2272  {
2254 2273          kmem_free(nsp->ns_shr->s_owner,
2255 2274              nsp->ns_shr->s_own_len);
2256 2275          kmem_free(nsp->ns_shr, sizeof (struct shrlock));
2257 2276          kmem_free(nsp, sizeof (*nsp));
2258 2277  }
2259 2278  
2260 2279  /*
2261 2280   * Called by klmmod.c when lockd adds a network endpoint
2262 2281   * on which we should begin RPC services.
2263 2282   */
2264 2283  int
2265 2284  nlm_svc_add_ep(struct file *fp, const char *netid, struct knetconfig *knc)
2266 2285  {
2267 2286          SVCMASTERXPRT *xprt = NULL;
2268 2287          int error;
2269 2288  
2270 2289          error = svc_tli_kcreate(fp, 0, (char *)netid, NULL, &xprt,
2271 2290              &nlm_sct, NULL, NLM_SVCPOOL_ID, FALSE);
2272 2291          if (error != 0)
2273 2292                  return (error);
2274 2293  
2275 2294          (void) nlm_knc_to_netid(knc);
2276 2295          return (0);
2277 2296  }
2278 2297  
2279 2298  /*
2280 2299   * Start NLM service.
2281 2300   */
2282 2301  int
2283 2302  nlm_svc_starting(struct nlm_globals *g, struct file *fp,
2284 2303      const char *netid, struct knetconfig *knc)
2285 2304  {
2286 2305          int error;
2287 2306          enum clnt_stat stat;
2288 2307  
2289 2308          VERIFY(g->run_status == NLM_ST_STARTING);
2290 2309          VERIFY(g->nlm_gc_thread == NULL);
2291 2310  
2292 2311          error = nlm_nsm_init_local(&g->nlm_nsm);
2293 2312          if (error != 0) {
2294 2313                  NLM_ERR("Failed to initialize NSM handler "
2295 2314                      "(error=%d)\n", error);
2296 2315                  g->run_status = NLM_ST_DOWN;
2297 2316                  return (error);
2298 2317          }
2299 2318  
2300 2319          error = EIO;
2301 2320  
2302 2321          /*
2303 2322           * Create an NLM garbage collector thread that will
2304 2323           * clean up stale vholds and hosts objects.
2305 2324           */
2306 2325          g->nlm_gc_thread = zthread_create(NULL, 0, nlm_gc,
2307 2326              g, 0, minclsyspri);
2308 2327  
2309 2328          /*
2310 2329           * Send SIMU_CRASH to local statd to report that
2311 2330           * NLM started, so that statd can report other hosts
2312 2331           * about NLM state change.
2313 2332           */
2314 2333  
2315 2334          stat = nlm_nsm_simu_crash(&g->nlm_nsm);
2316 2335          if (stat != RPC_SUCCESS) {
2317 2336                  NLM_ERR("Failed to connect to local statd "
2318 2337                      "(rpcerr=%d)\n", stat);
2319 2338                  goto shutdown_lm;
2320 2339          }
2321 2340  
2322 2341          stat = nlm_nsm_stat(&g->nlm_nsm, &g->nsm_state);
2323 2342          if (stat != RPC_SUCCESS) {
2324 2343                  NLM_ERR("Failed to get the status of local statd "
2325 2344                      "(rpcerr=%d)\n", stat);
2326 2345                  goto shutdown_lm;
2327 2346          }
2328 2347  
2329 2348          g->grace_threshold = ddi_get_lbolt() +
2330 2349              SEC_TO_TICK(g->grace_period);
2331 2350  
2332 2351          /* Register endpoint used for communications with local NLM */
2333 2352          error = nlm_svc_add_ep(fp, netid, knc);
2334 2353          if (error != 0)
2335 2354                  goto shutdown_lm;
2336 2355  
2337 2356          (void) svc_pool_control(NLM_SVCPOOL_ID,
2338 2357              SVCPSET_SHUTDOWN_PROC, (void *)nlm_pool_shutdown);
2339 2358          g->run_status = NLM_ST_UP;
2340 2359          return (0);
2341 2360  
2342 2361  shutdown_lm:
2343 2362          mutex_enter(&g->lock);
2344 2363          g->run_status = NLM_ST_STOPPING;
2345 2364          mutex_exit(&g->lock);
2346 2365  
2347 2366          nlm_svc_stopping(g);
2348 2367          return (error);
2349 2368  }
2350 2369  
/*
 * Shutdown callback for the NLM server pool (installed with
 * SVCPSET_SHUTDOWN_PROC in nlm_svc_starting()).
 *
 * It is called when the server pool is destroyed, i.e. once all
 * transports are closed and no server threads exist any more.
 * All it has to do is call lm_shutdown() so that NLM is shut down
 * properly; the result of lm_shutdown() is ignored.
 */
static void
nlm_pool_shutdown(void)
{
	(void) lm_shutdown();
}
2363 2382  
2364 2383  /*
2365 2384   * Stop NLM service, cleanup all resources
2366 2385   * NLM owns at the moment.
2367 2386   *
2368 2387   * NOTE: NFS code can call NLM while it's
2369 2388   * stopping or even if it's shut down. Any attempt
2370 2389   * to lock file either on client or on the server
2371 2390   * will fail if NLM isn't in NLM_ST_UP state.
2372 2391   */
2373 2392  void
2374 2393  nlm_svc_stopping(struct nlm_globals *g)
2375 2394  {
2376 2395          mutex_enter(&g->lock);
2377 2396          ASSERT(g->run_status == NLM_ST_STOPPING);
2378 2397  
2379 2398          /*
2380 2399           * Ask NLM GC thread to exit and wait until it dies.
2381 2400           */
2382 2401          cv_signal(&g->nlm_gc_sched_cv);
2383 2402          while (g->nlm_gc_thread != NULL)
2384 2403                  cv_wait(&g->nlm_gc_finish_cv, &g->lock);
2385 2404  
2386 2405          mutex_exit(&g->lock);
2387 2406  
2388 2407          /*
2389 2408           * Cleanup locks owned by NLM hosts.
2390 2409           * NOTE: New hosts won't be created while
2391 2410           * NLM is stopping.
2392 2411           */
2393 2412          while (!avl_is_empty(&g->nlm_hosts_tree)) {
2394 2413                  struct nlm_host *hostp;
2395 2414                  int busy_hosts = 0;
2396 2415  
2397 2416                  /*
2398 2417                   * Iterate through all NLM hosts in the system
2399 2418                   * and drop the locks they own by force.
2400 2419                   */
2401 2420                  hostp = avl_first(&g->nlm_hosts_tree);
2402 2421                  while (hostp != NULL) {
2403 2422                          /* Cleanup all client and server side locks */
2404 2423                          nlm_client_cancel_all(g, hostp);
2405 2424                          nlm_host_notify_server(hostp, 0);
2406 2425  
2407 2426                          mutex_enter(&hostp->nh_lock);
2408 2427                          nlm_host_gc_vholds(hostp);
2409 2428                          if (hostp->nh_refs > 0 || nlm_host_has_locks(hostp)) {
2410 2429                                  /*
2411 2430                                   * Oh, it seems the host is still busy, let
2412 2431                                   * it some time to release and go to the
2413 2432                                   * next one.
2414 2433                                   */
2415 2434  
2416 2435                                  mutex_exit(&hostp->nh_lock);
2417 2436                                  hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2418 2437                                  busy_hosts++;
2419 2438                                  continue;
2420 2439                          }
2421 2440  
2422 2441                          mutex_exit(&hostp->nh_lock);
2423 2442                          hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2424 2443                  }
2425 2444  
2426 2445                  /*
2427 2446                   * All hosts go to nlm_idle_hosts list after
2428 2447                   * all locks they own are cleaned up and last refereces
2429 2448                   * were dropped. Just destroy all hosts in nlm_idle_hosts
2430 2449                   * list, they can not be removed from there while we're
2431 2450                   * in stopping state.
2432 2451                   */
2433 2452                  while ((hostp = TAILQ_FIRST(&g->nlm_idle_hosts)) != NULL) {
2434 2453                          nlm_host_unregister(g, hostp);
2435 2454                          nlm_host_destroy(hostp);
2436 2455                  }
2437 2456  
2438 2457                  if (busy_hosts > 0) {
2439 2458                          /*
2440 2459                           * There're some hosts that weren't cleaned
2441 2460                           * up. Probably they're in resource cleanup
2442 2461                           * process. Give them some time to do drop
2443 2462                           * references.
2444 2463                           */
2445 2464                          delay(MSEC_TO_TICK(500));
2446 2465                  }
2447 2466          }
2448 2467  
2449 2468          ASSERT(TAILQ_EMPTY(&g->nlm_slocks));
2450 2469  
2451 2470          nlm_nsm_fini(&g->nlm_nsm);
2452 2471          g->lockd_pid = 0;
2453 2472          g->run_status = NLM_ST_DOWN;
2454 2473  }
2455 2474  
2456 2475  /*
2457 2476   * Returns TRUE if the given vnode has
2458 2477   * any active or sleeping locks.
2459 2478   */
2460 2479  int
2461 2480  nlm_vp_active(const vnode_t *vp)
2462 2481  {
2463 2482          struct nlm_globals *g;
2464 2483          struct nlm_host *hostp;
2465 2484          struct nlm_vhold *nvp;
2466 2485          int active = 0;
2467 2486  
2468 2487          g = zone_getspecific(nlm_zone_key, curzone);
2469 2488  
2470 2489          /*
2471 2490           * Server side NLM has locks on the given vnode
2472 2491           * if there exist a vhold object that holds
2473 2492           * the given vnode "vp" in one of NLM hosts.
2474 2493           */
2475 2494          mutex_enter(&g->lock);
2476 2495          hostp = avl_first(&g->nlm_hosts_tree);
2477 2496          while (hostp != NULL) {
2478 2497                  mutex_enter(&hostp->nh_lock);
2479 2498                  nvp = nlm_vhold_find_locked(hostp, vp);
2480 2499                  mutex_exit(&hostp->nh_lock);
2481 2500                  if (nvp != NULL) {
2482 2501                          active = 1;
2483 2502                          break;
2484 2503                  }
2485 2504  
2486 2505                  hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2487 2506          }
2488 2507  
2489 2508          mutex_exit(&g->lock);
2490 2509          return (active);
2491 2510  }
2492 2511  
2493 2512  /*
2494 2513   * Called right before NFS export is going to
2495 2514   * dissapear. The function finds all vnodes
2496 2515   * belonging to the given export and cleans
2497 2516   * all remote locks and share reservations
2498 2517   * on them.
2499 2518   */
2500 2519  void
2501 2520  nlm_unexport(struct exportinfo *exi)
2502 2521  {
2503 2522          struct nlm_globals *g;
2504 2523          struct nlm_host *hostp;
2505 2524  
2506 2525          g = zone_getspecific(nlm_zone_key, curzone);
2507 2526  
2508 2527          mutex_enter(&g->lock);
2509 2528          hostp = avl_first(&g->nlm_hosts_tree);
2510 2529          while (hostp != NULL) {
2511 2530                  struct nlm_vhold *nvp;
2512 2531  
2513 2532                  mutex_enter(&hostp->nh_lock);
2514 2533                  TAILQ_FOREACH(nvp, &hostp->nh_vholds_list, nv_link) {
2515 2534                          vnode_t *vp;
2516 2535  
2517 2536                          nvp->nv_refcnt++;
2518 2537                          mutex_exit(&hostp->nh_lock);
2519 2538  
2520 2539                          vp = nvp->nv_vp;
2521 2540  
2522 2541                          if (!EQFSID(&exi->exi_fsid, &vp->v_vfsp->vfs_fsid))
2523 2542                                  goto next_iter;
2524 2543  
2525 2544                          /*
2526 2545                           * Ok, it we found out that vnode vp is under
2527 2546                           * control by the exportinfo exi, now we need
2528 2547                           * to drop all locks from this vnode, let's
2529 2548                           * do it.
2530 2549                           */
2531 2550                          nlm_vhold_clean(nvp, hostp->nh_sysid);
2532 2551  
2533 2552                  next_iter:
2534 2553                          mutex_enter(&hostp->nh_lock);
2535 2554                          nvp->nv_refcnt--;
2536 2555                  }
2537 2556  
2538 2557                  mutex_exit(&hostp->nh_lock);
2539 2558                  hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2540 2559          }
2541 2560  
2542 2561          mutex_exit(&g->lock);
2543 2562  }
2544 2563  
2545 2564  /*
2546 2565   * Allocate new unique sysid.
2547 2566   * In case of failure (no available sysids)
2548 2567   * return LM_NOSYSID.
2549 2568   */
2550 2569  sysid_t
2551 2570  nlm_sysid_alloc(void)
2552 2571  {
2553 2572          sysid_t ret_sysid = LM_NOSYSID;
2554 2573  
2555 2574          rw_enter(&lm_lck, RW_WRITER);
2556 2575          if (nlm_sysid_nidx > LM_SYSID_MAX)
2557 2576                  nlm_sysid_nidx = LM_SYSID;
2558 2577  
2559 2578          if (!BT_TEST(nlm_sysid_bmap, nlm_sysid_nidx)) {
2560 2579                  BT_SET(nlm_sysid_bmap, nlm_sysid_nidx);
2561 2580                  ret_sysid = nlm_sysid_nidx++;
2562 2581          } else {
2563 2582                  index_t id;
2564 2583  
2565 2584                  id = bt_availbit(nlm_sysid_bmap, NLM_BMAP_NITEMS);
2566 2585                  if (id > 0) {
2567 2586                          nlm_sysid_nidx = id + 1;
2568 2587                          ret_sysid = id;
2569 2588                          BT_SET(nlm_sysid_bmap, id);
2570 2589                  }
2571 2590          }
2572 2591  
2573 2592          rw_exit(&lm_lck);
2574 2593          return (ret_sysid);
2575 2594  }
2576 2595  
2577 2596  void
2578 2597  nlm_sysid_free(sysid_t sysid)
2579 2598  {
2580 2599          ASSERT(sysid >= LM_SYSID && sysid <= LM_SYSID_MAX);
2581 2600  
2582 2601          rw_enter(&lm_lck, RW_WRITER);
2583 2602          ASSERT(BT_TEST(nlm_sysid_bmap, sysid));
2584 2603          BT_CLEAR(nlm_sysid_bmap, sysid);
2585 2604          rw_exit(&lm_lck);
2586 2605  }
2587 2606  
2588 2607  /*
2589 2608   * Return true if the request came from a local caller.
2590 2609   * By necessity, this "knows" the netid names invented
2591 2610   * in lm_svc() and nlm_netid_from_knetconfig().
2592 2611   */
2593 2612  bool_t
2594 2613  nlm_caller_is_local(SVCXPRT *transp)
2595 2614  {
2596 2615          char *netid;
2597 2616          struct netbuf *rtaddr;
2598 2617  
2599 2618          netid = svc_getnetid(transp);
2600 2619          rtaddr = svc_getrpccaller(transp);
2601 2620  
2602 2621          if (netid == NULL)
2603 2622                  return (FALSE);
2604 2623  
2605 2624          if (strcmp(netid, "ticlts") == 0 ||
2606 2625              strcmp(netid, "ticotsord") == 0)
2607 2626                  return (TRUE);
2608 2627  
2609 2628          if (strcmp(netid, "tcp") == 0 || strcmp(netid, "udp") == 0) {
2610 2629                  struct sockaddr_in *sin = (void *)rtaddr->buf;
2611 2630                  if (sin->sin_addr.s_addr == htonl(INADDR_LOOPBACK))
2612 2631                          return (TRUE);
2613 2632          }
2614 2633          if (strcmp(netid, "tcp6") == 0 || strcmp(netid, "udp6") == 0) {
2615 2634                  struct sockaddr_in6 *sin6 = (void *)rtaddr->buf;
2616 2635                  if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))
2617 2636                          return (TRUE);
2618 2637          }
2619 2638  
2620 2639          return (FALSE); /* unknown transport */
2621 2640  }
2622 2641  
2623 2642  /*
2624 2643   * Get netid string correspondig to the given knetconfig.
2625 2644   * If not done already, save knc->knc_rdev in our table.
2626 2645   */
2627 2646  const char *
2628 2647  nlm_knc_to_netid(struct knetconfig *knc)
2629 2648  {
2630 2649          int i;
2631 2650          dev_t rdev;
2632 2651          struct nlm_knc *nc;
2633 2652          const char *netid = NULL;
2634 2653  
2635 2654          rw_enter(&lm_lck, RW_READER);
2636 2655          for (i = 0; i < NLM_KNCS; i++) {
2637 2656                  nc = &nlm_netconfigs[i];
2638 2657  
2639 2658                  if (nc->n_knc.knc_semantics == knc->knc_semantics &&
2640 2659                      strcmp(nc->n_knc.knc_protofmly,
2641 2660                      knc->knc_protofmly) == 0) {
2642 2661                          netid = nc->n_netid;
2643 2662                          rdev = nc->n_knc.knc_rdev;
2644 2663                          break;
2645 2664                  }
2646 2665          }
2647 2666          rw_exit(&lm_lck);
2648 2667  
2649 2668          if (netid != NULL && rdev == NODEV) {
2650 2669                  rw_enter(&lm_lck, RW_WRITER);
2651 2670                  if (nc->n_knc.knc_rdev == NODEV)
2652 2671                          nc->n_knc.knc_rdev = knc->knc_rdev;
2653 2672                  rw_exit(&lm_lck);
2654 2673          }
2655 2674  
2656 2675          return (netid);
2657 2676  }
2658 2677  
2659 2678  /*
2660 2679   * Get a knetconfig corresponding to the given netid.
2661 2680   * If there's no knetconfig for this netid, ENOENT
2662 2681   * is returned.
2663 2682   */
2664 2683  int
2665 2684  nlm_knc_from_netid(const char *netid, struct knetconfig *knc)
2666 2685  {
2667 2686          int i, ret;
2668 2687  
2669 2688          ret = ENOENT;
2670 2689          for (i = 0; i < NLM_KNCS; i++) {
2671 2690                  struct nlm_knc *nknc;
2672 2691  
2673 2692                  nknc = &nlm_netconfigs[i];
2674 2693                  if (strcmp(netid, nknc->n_netid) == 0 &&
2675 2694                      nknc->n_knc.knc_rdev != NODEV) {
2676 2695                          *knc = nknc->n_knc;
2677 2696                          ret = 0;
2678 2697                          break;
2679 2698                  }
2680 2699          }
2681 2700  
2682 2701          return (ret);
2683 2702  }
2684 2703  
2685 2704  void
2686 2705  nlm_cprsuspend(void)
2687 2706  {
2688 2707          struct nlm_globals *g;
2689 2708  
2690 2709          rw_enter(&lm_lck, RW_READER);
2691 2710          TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
2692 2711                  nlm_suspend_zone(g);
2693 2712  
2694 2713          rw_exit(&lm_lck);
2695 2714  }
2696 2715  
2697 2716  void
2698 2717  nlm_cprresume(void)
2699 2718  {
2700 2719          struct nlm_globals *g;
2701 2720  
2702 2721          rw_enter(&lm_lck, RW_READER);
2703 2722          TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
2704 2723                  nlm_resume_zone(g);
2705 2724  
2706 2725          rw_exit(&lm_lck);
2707 2726  }
2708 2727  
2709 2728  static void
2710 2729  nlm_nsm_clnt_init(CLIENT *clnt, struct nlm_nsm *nsm)
2711 2730  {
2712 2731          (void) clnt_tli_kinit(clnt, &nsm->ns_knc, &nsm->ns_addr, 0,
2713 2732              NLM_RPC_RETRIES, kcred);
2714 2733  }
2715 2734  
2716 2735  static void
2717 2736  nlm_netbuf_to_netobj(struct netbuf *addr, int *family, netobj *obj)
2718 2737  {
2719 2738          /* LINTED pointer alignment */
2720 2739          struct sockaddr *sa = (struct sockaddr *)addr->buf;
2721 2740  
2722 2741          *family = sa->sa_family;
2723 2742  
2724 2743          switch (sa->sa_family) {
2725 2744          case AF_INET: {
2726 2745                  /* LINTED pointer alignment */
2727 2746                  struct sockaddr_in *sin = (struct sockaddr_in *)sa;
2728 2747  
2729 2748                  obj->n_len = sizeof (sin->sin_addr);
2730 2749                  obj->n_bytes = (char *)&sin->sin_addr;
2731 2750                  break;
2732 2751          }
2733 2752  
2734 2753          case AF_INET6: {
2735 2754                  /* LINTED pointer alignment */
2736 2755                  struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
2737 2756  
2738 2757                  obj->n_len = sizeof (sin6->sin6_addr);
2739 2758                  obj->n_bytes = (char *)&sin6->sin6_addr;
2740 2759                  break;
2741 2760          }
2742 2761  
2743 2762          default:
2744 2763                  VERIFY(0);
2745 2764                  break;
2746 2765          }
2747 2766  }

↓ open down ↓

2208 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX