/*
 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
 * Authors: Doug Rabson <dfr@rabson.org>
 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

/*
 * NFS Lock Manager service functions (nlm_do_...)
 * Called from nlm_rpc_svc.c wrappers.
 *
 * Source code derived from FreeBSD nlm_prot_impl.c
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/thread.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/mount.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/share.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/taskq.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/queue.h>
#include <sys/sdt.h>
#include <netinet/in.h>

#include <rpc/rpc.h>
#include <rpc/xdr.h>
#include <rpc/pmap_prot.h>
#include <rpc/pmap_clnt.h>
#include <rpc/rpcb_prot.h>

#include <rpcsvc/nlm_prot.h>
#include <rpcsvc/sm_inter.h>

#include <nfs/nfs.h>
#include <nfs/nfs_clnt.h>
#include <nfs/export.h>
#include <nfs/rnode.h>

#include "nlm_impl.h"

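/*
 * NLM_IN_GRACE() is true while this zone's NLM is still in its
 * grace period, i.e. while ddi_get_lbolt() has not yet reached
 * the grace_threshold recorded in the per-zone globals.
 */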
#define	NLM_IN_GRACE(g) (ddi_get_lbolt() < (g)->grace_threshold)

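/*
 * Context passed from nlm_block() to nlm_block_callback()
 * while a blocking lock request waits in nlm_vop_frlock().
 */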
struct nlm_block_cb_data {
	struct nlm_host *hostp;
	struct nlm_vhold *nvp;
	struct flock64 *flp;
};

/*
 * Invoke an asynchronous RPC callback
 * (used when the NLM server needs to reply to an NLM _MSG_ procedure).
 */
#define	NLM_INVOKE_CALLBACK(descr, rpcp, resp, callb)			\
	do {								\
		enum clnt_stat _stat;					\
									\
		_stat = (*(callb))(resp, NULL, (rpcp)->nr_handle);	\
		if (_stat != RPC_SUCCESS && _stat != RPC_TIMEDOUT) {	\
			struct rpc_err _err;				\
									\
			CLNT_GETERR((rpcp)->nr_handle, &_err);		\
			NLM_ERR("NLM: %s callback failed: "		\
			    "stat %d, err %d\n", descr, _stat,		\
			    _err.re_errno);				\
		}							\
	_NOTE(CONSTCOND) } while (0)

static void nlm_block(
	nlm4_lockargs *lockargs,
	struct nlm_host *host,
	struct nlm_vhold *nvp,
	nlm_rpc_t *rpcp,
	struct flock64 *fl,
	nlm_testargs_cb grant_cb);

static vnode_t *nlm_fh_to_vp(struct netobj *);
static struct nlm_vhold *nlm_fh_to_vhold(struct nlm_host *, struct netobj *);
static void nlm_init_shrlock(struct shrlock *, nlm4_share *, struct nlm_host *);
static callb_cpr_t *nlm_block_callback(flk_cb_when_t, void *);
static int nlm_vop_frlock(vnode_t *, int, flock64_t *, int, offset_t,
    struct flk_callback *, cred_t *, caller_context_t *);

/*
 * Convert a lock from network to local form, and
 * check for valid range (no overflow).
 */
static int
nlm_init_flock(struct flock64 *fl, struct nlm4_lock *nl,
    struct nlm_host *host, rpcvers_t vers, short type)
{
	uint64_t off, len;

	bzero(fl, sizeof (*fl));
	off = nl->l_offset;
	len = nl->l_len;

	if (vers < NLM4_VERS) {
		/*
		 * Make sure range is valid for 32-bit client.
		 * Also allow len == ~0 to mean lock to EOF,
		 * which is supposed to be l_len == 0.
		 */
		if (len == MAX_UOFF32)
			len = 0;
		if (off > MAX_UOFF32 || len > MAX_UOFF32)
			return (EINVAL);
		if (off + len > MAX_UOFF32 + 1)
			return (EINVAL);
	} else {
		/*
		 * Check range for 64-bit client (no overflow).
		 * Again allow len == ~0 to mean lock to EOF.
		 */
		if (len == MAX_U_OFFSET_T)
			len = 0;
		if (len != 0 && off + (len - 1) < off)
			return (EINVAL);
	}

	fl->l_type = type;
	fl->l_whence = SEEK_SET;
	fl->l_start = off;
	fl->l_len = len;
	fl->l_sysid = host->nh_sysid;
	fl->l_pid = nl->svid;
	/* l_pad */

	return (0);
}

/*
 * Get a vnode from the client's file handle.
 * NOTE: The returned vnode is held; it _must_ be explicitly
 * released by VN_RELE().
 */
static vnode_t *
nlm_fh_to_vp(struct netobj *fh)
{
	fhandle_t *fhp;

	/*
	 * Get a vnode pointer for the given NFS file handle.
	 * Note that it could be an NFSv2 or NFSv3 handle,
	 * which means the size might vary. (don't copy)
	 */
	if (fh->n_len < sizeof (*fhp))
		return (NULL);

	/* We know this is aligned (kmem_alloc) */
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	fhp = (fhandle_t *)fh->n_bytes;
	return (lm_fhtovp(fhp));
}

/*
 * Get a vhold object for the client's file handle.  Like
 * nlm_fh_to_vp(), but returns a per-host vhold wrapper for
 * the vnode rather than a bare vnode pointer.
 *
 * NOTE: vhold object _must_ be explicitly released by
 * nlm_vhold_release().
 */
static struct nlm_vhold *
nlm_fh_to_vhold(struct nlm_host *hostp, struct netobj *fh)
{
	vnode_t *vp;
	struct nlm_vhold *nvp;

	vp = nlm_fh_to_vp(fh);
	if (vp == NULL)
		return (NULL);

	nvp = nlm_vhold_get(hostp, vp);

	/*
	 * Both nlm_fh_to_vp() and nlm_vhold_get()
	 * do VN_HOLD(), so we need to drop one
	 * reference on vnode.
	 */
	VN_RELE(vp);
	return (nvp);
}

/* ******************************************************************* */

/*
 * NLM implementation details, called from the RPC svc code.
 */

/*
 * Call-back from NFS statd, used to notify that one of our
 * hosts had a status change. The host can be either an
 * NFS client, NFS server or both.
 * According to the NSM protocol description, the state is a
 * number that increases monotonically each time the state of
 * the host changes. An even number indicates that the host
 * is down, while an odd number indicates that the host is up.
 *
 * Here we ignore this even/odd difference of the status number
 * reported by the NSM; we launch notification handlers
 * every time the state changes. The reason why we do so
 * is that client and server can talk to each other using
 * a connectionless transport, and it's easy to lose a packet
 * containing an NSM notification with a status number update.
 *
 * In nlm_host_monitor(), we put the sysid in the private data
 * that statd carries in this callback, so we can easily find
 * the host this call applies to.
 */
/* ARGSUSED */
void
nlm_do_notify1(nlm_sm_status *argp, void *res, struct svc_req *sr)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	uint16_t sysid;

	g = zone_getspecific(nlm_zone_key, curzone);
	bcopy(&argp->priv, &sysid, sizeof (sysid));

	DTRACE_PROBE2(nsm__notify, uint16_t, sysid,
	    int, argp->state);

	host = nlm_host_find_by_sysid(g, (sysid_t)sysid);
	if (host == NULL)
		return;

	nlm_host_notify_server(host, argp->state);
	nlm_host_notify_client(host, argp->state);
	nlm_host_release(g, host);
}

/*
 * Another available call-back for NFS statd.
 * Not currently used.
 */
/* ARGSUSED */
void
nlm_do_notify2(nlm_sm_status *argp, void *res, struct svc_req *sr)
{
	ASSERT(0);
}


/*
 * NLM_TEST, NLM_TEST_MSG,
 * NLM4_TEST, NLM4_TEST_MSG,
 * Client inquiry about locks, non-blocking.
 */
void
nlm_do_test(nlm4_testargs *argp, nlm4_testres *resp,
    struct svc_req *sr, nlm_testres_cb cb)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	struct nlm4_holder *lh;
	struct nlm_owner_handle *oh;
	nlm_rpc_t *rpcp = NULL;
	vnode_t *vp = NULL;
	struct netbuf *addr;
	char *netid;
	char *name;
	int error;
	struct flock64 fl;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	name = argp->alock.caller_name;
	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_findcreate(g, name, netid, addr);
	if (host == NULL) {
		resp->stat.stat = nlm4_denied_nolocks;
		return;
	}
	if (cb != NULL) {
		error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
		if (error != 0) {
			resp->stat.stat = nlm4_denied_nolocks;
			goto out;
		}
	}

	vp = nlm_fh_to_vp(&argp->alock.fh);
	if (vp == NULL) {
		resp->stat.stat = nlm4_stale_fh;
		goto out;
	}

	if (NLM_IN_GRACE(g)) {
		resp->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	/* Convert to local form. */
	error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers,
	    (argp->exclusive) ? F_WRLCK : F_RDLCK);
	if (error) {
		resp->stat.stat = nlm4_failed;
		goto out;
	}

	/* BSD: VOP_ADVLOCK(nv->nv_vp, NULL, F_GETLK, &fl, F_REMOTE); */
	error = nlm_vop_frlock(vp, F_GETLK, &fl,
	    F_REMOTELOCK | FREAD | FWRITE,
	    (u_offset_t)0, NULL, CRED(), NULL);
	if (error) {
		resp->stat.stat = nlm4_failed;
		goto out;
	}

	if (fl.l_type == F_UNLCK) {
		resp->stat.stat = nlm4_granted;
		goto out;
	}
	resp->stat.stat = nlm4_denied;

	/*
	 * This lock "test" fails due to a conflicting lock.
	 *
	 * If this is a v1 client, make sure the conflicting
	 * lock range we report can be expressed with 32-bit
	 * offsets. The lock range requested was expressed
	 * as 32-bit offset and length, so at least part of
	 * the conflicting lock should lie below MAX_UOFF32.
	 * If the conflicting lock extends past that, we'll
	 * trim the range to end at MAX_UOFF32 so this lock
	 * can be represented in a 32-bit response. Check
	 * the start also (paranoid, but a low cost check).
	 */
	if (sr->rq_vers < NLM4_VERS) {
		uint64 maxlen;
		if (fl.l_start > MAX_UOFF32)
			fl.l_start = MAX_UOFF32;
		maxlen = MAX_UOFF32 + 1 - fl.l_start;
		if (fl.l_len > maxlen)
			fl.l_len = maxlen;
	}

	/*
	 * Build the nlm4_holder result structure.
	 *
	 * Note that lh->oh is freed via xdr_free,
	 * xdr_nlm4_holder, xdr_netobj, xdr_bytes.
	 */
	oh = kmem_zalloc(sizeof (*oh), KM_SLEEP);
	oh->oh_sysid = (sysid_t)fl.l_sysid;
	lh = &resp->stat.nlm4_testrply_u.holder;
	lh->exclusive = (fl.l_type == F_WRLCK);
	lh->svid = fl.l_pid;
	lh->oh.n_len = sizeof (*oh);
	lh->oh.n_bytes = (void *)oh;
	lh->l_offset = fl.l_start;
	lh->l_len = fl.l_len;

out:
	/*
	 * If we have a callback function, use that to
	 * deliver the response via another RPC call.
	 */
	if (cb != NULL && rpcp != NULL)
		NLM_INVOKE_CALLBACK("test", rpcp, resp, cb);

	if (vp != NULL)
		VN_RELE(vp);
	if (rpcp != NULL)
		nlm_host_rele_rpc(host, rpcp);

	nlm_host_release(g, host);
}

/*
 * NLM_LOCK, NLM_LOCK_MSG, NLM_NM_LOCK
 * NLM4_LOCK, NLM4_LOCK_MSG, NLM4_NM_LOCK
 *
 * Client request to set a lock, possibly blocking.
 *
 * If the lock needs to block, we return status blocked to
 * this RPC call, and then later call back the client with
 * a "granted" callback. Tricky aspects of this include:
 * sending a reply before this function returns, and then
 * borrowing this thread from the RPC service pool for the
 * wait on the lock and doing the later granted callback.
 *
 * We also have to keep a list of locks (pending + granted)
 * both to handle retransmitted requests, and to keep the
 * vnodes for those locks active.
 */
void
nlm_do_lock(nlm4_lockargs *argp, nlm4_res *resp, struct svc_req *sr,
    nlm_reply_cb reply_cb, nlm_res_cb res_cb, nlm_testargs_cb grant_cb)
{
	struct nlm_globals *g;
	struct flock64 fl;
	struct nlm_host *host = NULL;
	struct netbuf *addr;
	struct nlm_vhold *nvp = NULL;
	nlm_rpc_t *rpcp = NULL;
	char *netid;
	char *name;
	int error, flags;
	bool_t do_blocking = FALSE;
	bool_t do_mon_req = FALSE;
	enum nlm4_stats status;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	name = argp->alock.caller_name;
	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_findcreate(g, name, netid, addr);
	if (host == NULL) {
		DTRACE_PROBE4(no__host, struct nlm_globals *, g,
		    char *, name, char *, netid, struct netbuf *, addr);
		status = nlm4_denied_nolocks;
		goto doreply;
	}

	DTRACE_PROBE3(start, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_lockargs *, argp);

	/*
	 * If we might need to make a _msg_ call that requires an
	 * RPC callback, get the RPC client handle now, so we know
	 * whether we can bind to the NLM service on this client.
	 *
	 * Note: the host object carries the transport type.
	 * One client using multiple transports gets
	 * separate sysids for each of its transports.
	 */
	if (res_cb != NULL || (grant_cb != NULL && argp->block == TRUE)) {
		error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
		if (error != 0) {
			status = nlm4_denied_nolocks;
			goto doreply;
		}
	}

	/*
	 * During the "grace period", only allow reclaim.
	 */
	if (argp->reclaim == 0 && NLM_IN_GRACE(g)) {
		status = nlm4_denied_grace_period;
		goto doreply;
	}

	/*
	 * Check whether we missed host shutdown event
	 */
	if (nlm_host_get_state(host) != argp->state)
		nlm_host_notify_server(host, argp->state);

	/*
	 * Get a hold on the vnode for a lock operation.
	 * Only lock() and share() need vhold objects.
	 */
	nvp = nlm_fh_to_vhold(host, &argp->alock.fh);
	if (nvp == NULL) {
		status = nlm4_stale_fh;
		goto doreply;
	}

	/* Convert to local form. */
	error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers,
	    (argp->exclusive) ? F_WRLCK : F_RDLCK);
	if (error) {
		status = nlm4_failed;
		goto doreply;
	}

	/*
	 * Try to lock non-blocking first. If we succeed
	 * getting the lock, we can reply with the granted
	 * status directly and avoid the complications of
	 * making the "granted" RPC callback later.
	 *
	 * This also lets us find out now about some
	 * possible errors like EROFS, etc.
	 */
	flags = F_REMOTELOCK | FREAD | FWRITE;
	error = nlm_vop_frlock(nvp->nv_vp, F_SETLK, &fl, flags,
	    (u_offset_t)0, NULL, CRED(), NULL);

	DTRACE_PROBE3(setlk__res, struct flock64 *, &fl,
	    int, flags, int, error);

	switch (error) {
	case 0:
		/* Got it without waiting! */
		status = nlm4_granted;
		do_mon_req = TRUE;
		break;

	/* EINPROGRESS too? */
	case EAGAIN:
		/* We did not get the lock. Should we block? */
		if (argp->block == FALSE || grant_cb == NULL) {
			status = nlm4_denied;
			break;
		}
		/*
		 * Should block. Try to reserve this thread
		 * so we can use it to wait for the lock and
		 * later send the granted message. If this
		 * reservation fails, say "no resources".
		 */
		if (!svc_reserve_thread(sr->rq_xprt)) {
			status = nlm4_denied_nolocks;
			break;
		}
		/*
		 * OK, can detach this thread, so this call
		 * will block below (after we reply).
		 */
		status = nlm4_blocked;
		do_blocking = TRUE;
		do_mon_req = TRUE;
		break;

	case ENOLCK:
		/* Failed for lack of resources. */
		status = nlm4_denied_nolocks;
		break;

	case EROFS:
		/* read-only file system */
		status = nlm4_rofs;
		break;

	case EFBIG:
		/* file too big */
		status = nlm4_fbig;
		break;

	case EDEADLK:
		/* dead lock condition */
		status = nlm4_deadlck;
		break;

	default:
		status = nlm4_denied;
		break;
	}

doreply:
	resp->stat.stat = status;

	/*
	 * We get one of two function pointers; one for a
	 * normal RPC reply, and another for doing an RPC
	 * "callback" _res reply for a _msg function.
	 * Use either of those to send the reply now.
	 *
	 * If sending this reply fails, just leave the
	 * lock in the list for retransmitted requests.
	 * Cleanup is via unlock or host rele (statmon).
	 */
	if (reply_cb != NULL) {
		/* i.e. nlm_lock_1_reply */
		if (!(*reply_cb)(sr->rq_xprt, resp))
			svcerr_systemerr(sr->rq_xprt);
	}
	if (res_cb != NULL && rpcp != NULL)
		NLM_INVOKE_CALLBACK("lock", rpcp, resp, res_cb);

	/*
	 * The reply has been sent to the client.
	 * Start monitoring this client (maybe).
	 *
	 * Note that the non-monitored (NM) calls pass grant_cb=NULL
	 * indicating that the client doesn't support RPC callbacks.
	 * No monitoring for these (lame) clients.
	 */
	if (do_mon_req && grant_cb != NULL)
		nlm_host_monitor(g, host, argp->state);

	if (do_blocking) {
		/*
		 * We need to block on this lock, and when that
		 * completes, do the granted RPC call. Note that
		 * we "reserved" this thread above, so we can now
		 * "detach" it from the RPC SVC pool, allowing it
		 * to block indefinitely if needed.
		 */
		ASSERT(rpcp != NULL);
		(void) svc_detach_thread(sr->rq_xprt);
		nlm_block(argp, host, nvp, rpcp, &fl, grant_cb);
	}

	DTRACE_PROBE3(lock__end, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_res *, resp);

	if (rpcp != NULL)
		nlm_host_rele_rpc(host, rpcp);

	nlm_vhold_release(host, nvp);
	nlm_host_release(g, host);
}

/*
 * Helper for nlm_do_lock(), split out partly for observability
 * (we'll see a call blocked in this function) and partly
 * because nlm_do_lock() was getting quite long.
 */
static void
nlm_block(nlm4_lockargs *lockargs,
    struct nlm_host *host,
    struct nlm_vhold *nvp,
    nlm_rpc_t *rpcp,
    struct flock64 *flp,
    nlm_testargs_cb grant_cb)
{
	nlm4_testargs args;
	int error;
	flk_callback_t flk_cb;
	struct nlm_block_cb_data cb_data;

	/*
	 * Keep a list of blocked locks on nh_pending, and use it
	 * to cancel these threads in nlm_destroy_client_pending.
	 *
	 * Check to see if this lock is already in the list
	 * and if not, add an entry for it. Allocate first,
	 * then if we don't insert, free the new one.
	 * Caller already has vp held.
	 */

	error = nlm_slreq_register(host, nvp, flp);
	if (error != 0) {
		/*
		 * A sleeping lock request with the given fl is already
		 * registered by someone else. This means that some
		 * other thread is handling the request; let it do
		 * its work.
		 */
		ASSERT(error == EEXIST);
		return;
	}

	cb_data.hostp = host;
	cb_data.nvp = nvp;
	cb_data.flp = flp;
	flk_init_callback(&flk_cb, nlm_block_callback, &cb_data);

	/* BSD: VOP_ADVLOCK(vp, NULL, F_SETLK, fl, F_REMOTE); */
	error = nlm_vop_frlock(nvp->nv_vp, F_SETLKW, flp,
	    F_REMOTELOCK | FREAD | FWRITE,
	    (u_offset_t)0, &flk_cb, CRED(), NULL);

	if (error != 0) {
		/*
		 * We failed getting the lock, but have no way to
		 * tell the client about that. Let 'em time out.
		 */
		(void) nlm_slreq_unregister(host, nvp, flp);
		return;
	}

	/*
	 * Do the "granted" call-back to the client.
	 */
	args.cookie = lockargs->cookie;
	args.exclusive = lockargs->exclusive;
	args.alock = lockargs->alock;

	NLM_INVOKE_CALLBACK("grant", rpcp, &args, grant_cb);
}

/*
 * The flk callback used when the NLM server sets a new sleeping
 * lock. It unregisters the NLM sleeping lock request (nlm_slreq)
 * associated with the sleeping lock _before_ the lock becomes
 * active, which prevents a potential race condition between
 * nlm_block() and nlm_do_cancel().
 */
static callb_cpr_t *
nlm_block_callback(flk_cb_when_t when, void *data)
{
	struct nlm_block_cb_data *cb_data;

	cb_data = (struct nlm_block_cb_data *)data;
	if (when == FLK_AFTER_SLEEP) {
		(void) nlm_slreq_unregister(cb_data->hostp,
		    cb_data->nvp, cb_data->flp);
	}

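	/* We have no callb_cpr_t to hand back to the flock code. */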
	return (0);
}

/*
 * NLM_CANCEL, NLM_CANCEL_MSG,
 * NLM4_CANCEL, NLM4_CANCEL_MSG,
 * Client gives up waiting for a blocking lock.
 */
void
nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *resp,
    struct svc_req *sr, nlm_res_cb cb)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	struct netbuf *addr;
	struct nlm_vhold *nvp = NULL;
	nlm_rpc_t *rpcp = NULL;
	char *netid;
	char *name;
	int error;
	struct flock64 fl;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);
	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);
	name = argp->alock.caller_name;

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_findcreate(g, name, netid, addr);
	if (host == NULL) {
		resp->stat.stat = nlm4_denied_nolocks;
		return;
	}
	if (cb != NULL) {
		error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
		if (error != 0) {
			resp->stat.stat = nlm4_denied_nolocks;
			return;
		}
	}

	DTRACE_PROBE3(start, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_cancargs *, argp);

	if (NLM_IN_GRACE(g)) {
		resp->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	nvp = nlm_fh_to_vhold(host, &argp->alock.fh);
	if (nvp == NULL) {
		resp->stat.stat = nlm4_stale_fh;
		goto out;
	}

	/* Convert to local form. */
	error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers,
	    (argp->exclusive) ? F_WRLCK : F_RDLCK);
	if (error) {
		resp->stat.stat = nlm4_failed;
		goto out;
	}

	error = nlm_slreq_unregister(host, nvp, &fl);
	if (error != 0) {
		/*
		 * There's no sleeping lock request corresponding
		 * to the lock, i.e. the sleeping lock the client
		 * asked us to cancel doesn't exist.
		 */
		resp->stat.stat = nlm4_denied;
		goto out;
	}

	fl.l_type = F_UNLCK;
	error = nlm_vop_frlock(nvp->nv_vp, F_SETLK, &fl,
	    F_REMOTELOCK | FREAD | FWRITE,
	    (u_offset_t)0, NULL, CRED(), NULL);

	resp->stat.stat = (error == 0) ?
	    nlm4_granted : nlm4_denied;

out:
	/*
	 * If we have a callback function, use that to
	 * deliver the response via another RPC call.
	 */
	if (cb != NULL && rpcp != NULL)
		NLM_INVOKE_CALLBACK("cancel", rpcp, resp, cb);

	DTRACE_PROBE3(cancel__end, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_res *, resp);

	if (rpcp != NULL)
		nlm_host_rele_rpc(host, rpcp);

	nlm_vhold_release(host, nvp);
	nlm_host_release(g, host);
}

/*
 * NLM_UNLOCK, NLM_UNLOCK_MSG,
 * NLM4_UNLOCK, NLM4_UNLOCK_MSG,
 * Client removes one of their locks.
 */
void
nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *resp,
    struct svc_req *sr, nlm_res_cb cb)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	struct netbuf *addr;
	nlm_rpc_t *rpcp = NULL;
	vnode_t *vp = NULL;
	char *netid;
	char *name;
	int error;
	struct flock64 fl;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);
	name = argp->alock.caller_name;

	/*
	 * The NLM_UNLOCK operation doesn't have an error code
	 * denoting that the operation failed, so we always
	 * return nlm4_granted except when the server is
	 * in its grace period.
	 */
	resp->stat.stat = nlm4_granted;

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_findcreate(g, name, netid, addr);
	if (host == NULL)
		return;

	if (cb != NULL) {
		error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
		if (error != 0)
			goto out;
	}

	DTRACE_PROBE3(start, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_unlockargs *, argp);

	if (NLM_IN_GRACE(g)) {
		resp->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	vp = nlm_fh_to_vp(&argp->alock.fh);
	if (vp == NULL)
		goto out;

	/* Convert to local form. */
	error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers, F_UNLCK);
	if (error)
		goto out;

	/* BSD: VOP_ADVLOCK(nv->nv_vp, NULL, F_UNLCK, &fl, F_REMOTE); */
	error = nlm_vop_frlock(vp, F_SETLK, &fl,
	    F_REMOTELOCK | FREAD | FWRITE,
	    (u_offset_t)0, NULL, CRED(), NULL);

	DTRACE_PROBE1(unlock__res, int, error);
out:
	/*
	 * If we have a callback function, use that to
	 * deliver the response via another RPC call.
	 */
	if (cb != NULL && rpcp != NULL)
		NLM_INVOKE_CALLBACK("unlock", rpcp, resp, cb);

	DTRACE_PROBE3(unlock__end, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_res *, resp);

	if (vp != NULL)
		VN_RELE(vp);
	if (rpcp != NULL)
		nlm_host_rele_rpc(host, rpcp);

	nlm_host_release(g, host);
}

/*
 * NLM_GRANTED, NLM_GRANTED_MSG,
 * NLM4_GRANTED, NLM4_GRANTED_MSG,
 *
 * This service routine is special. It's the only one that's
 * really part of our NLM _client_ support, used by _servers_
 * to "call back" when a blocking lock from this NLM client
 * is granted by the server. In this case, we _know_ there is
 * already an nlm_host allocated and held by the client code.
 * We want to find that nlm_host here.
 *
 * Over in nlm_call_lock(), the client encoded the sysid for this
 * server in the "owner handle" netbuf sent with our lock request.
 * We can now use that to find the nlm_host object we used there.
 * (NB: The owner handle is opaque to the server.)
 */
void
nlm_do_granted(nlm4_testargs *argp, nlm4_res *resp,
    struct svc_req *sr, nlm_res_cb cb)
{
	struct nlm_globals *g;
	struct nlm_owner_handle *oh;
	struct nlm_host *host;
	nlm_rpc_t *rpcp = NULL;
	int error;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);
	resp->stat.stat = nlm4_denied;

	g = zone_getspecific(nlm_zone_key, curzone);
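	/*
	 * The "owner handle" is the one we built in nlm_call_lock();
	 * it carries the sysid of the server's nlm_host entry.
	 */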
	oh = (void *) argp->alock.oh.n_bytes;
	if (oh == NULL)
		return;

	host = nlm_host_find_by_sysid(g, oh->oh_sysid);
	if (host == NULL)
		return;

	if (cb != NULL) {
		error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
		if (error != 0)
			goto out;
	}

	if (NLM_IN_GRACE(g)) {
		resp->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	error = nlm_slock_grant(g, host, &argp->alock);
	if (error == 0)
		resp->stat.stat = nlm4_granted;

out:
	/*
	 * If we have a callback function, use that to
	 * deliver the response via another RPC call.
	 */
	if (cb != NULL && rpcp != NULL)
		NLM_INVOKE_CALLBACK("do_granted", rpcp, resp, cb);

	if (rpcp != NULL)
		nlm_host_rele_rpc(host, rpcp);

	nlm_host_release(g, host);
}

/*
 * NLM_FREE_ALL, NLM4_FREE_ALL
 *
 * Destroy all lock state for the calling client.
 */
void
nlm_do_free_all(nlm4_notify *argp, void *res, struct svc_req *sr)
{
	struct nlm_globals *g;
	struct nlm_host_list host_list;
	struct nlm_host *hostp;

	TAILQ_INIT(&host_list);
	g = zone_getspecific(nlm_zone_key, curzone);

	/* Serialize calls to clean locks. */
	mutex_enter(&g->clean_lock);

	/*
	 * Find all hosts that have the given node name and put them on a
	 * local list.
	 */
	mutex_enter(&g->lock);
	for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL;
	    hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) {
		if (strcasecmp(hostp->nh_name, argp->name) == 0) {
			/*
			 * If needed take the host out of the idle list since
			 * we are taking a reference.
			 */
			if (hostp->nh_flags & NLM_NH_INIDLE) {
				TAILQ_REMOVE(&g->nlm_idle_hosts, hostp,
				    nh_link);
				hostp->nh_flags &= ~NLM_NH_INIDLE;
			}
			hostp->nh_refs++;

			TAILQ_INSERT_TAIL(&host_list, hostp, nh_link);
		}
	}
	mutex_exit(&g->lock);

	/* Free locks for all hosts on the local list. */
	while (!TAILQ_EMPTY(&host_list)) {
		hostp = TAILQ_FIRST(&host_list);
		TAILQ_REMOVE(&host_list, hostp, nh_link);

		/*
		 * Note that this does not do client-side cleanup.
		 * We want to do that ONLY if statd tells us the
		 * server has restarted.
		 */
		nlm_host_notify_server(hostp, argp->state);
		nlm_host_release(g, hostp);
	}

	mutex_exit(&g->clean_lock);

	(void) res;
	(void) sr;
}

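/*
 * Convert an NLM share request (nlm4_share) into the local
 * shrlock form used by VOP_SHRLOCK().
 */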
static void
nlm_init_shrlock(struct shrlock *shr,
    nlm4_share *nshare, struct nlm_host *host)
{

	switch (nshare->access) {
	default:
	case fsa_NONE:
		shr->s_access = 0;
		break;
	case fsa_R:
		shr->s_access = F_RDACC;
		break;
	case fsa_W:
		shr->s_access = F_WRACC;
		break;
	case fsa_RW:
		shr->s_access = F_RWACC;
		break;
	}

	switch (nshare->mode) {
	default:
	case fsm_DN:
		shr->s_deny = F_NODNY;
		break;
	case fsm_DR:
		shr->s_deny = F_RDDNY;
		break;
	case fsm_DW:
		shr->s_deny = F_WRDNY;
		break;
	case fsm_DRW:
		shr->s_deny = F_RWDNY;
		break;
	}

	shr->s_sysid = host->nh_sysid;
	shr->s_pid = 0;
	shr->s_own_len = nshare->oh.n_len;
	shr->s_owner = nshare->oh.n_bytes;
}

/*
 * NLM_SHARE, NLM4_SHARE
 *
 * Request a DOS-style share reservation
 */
void
nlm_do_share(nlm4_shareargs *argp, nlm4_shareres *resp, struct svc_req *sr)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	struct netbuf *addr;
	struct nlm_vhold *nvp = NULL;
	char *netid;
	char *name;
	int error;
	struct shrlock shr;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	name = argp->share.caller_name;
	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_findcreate(g, name, netid, addr);
	if (host == NULL) {
		resp->stat = nlm4_denied_nolocks;
		return;
	}

	DTRACE_PROBE3(share__start, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_shareargs *, argp);

	if (argp->reclaim == 0 && NLM_IN_GRACE(g)) {
		resp->stat = nlm4_denied_grace_period;
		goto out;
	}

	/*
	 * Get a hold on the vnode for the share operation.
	 * Only lock() and share() need vhold objects.
	 */
	nvp = nlm_fh_to_vhold(host, &argp->share.fh);
	if (nvp == NULL) {
		resp->stat = nlm4_stale_fh;
		goto out;
	}

	/* Convert to local form. */
	nlm_init_shrlock(&shr, &argp->share, host);
	error = VOP_SHRLOCK(nvp->nv_vp, F_SHARE, &shr,
	    FREAD | FWRITE, CRED(), NULL);

	if (error == 0) {
		resp->stat = nlm4_granted;
		nlm_host_monitor(g, host, 0);
	} else {
		resp->stat = nlm4_denied;
	}

out:
	DTRACE_PROBE3(share__end, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_shareres *, resp);

	nlm_vhold_release(host, nvp);
	nlm_host_release(g, host);
}

/*
 * NLM_UNSHARE, NLM4_UNSHARE
 *
 * Release a DOS-style share reservation
 */
void
nlm_do_unshare(nlm4_shareargs *argp, nlm4_shareres *resp, struct svc_req *sr)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	struct netbuf *addr;
	vnode_t *vp = NULL;
	char *netid;
	int error;
	struct shrlock shr;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_find(g, netid, addr);
	if (host == NULL) {
		resp->stat = nlm4_denied_nolocks;
		return;
	}

	DTRACE_PROBE3(unshare__start, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_shareargs *, argp);

	if (NLM_IN_GRACE(g)) {
		resp->stat = nlm4_denied_grace_period;
		goto out;
	}

	vp = nlm_fh_to_vp(&argp->share.fh);
	if (vp == NULL) {
		resp->stat = nlm4_stale_fh;
		goto out;
	}

	/* Convert to local form. */
	nlm_init_shrlock(&shr, &argp->share, host);
	error = VOP_SHRLOCK(vp, F_UNSHARE, &shr,
	    FREAD | FWRITE, CRED(), NULL);

	(void) error;
	resp->stat = nlm4_granted;

out:
	DTRACE_PROBE3(unshare__end, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_shareres *, resp);

	if (vp != NULL)
		VN_RELE(vp);

	nlm_host_release(g, host);
}

/*
 * NLM wrapper to VOP_FRLOCK that checks the validity of the lock before
 * invoking the vnode operation.
 */
static int
nlm_vop_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset,
    struct flk_callback *flk_cbp, cred_t *cr, caller_context_t *ct)
{
	if (bfp->l_len != 0 && bfp->l_start + (bfp->l_len - 1)
	    < bfp->l_start) {
		return (EOVERFLOW);
	}

	return (VOP_FRLOCK(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
}