1 /*
   2  * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
   3  * Authors: Doug Rabson <dfr@rabson.org>
   4  * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
   5  *
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions
   8  * are met:
   9  * 1. Redistributions of source code must retain the above copyright
  10  *    notice, this list of conditions and the following disclaimer.
  11  * 2. Redistributions in binary form must reproduce the above copyright
  12  *    notice, this list of conditions and the following disclaimer in the
  13  *    documentation and/or other materials provided with the distribution.
  14  *
  15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  25  * SUCH DAMAGE.
  26  */
  27 
  28 /*
  29  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  30  * Copyright (c) 2012 by Delphix. All rights reserved.
  31  */
  32 
  33 /*
  34  * NFS Lock Manager service functions (nlm_do_...)
  35  * Called from nlm_rpc_svc.c wrappers.
  36  *
  37  * Source code derived from FreeBSD nlm_prot_impl.c
  38  */
  39 
  40 #include <sys/param.h>
  41 #include <sys/systm.h>
  42 #include <sys/thread.h>
  43 #include <sys/fcntl.h>
  44 #include <sys/flock.h>
  45 #include <sys/mount.h>
  46 #include <sys/priv.h>
  47 #include <sys/proc.h>
  48 #include <sys/share.h>
  49 #include <sys/socket.h>
  50 #include <sys/syscall.h>
  51 #include <sys/syslog.h>
  52 #include <sys/systm.h>
  53 #include <sys/taskq.h>
  54 #include <sys/unistd.h>
  55 #include <sys/vnode.h>
  56 #include <sys/vfs.h>
  57 #include <sys/queue.h>
  58 #include <sys/sdt.h>
  59 #include <netinet/in.h>
  60 
  61 #include <rpc/rpc.h>
  62 #include <rpc/xdr.h>
  63 #include <rpc/pmap_prot.h>
  64 #include <rpc/pmap_clnt.h>
  65 #include <rpc/rpcb_prot.h>
  66 
  67 #include <rpcsvc/nlm_prot.h>
  68 #include <rpcsvc/sm_inter.h>
  69 
  70 #include <nfs/nfs.h>
  71 #include <nfs/nfs_clnt.h>
  72 #include <nfs/export.h>
  73 #include <nfs/rnode.h>
  74 
  75 #include "nlm_impl.h"
  76 
/*
 * True while the server-side grace period is still in effect,
 * i.e. lbolt has not yet reached this zone's grace_threshold.
 */
#define NLM_IN_GRACE(g) (ddi_get_lbolt() < (g)->grace_threshold)
  78 
/*
 * Context handed to nlm_block_callback() via flk_init_callback()
 * while nlm_block() waits on a blocking lock: the client host,
 * the vnode hold, and the lock being slept on.
 */
struct nlm_block_cb_data {
	struct nlm_host		*hostp;
	struct nlm_vhold	*nvp;
	struct flock64		*flp;
};
  84 
  85 /*
  86  * Invoke an asyncronous RPC callbeck
  87  * (used when NLM server needs to reply to MSG NLM procedure).
  88  */
/*
 * descr: string tag used in the error log; rpcp: nlm_rpc_t carrying the
 * client handle; resp: the result to deliver; callb: the generated RPC
 * "_res" stub to invoke.  RPC_TIMEDOUT is deliberately not logged as an
 * error: these callbacks are best-effort and the client may retransmit.
 */
#define NLM_INVOKE_CALLBACK(descr, rpcp, resp, callb)			\
	do {								\
		enum clnt_stat _stat;					\
									\
		_stat = (*(callb))(resp, NULL, (rpcp)->nr_handle);	\
		if (_stat != RPC_SUCCESS && _stat != RPC_TIMEDOUT) {	\
			struct rpc_err _err;				\
									\
			CLNT_GETERR((rpcp)->nr_handle, &_err);		\
			NLM_ERR("NLM: %s callback failed: "		\
			    "stat %d, err %d\n", descr, _stat,		\
			    _err.re_errno);				\
		}							\
									\
	_NOTE(CONSTCOND) } while (0)
 104 
 105 static void nlm_block(
 106         nlm4_lockargs *lockargs,
 107         struct nlm_host *host,
 108         struct nlm_vhold *nvp,
 109         nlm_rpc_t *rpcp,
 110         struct flock64 *fl,
 111         nlm_testargs_cb grant_cb);
 112 
 113 static vnode_t *nlm_fh_to_vp(struct netobj *);
 114 static struct nlm_vhold *nlm_fh_to_vhold(struct nlm_host *, struct netobj *);
 115 static void nlm_init_shrlock(struct shrlock *, nlm4_share *, struct nlm_host *);
 116 static callb_cpr_t *nlm_block_callback(flk_cb_when_t, void *);
 117 static int nlm_vop_frlock(vnode_t *, int, flock64_t *, int, offset_t,
 118     struct flk_callback *, cred_t *, caller_context_t *);
 119 
 120 /*
 121  * Convert a lock from network to local form, and
 122  * check for valid range (no overflow).
 123  */
 124 static int
 125 nlm_init_flock(struct flock64 *fl, struct nlm4_lock *nl,
 126         struct nlm_host *host, rpcvers_t vers, short type)
 127 {
 128         uint64_t off, len;
 129 
 130         bzero(fl, sizeof (*fl));
 131         off = nl->l_offset;
 132         len = nl->l_len;
 133 
 134         if (vers < NLM4_VERS) {
 135                 if (off > MAX_UOFF32 || len > MAX_UOFF32)
 136                         return (EINVAL);
 137                 if (off + len > MAX_UOFF32 + 1)
 138                         return (EINVAL);
 139         } else {
 140                 /*
 141                  * Check range for 64-bit client (no overflow).
 142                  * Again allow len == ~0 to mean lock to EOF.
 143                  */
 144                 if (len == MAX_U_OFFSET_T)
 145                         len = 0;
 146                 if (len != 0 && off + (len - 1) < off)
 147                         return (EINVAL);
 148         }
 149 
 150         fl->l_type = type;
 151         fl->l_whence = SEEK_SET;
 152         fl->l_start = off;
 153         fl->l_len = len;
 154         fl->l_sysid = host->nh_sysid;
 155         fl->l_pid = nl->svid;
 156         /* l_pad */
 157 
 158         return (0);
 159 }
 160 
 161 /*
 162  * Gets vnode from client's filehandle
 163  * NOTE: Holds vnode, it _must_ be explicitly
 164  * released by VN_RELE().
 165  */
 166 static vnode_t *
 167 nlm_fh_to_vp(struct netobj *fh)
 168 {
 169         fhandle_t *fhp;
 170 
 171         /*
 172          * Get a vnode pointer for the given NFS file handle.
 173          * Note that it could be an NFSv2 for NFSv3 handle,
 174          * which means the size might vary.  (don't copy)
 175          */
 176         if (fh->n_len < sizeof (*fhp))
 177                 return (NULL);
 178 
 179         /* We know this is aligned (kmem_alloc) */
 180         /* LINTED E_BAD_PTR_CAST_ALIGN */
 181         fhp = (fhandle_t *)fh->n_bytes;
 182         return (lm_fhtovp(fhp));
 183 }
 184 
 185 /*
 186  * Get vhold from client's filehandle, but in contrast to
 187  * The function tries to check some access rights as well.
 188  *
 189  * NOTE: vhold object _must_ be explicitly released by
 190  * nlm_vhold_release().
 191  */
 192 static struct nlm_vhold *
 193 nlm_fh_to_vhold(struct nlm_host *hostp, struct netobj *fh)
 194 {
 195         vnode_t *vp;
 196         struct nlm_vhold *nvp;
 197 
 198         vp = nlm_fh_to_vp(fh);
 199         if (vp == NULL)
 200                 return (NULL);
 201 
 202 
 203         nvp = nlm_vhold_get(hostp, vp);
 204 
 205         /*
 206          * Both nlm_fh_to_vp() and nlm_vhold_get()
 207          * do VN_HOLD(), so we need to drop one
 208          * reference on vnode.
 209          */
 210         VN_RELE(vp);
 211         return (nvp);
 212 }
 213 
 214 /* ******************************************************************* */
 215 
 216 /*
 217  * NLM implementation details, called from the RPC svc code.
 218  */
 219 
 220 /*
 221  * Call-back from NFS statd, used to notify that one of our
 222  * hosts had a status change. The host can be either an
 223  * NFS client, NFS server or both.
 224  * According to NSM protocol description, the state is a
 225  * number that is increases monotonically each time the
 226  * state of host changes. An even number indicates that
 227  * the host is down, while an odd number indicates that
 228  * the host is up.
 229  *
 230  * Here we ignore this even/odd difference of status number
 231  * reported by the NSM, we launch notification handlers
 232  * every time the state is changed. The reason we why do so
 233  * is that client and server can talk to each other using
 234  * connectionless transport and it's easy to lose packet
 235  * containing NSM notification with status number update.
 236  *
 237  * In nlm_host_monitor(), we put the sysid in the private data
 238  * that statd carries in this callback, so we can easliy find
 239  * the host this call applies to.
 240  */
 241 /* ARGSUSED */
 242 void
 243 nlm_do_notify1(nlm_sm_status *argp, void *res, struct svc_req *sr)
 244 {
 245         struct nlm_globals *g;
 246         struct nlm_host *host;
 247         uint16_t sysid;
 248 
 249         g = zone_getspecific(nlm_zone_key, curzone);
 250         bcopy(&argp->priv, &sysid, sizeof (sysid));
 251 
 252         DTRACE_PROBE2(nsm__notify, uint16_t, sysid,
 253             int, argp->state);
 254 
 255         host = nlm_host_find_by_sysid(g, (sysid_t)sysid);
 256         if (host == NULL)
 257                 return;
 258 
 259         nlm_host_notify_server(host, argp->state);
 260         nlm_host_notify_client(host, argp->state);
 261         nlm_host_release(g, host);
 262 }
 263 
 264 /*
 265  * Another available call-back for NFS statd.
 266  * Not currently used.
 267  */
/* ARGSUSED */
void
nlm_do_notify2(nlm_sm_status *argp, void *res, struct svc_req *sr)
{
	/* Not expected to ever be called; statd uses nlm_do_notify1. */
	ASSERT(0);
}
 274 
 275 
 276 /*
 277  * NLM_TEST, NLM_TEST_MSG,
 278  * NLM4_TEST, NLM4_TEST_MSG,
 279  * Client inquiry about locks, non-blocking.
 280  */
void
nlm_do_test(nlm4_testargs *argp, nlm4_testres *resp,
    struct svc_req *sr, nlm_testres_cb cb)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	struct nlm4_holder *lh;
	struct nlm_owner_handle *oh;
	nlm_rpc_t *rpcp = NULL;
	vnode_t *vp = NULL;
	struct netbuf *addr;
	char *netid;
	char *name;
	int error;
	struct flock64 fl;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	name = argp->alock.caller_name;
	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_findcreate(g, name, netid, addr);
	if (host == NULL) {
		resp->stat.stat = nlm4_denied_nolocks;
		return;
	}
	/*
	 * For the _MSG_ variants (cb != NULL) the reply goes back via
	 * a separate RPC call; get the client handle now so we can
	 * fail early if we cannot bind to the client's NLM service.
	 */
	if (cb != NULL) {
		error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
		if (error != 0) {
			resp->stat.stat = nlm4_denied_nolocks;
			goto out;
		}
	}

	vp = nlm_fh_to_vp(&argp->alock.fh);
	if (vp == NULL) {
		resp->stat.stat = nlm4_stale_fh;
		goto out;
	}

	/* During the grace period, deny the inquiry. */
	if (NLM_IN_GRACE(g)) {
		resp->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	/* Convert to local form. */
	error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers,
	    (argp->exclusive) ? F_WRLCK : F_RDLCK);
	if (error) {
		resp->stat.stat = nlm4_failed;
		goto out;
	}

	/* BSD: VOP_ADVLOCK(nv->nv_vp, NULL, F_GETLK, &fl, F_REMOTE); */
	error = nlm_vop_frlock(vp, F_GETLK, &fl,
	    F_REMOTELOCK | FREAD | FWRITE,
	    (u_offset_t)0, NULL, CRED(), NULL);
	if (error) {
		resp->stat.stat = nlm4_failed;
		goto out;
	}

	/* F_UNLCK back from F_GETLK means no conflicting lock exists. */
	if (fl.l_type == F_UNLCK) {
		resp->stat.stat = nlm4_granted;
		goto out;
	}
	resp->stat.stat = nlm4_denied;

	/*
	 * This lock "test" fails due to a conflicting lock.
	 *
	 * If this is a v1 client, make sure the conflicting
	 * lock range we report can be expressed with 32-bit
	 * offsets.  The lock range requested was expressed
	 * as 32-bit offset and length, so at least part of
	 * the conflicting lock should lie below MAX_UOFF32.
	 * If the conflicting lock extends past that, we'll
	 * trim the range to end at MAX_UOFF32 so this lock
	 * can be represented in a 32-bit response.  Check
	 * the start also (paranoid, but a low cost check).
	 */
	if (sr->rq_vers < NLM4_VERS) {
		uint64 maxlen;
		if (fl.l_start > MAX_UOFF32)
			fl.l_start = MAX_UOFF32;
		maxlen = MAX_UOFF32 + 1 - fl.l_start;
		if (fl.l_len > maxlen)
			fl.l_len = maxlen;
	}

	/*
	 * Build the nlm4_holder result structure.
	 *
	 * Note that lh->oh is freed via xdr_free,
	 * xdr_nlm4_holder, xdr_netobj, xdr_bytes.
	 */
	oh = kmem_zalloc(sizeof (*oh), KM_SLEEP);
	oh->oh_sysid = (sysid_t)fl.l_sysid;
	lh = &resp->stat.nlm4_testrply_u.holder;
	lh->exclusive = (fl.l_type == F_WRLCK);
	lh->svid = fl.l_pid;
	lh->oh.n_len = sizeof (*oh);
	lh->oh.n_bytes = (void *)oh;
	lh->l_offset = fl.l_start;
	lh->l_len = fl.l_len;

out:
	/*
	 * If we have a callback function, use that to
	 * deliver the response via another RPC call.
	 */
	if (cb != NULL && rpcp != NULL)
		NLM_INVOKE_CALLBACK("test", rpcp, resp, cb);

	if (vp != NULL)
		VN_RELE(vp);
	if (rpcp != NULL)
		nlm_host_rele_rpc(host, rpcp);

	nlm_host_release(g, host);
}
 404 
 405 /*
 406  * NLM_LOCK, NLM_LOCK_MSG, NLM_NM_LOCK
 407  * NLM4_LOCK, NLM4_LOCK_MSG, NLM4_NM_LOCK
 408  *
 409  * Client request to set a lock, possibly blocking.
 410  *
 411  * If the lock needs to block, we return status blocked to
 412  * this RPC call, and then later call back the client with
 413  * a "granted" callback.  Tricky aspects of this include:
 414  * sending a reply before this function returns, and then
 415  * borrowing this thread from the RPC service pool for the
 416  * wait on the lock and doing the later granted callback.
 417  *
 418  * We also have to keep a list of locks (pending + granted)
 419  * both to handle retransmitted requests, and to keep the
 420  * vnodes for those locks active.
 421  */
void
nlm_do_lock(nlm4_lockargs *argp, nlm4_res *resp, struct svc_req *sr,
    nlm_reply_cb reply_cb, nlm_res_cb res_cb, nlm_testargs_cb grant_cb)
{
	struct nlm_globals *g;
	struct flock64 fl;
	struct nlm_host *host = NULL;
	struct netbuf *addr;
	struct nlm_vhold *nvp = NULL;
	nlm_rpc_t *rpcp = NULL;
	char *netid;
	char *name;
	int error, flags;
	bool_t do_blocking = FALSE;	/* thread will block for the lock */
	bool_t do_mon_req = FALSE;	/* start statd monitoring of client */
	enum nlm4_stats status;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	name = argp->alock.caller_name;
	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_findcreate(g, name, netid, addr);
	if (host == NULL) {
		DTRACE_PROBE4(no__host, struct nlm_globals *, g,
		    char *, name, char *, netid, struct netbuf *, addr);
		status = nlm4_denied_nolocks;
		goto doreply;
	}

	DTRACE_PROBE3(start, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_lockargs *, argp);

	/*
	 * If we may need to do _msg_ call needing an RPC
	 * callback, get the RPC client handle now,
	 * so we know if we can bind to the NLM service on
	 * this client.
	 *
	 * Note: host object carries transport type.
	 * One client using multiple transports gets
	 * separate sysids for each of its transports.
	 */
	if (res_cb != NULL || (grant_cb != NULL && argp->block == TRUE)) {
		error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
		if (error != 0) {
			status = nlm4_denied_nolocks;
			goto doreply;
		}
	}

	/*
	 * During the "grace period", only allow reclaim.
	 */
	if (argp->reclaim == 0 && NLM_IN_GRACE(g)) {
		status = nlm4_denied_grace_period;
		goto doreply;
	}

	/*
	 * Check whether we missed host shutdown event
	 */
	if (nlm_host_get_state(host) != argp->state)
		nlm_host_notify_server(host, argp->state);

	/*
	 * Get a hold on the vnode for a lock operation.
	 * Only lock() and share() need vhold objects.
	 */
	nvp = nlm_fh_to_vhold(host, &argp->alock.fh);
	if (nvp == NULL) {
		status = nlm4_stale_fh;
		goto doreply;
	}

	/* Convert to local form. */
	error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers,
	    (argp->exclusive) ? F_WRLCK : F_RDLCK);
	if (error) {
		status = nlm4_failed;
		goto doreply;
	}

	/*
	 * Try to lock non-blocking first.  If we succeed
	 * getting the lock, we can reply with the granted
	 * status directly and avoid the complications of
	 * making the "granted" RPC callback later.
	 *
	 * This also lets us find out now about some
	 * possible errors like EROFS, etc.
	 */
	flags = F_REMOTELOCK | FREAD | FWRITE;
	error = nlm_vop_frlock(nvp->nv_vp, F_SETLK, &fl, flags,
	    (u_offset_t)0, NULL, CRED(), NULL);

	DTRACE_PROBE3(setlk__res, struct flock64 *, &fl,
	    int, flags, int, error);

	/* Map the frlock result onto an NLM status. */
	switch (error) {
	case 0:
		/* Got it without waiting! */
		status = nlm4_granted;
		do_mon_req = TRUE;
		break;

	/* EINPROGRESS too? */
	case EAGAIN:
		/* We did not get the lock. Should we block? */
		if (argp->block == FALSE || grant_cb == NULL) {
			status = nlm4_denied;
			break;
		}
		/*
		 * Should block.  Try to reserve this thread
		 * so we can use it to wait for the lock and
		 * later send the granted message.  If this
		 * reservation fails, say "no resources".
		 */
		if (!svc_reserve_thread(sr->rq_xprt)) {
			status = nlm4_denied_nolocks;
			break;
		}
		/*
		 * OK, can detach this thread, so this call
		 * will block below (after we reply).
		 */
		status = nlm4_blocked;
		do_blocking = TRUE;
		do_mon_req = TRUE;
		break;

	case ENOLCK:
		/* Failed for lack of resources. */
		status = nlm4_denied_nolocks;
		break;

	case EROFS:
		/* read-only file system */
		status = nlm4_rofs;
		break;

	case EFBIG:
		/* file too big */
		status = nlm4_fbig;
		break;

	case EDEADLK:
		/* dead lock condition */
		status = nlm4_deadlck;
		break;

	default:
		status = nlm4_denied;
		break;
	}

doreply:
	resp->stat.stat = status;

	/*
	 * We get one of two function pointers; one for a
	 * normal RPC reply, and another for doing an RPC
	 * "callback" _res reply for a _msg function.
	 * Use either of those to send the reply now.
	 *
	 * If sending this reply fails, just leave the
	 * lock in the list for retransmitted requests.
	 * Cleanup is via unlock or host rele (statmon).
	 */
	if (reply_cb != NULL) {
		/* i.e. nlm_lock_1_reply */
		if (!(*reply_cb)(sr->rq_xprt, resp))
			svcerr_systemerr(sr->rq_xprt);
	}
	if (res_cb != NULL && rpcp != NULL)
		NLM_INVOKE_CALLBACK("lock", rpcp, resp, res_cb);

	/*
	 * The reply has been sent to the client.
	 * Start monitoring this client (maybe).
	 *
	 * Note that the non-monitored (NM) calls pass grant_cb=NULL
	 * indicating that the client doesn't support RPC callbacks.
	 * No monitoring for these (lame) clients.
	 */
	if (do_mon_req && grant_cb != NULL)
		nlm_host_monitor(g, host, argp->state);

	if (do_blocking) {
		/*
		 * We need to block on this lock, and when that
		 * completes, do the granted RPC call. Note that
		 * we "reserved" this thread above, so we can now
		 * "detach" it from the RPC SVC pool, allowing it
		 * to block indefinitely if needed.
		 */
		ASSERT(rpcp != NULL);
		(void) svc_detach_thread(sr->rq_xprt);
		nlm_block(argp, host, nvp, rpcp, &fl, grant_cb);
	}

	DTRACE_PROBE3(lock__end, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_res *, resp);

	/*
	 * NOTE(review): this cleanup runs with host possibly NULL
	 * (the no-host doreply path); nlm_vhold_release() and
	 * nlm_host_release() presumably tolerate NULL args — confirm.
	 */
	if (rpcp != NULL)
		nlm_host_rele_rpc(host, rpcp);

	nlm_vhold_release(host, nvp);
	nlm_host_release(g, host);
}
 635 
 636 /*
 637  * Helper for nlm_do_lock(), partly for observability,
 638  * (we'll see a call blocked in this function) and
 639  * because nlm_do_lock() was getting quite long.
 640  */
static void
nlm_block(nlm4_lockargs *lockargs,
    struct nlm_host *host,
    struct nlm_vhold *nvp,
    nlm_rpc_t *rpcp,
    struct flock64 *flp,
    nlm_testargs_cb grant_cb)
{
	nlm4_testargs args;
	int error;
	flk_callback_t flk_cb;
	struct nlm_block_cb_data cb_data;

	/*
	 * Keep a list of blocked locks on nh_pending, and use it
	 * to cancel these threads in nlm_destroy_client_pending.
	 *
	 * Check to see if this lock is already in the list
	 * and if not, add an entry for it.  Allocate first,
	 * then if we don't insert, free the new one.
	 * Caller already has vp held.
	 */

	error = nlm_slreq_register(host, nvp, flp);
	if (error != 0) {
		/*
		 * A sleeping lock request with this fl is already
		 * registered by someone else. This means that
		 * some other thread is handling the request; let
		 * it do its work.
		 */
		ASSERT(error == EEXIST);
		return;
	}

	/*
	 * Arrange for nlm_block_callback() to run around the sleep
	 * so the slreq is unregistered before the lock goes active
	 * (avoids racing with nlm_do_cancel()).
	 */
	cb_data.hostp = host;
	cb_data.nvp = nvp;
	cb_data.flp = flp;
	flk_init_callback(&flk_cb, nlm_block_callback, &cb_data);

	/* BSD: VOP_ADVLOCK(vp, NULL, F_SETLK, fl, F_REMOTE); */
	error = nlm_vop_frlock(nvp->nv_vp, F_SETLKW, flp,
	    F_REMOTELOCK | FREAD | FWRITE,
	    (u_offset_t)0, &flk_cb, CRED(), NULL);

	if (error != 0) {
		/*
		 * We failed getting the lock, but have no way to
		 * tell the client about that.  Let 'em time out.
		 */
		(void) nlm_slreq_unregister(host, nvp, flp);
		return;
	}

	/*
	 * Do the "granted" call-back to the client.
	 */
	args.cookie	= lockargs->cookie;
	args.exclusive	= lockargs->exclusive;
	args.alock	= lockargs->alock;

	NLM_INVOKE_CALLBACK("grant", rpcp, &args, grant_cb);
}
 704 
 705 /*
 706  * The function that is used as flk callback when NLM server
 707  * sets new sleeping lock. The function unregisters NLM
 708  * sleeping lock request (nlm_slreq) associated with the
 709  * sleeping lock _before_ lock becomes active. It prevents
 710  * potential race condition between nlm_block() and
 711  * nlm_do_cancel().
 712  */
 713 static callb_cpr_t *
 714 nlm_block_callback(flk_cb_when_t when, void *data)
 715 {
 716         struct nlm_block_cb_data *cb_data;
 717 
 718         cb_data = (struct nlm_block_cb_data *)data;
 719         if (when == FLK_AFTER_SLEEP) {
 720                 (void) nlm_slreq_unregister(cb_data->hostp,
 721                     cb_data->nvp, cb_data->flp);
 722         }
 723 
 724         return (0);
 725 }
 726 
 727 /*
 728  * NLM_CANCEL, NLM_CANCEL_MSG,
 729  * NLM4_CANCEL, NLM4_CANCEL_MSG,
 730  * Client gives up waiting for a blocking lock.
 731  */
 732 void
 733 nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *resp,
 734     struct svc_req *sr, nlm_res_cb cb)
 735 {
 736         struct nlm_globals *g;
 737         struct nlm_host *host;
 738         struct netbuf *addr;
 739         struct nlm_vhold *nvp = NULL;
 740         nlm_rpc_t *rpcp = NULL;
 741         char *netid;
 742         char *name;
 743         int error;
 744         struct flock64 fl;
 745 
 746         nlm_copy_netobj(&resp->cookie, &argp->cookie);
 747         netid = svc_getnetid(sr->rq_xprt);
 748         addr = svc_getrpccaller(sr->rq_xprt);
 749         name = argp->alock.caller_name;
 750 
 751         g = zone_getspecific(nlm_zone_key, curzone);
 752         host = nlm_host_findcreate(g, name, netid, addr);
 753         if (host == NULL) {
 754                 resp->stat.stat = nlm4_denied_nolocks;
 755                 return;
 756         }
 757         if (cb != NULL) {
 758                 error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
 759                 if (error != 0) {
 760                         resp->stat.stat = nlm4_denied_nolocks;
 761                         return;
 762                 }
 763         }
 764 
 765         DTRACE_PROBE3(start, struct nlm_globals *, g,
 766             struct nlm_host *, host, nlm4_cancargs *, argp);
 767 
 768         if (NLM_IN_GRACE(g)) {
 769                 resp->stat.stat = nlm4_denied_grace_period;
 770                 goto out;
 771         }
 772 
 773         nvp = nlm_fh_to_vhold(host, &argp->alock.fh);
 774         if (nvp == NULL) {
 775                 resp->stat.stat = nlm4_stale_fh;
 776                 goto out;
 777         }
 778 
 779         /* Convert to local form. */
 780         error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers,
 781             (argp->exclusive) ? F_WRLCK : F_RDLCK);
 782         if (error) {
 783                 resp->stat.stat = nlm4_failed;
 784                 goto out;
 785         }
 786 
 787         error = nlm_slreq_unregister(host, nvp, &fl);
 788         if (error != 0) {
 789                 /*
 790                  * There's no sleeping lock request corresponding
 791                  * to the lock. Then requested sleeping lock
 792                  * doesn't exist.
 793                  */
 794                 resp->stat.stat = nlm4_denied;
 795                 goto out;
 796         }
 797 
 798         fl.l_type = F_UNLCK;
 799         error = nlm_vop_frlock(nvp->nv_vp, F_SETLK, &fl,
 800             F_REMOTELOCK | FREAD | FWRITE,
 801             (u_offset_t)0, NULL, CRED(), NULL);
 802 
 803         resp->stat.stat = (error == 0) ?
 804             nlm4_granted : nlm4_denied;
 805 
 806 out:
 807         /*
 808          * If we have a callback funtion, use that to
 809          * deliver the response via another RPC call.
 810          */
 811         if (cb != NULL && rpcp != NULL)
 812                 NLM_INVOKE_CALLBACK("cancel", rpcp, resp, cb);
 813 
 814         DTRACE_PROBE3(cancel__end, struct nlm_globals *, g,
 815             struct nlm_host *, host, nlm4_res *, resp);
 816 
 817         if (rpcp != NULL)
 818                 nlm_host_rele_rpc(host, rpcp);
 819 
 820         nlm_vhold_release(host, nvp);
 821         nlm_host_release(g, host);
 822 }
 823 
 824 /*
 825  * NLM_UNLOCK, NLM_UNLOCK_MSG,
 826  * NLM4_UNLOCK, NLM4_UNLOCK_MSG,
 827  * Client removes one of their locks.
 828  */
void
nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *resp,
    struct svc_req *sr, nlm_res_cb cb)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	struct netbuf *addr;
	nlm_rpc_t *rpcp = NULL;
	vnode_t *vp = NULL;
	char *netid;
	char *name;
	int error;
	struct flock64 fl;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);
	name = argp->alock.caller_name;

	/*
	 * NLM_UNLOCK operation doesn't have an error code
	 * denoting that operation failed, so we always
	 * return nlm4_granted except when the server is
	 * in a grace period.
	 */
	resp->stat.stat = nlm4_granted;

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_findcreate(g, name, netid, addr);
	if (host == NULL)
		return;

	/*
	 * For the _MSG_ variant (cb != NULL) the reply goes back via
	 * a separate RPC; get the client handle up front.
	 */
	if (cb != NULL) {
		error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
		if (error != 0)
			goto out;
	}

	DTRACE_PROBE3(start, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_unlockargs *, argp);

	/* The only failure we report: still in the grace period. */
	if (NLM_IN_GRACE(g)) {
		resp->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	vp = nlm_fh_to_vp(&argp->alock.fh);
	if (vp == NULL)
		goto out;

	/* Convert to local form. */
	error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers, F_UNLCK);
	if (error)
		goto out;

	/* BSD: VOP_ADVLOCK(nv->nv_vp, NULL, F_UNLCK, &fl, F_REMOTE); */
	error = nlm_vop_frlock(vp, F_SETLK, &fl,
	    F_REMOTELOCK | FREAD | FWRITE,
	    (u_offset_t)0, NULL, CRED(), NULL);

	DTRACE_PROBE1(unlock__res, int, error);
out:
	/*
	 * If we have a callback function, use that to
	 * deliver the response via another RPC call.
	 */
	if (cb != NULL && rpcp != NULL)
		NLM_INVOKE_CALLBACK("unlock", rpcp, resp, cb);

	DTRACE_PROBE3(unlock__end, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_res *, resp);

	if (vp != NULL)
		VN_RELE(vp);
	if (rpcp != NULL)
		nlm_host_rele_rpc(host, rpcp);

	nlm_host_release(g, host);
}
 909 
 910 /*
 911  * NLM_GRANTED, NLM_GRANTED_MSG,
 912  * NLM4_GRANTED, NLM4_GRANTED_MSG,
 913  *
 914  * This service routine is special.  It's the only one that's
 915  * really part of our NLM _client_ support, used by _servers_
 916  * to "call back" when a blocking lock from this NLM client
 917  * is granted by the server.  In this case, we _know_ there is
 918  * already an nlm_host allocated and held by the client code.
 919  * We want to find that nlm_host here.
 920  *
 921  * Over in nlm_call_lock(), the client encoded the sysid for this
 922  * server in the "owner handle" netbuf sent with our lock request.
 923  * We can now use that to find the nlm_host object we used there.
 924  * (NB: The owner handle is opaque to the server.)
 925  */
void
nlm_do_granted(nlm4_testargs *argp, nlm4_res *resp,
    struct svc_req *sr, nlm_res_cb cb)
{
	struct nlm_globals *g;
	struct nlm_owner_handle *oh;
	struct nlm_host *host;
	nlm_rpc_t *rpcp = NULL;
	int error;

	/* Echo the caller's cookie; assume "denied" until we succeed. */
	nlm_copy_netobj(&resp->cookie, &argp->cookie);
	resp->stat.stat = nlm4_denied;

	g = zone_getspecific(nlm_zone_key, curzone);

	/*
	 * The opaque owner handle was encoded by our own client side
	 * (nlm_call_lock) and carries the sysid of this server; use it
	 * to find the nlm_host object the client code already holds.
	 */
	oh = (void *) argp->alock.oh.n_bytes;
	if (oh == NULL)
		return;

	host = nlm_host_find_by_sysid(g, oh->oh_sysid);
	if (host == NULL)
		return;

	/*
	 * If the reply goes out via a callback (the *_MSG variant),
	 * get the RPC handle first so we can bail out early on failure.
	 */
	if (cb != NULL) {
		error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
		if (error != 0)
			goto out;
	}

	if (NLM_IN_GRACE(g)) {
		resp->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	/* Hand the granted lock to the waiting (sleeping) lock request. */
	error = nlm_slock_grant(g, host, &argp->alock);
	if (error == 0)
		resp->stat.stat = nlm4_granted;

out:
	/*
	 * If we have a callback function, use that to
	 * deliver the response via another RPC call.
	 */
	if (cb != NULL && rpcp != NULL)
		NLM_INVOKE_CALLBACK("do_granted", rpcp, resp, cb);

	if (rpcp != NULL)
		nlm_host_rele_rpc(host, rpcp);

	nlm_host_release(g, host);
}
 976 
 977 /*
 978  * NLM_FREE_ALL, NLM4_FREE_ALL
 979  *
 980  * Destroy all lock state for the calling client.
 981  */
void
nlm_do_free_all(nlm4_notify *argp, void *res, struct svc_req *sr)
{
	struct nlm_globals *g;
	struct nlm_host_list host_list;
	struct nlm_host *hostp;

	TAILQ_INIT(&host_list);
	g = zone_getspecific(nlm_zone_key, curzone);

	/* Serialize calls to clean locks. */
	mutex_enter(&g->clean_lock);

	/*
	 * Find all hosts that have the given node name and put them on a
	 * local list.  (A client may appear as multiple nlm_host entries,
	 * e.g. one per transport.)
	 */
	mutex_enter(&g->lock);
	for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL;
	    hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) {
		if (strcasecmp(hostp->nh_name, argp->name) == 0) {
			/*
			 * If needed take the host out of the idle list since
			 * we are taking a reference.
			 */
			if (hostp->nh_flags & NLM_NH_INIDLE) {
				TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
				hostp->nh_flags &= ~NLM_NH_INIDLE;
			}
			hostp->nh_refs++;

			/*
			 * nh_link is reused to chain the host onto our
			 * private list; it's free because the host was
			 * just removed from the idle list (if it was there).
			 */
			TAILQ_INSERT_TAIL(&host_list, hostp, nh_link);
		}
	}
	mutex_exit(&g->lock);

	/* Free locks for all hosts on the local list. */
	while (!TAILQ_EMPTY(&host_list)) {
		hostp = TAILQ_FIRST(&host_list);
		TAILQ_REMOVE(&host_list, hostp, nh_link);

		/*
		 * Note that this does not do client-side cleanup.
		 * We want to do that ONLY if statd tells us the
		 * server has restarted.
		 */
		nlm_host_notify_server(hostp, argp->state);
		nlm_host_release(g, hostp);
	}

	mutex_exit(&g->clean_lock);

	/* FREE_ALL has a void reply; res and sr are intentionally unused. */
	(void) res;
	(void) sr;
}
1037 
1038 static void
1039 nlm_init_shrlock(struct shrlock *shr,
1040     nlm4_share *nshare, struct nlm_host *host)
1041 {
1042 
1043         switch (nshare->access) {
1044         default:
1045         case fsa_NONE:
1046                 shr->s_access = 0;
1047                 break;
1048         case fsa_R:
1049                 shr->s_access = F_RDACC;
1050                 break;
1051         case fsa_W:
1052                 shr->s_access = F_WRACC;
1053                 break;
1054         case fsa_RW:
1055                 shr->s_access = F_RWACC;
1056                 break;
1057         }
1058 
1059         switch (nshare->mode) {
1060         default:
1061         case fsm_DN:
1062                 shr->s_deny = F_NODNY;
1063                 break;
1064         case fsm_DR:
1065                 shr->s_deny = F_RDDNY;
1066                 break;
1067         case fsm_DW:
1068                 shr->s_deny = F_WRDNY;
1069                 break;
1070         case fsm_DRW:
1071                 shr->s_deny = F_RWDNY;
1072                 break;
1073         }
1074 
1075         shr->s_sysid = host->nh_sysid;
1076         shr->s_pid = 0;
1077         shr->s_own_len = nshare->oh.n_len;
1078         shr->s_owner   = nshare->oh.n_bytes;
1079 }
1080 
1081 /*
1082  * NLM_SHARE, NLM4_SHARE
1083  *
1084  * Request a DOS-style share reservation
1085  */
void
nlm_do_share(nlm4_shareargs *argp, nlm4_shareres *resp, struct svc_req *sr)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	struct netbuf *addr;
	struct nlm_vhold *nvp = NULL;
	char *netid;
	char *name;
	int error;
	struct shrlock shr;

	/* Echo the client's cookie back in the response. */
	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	name = argp->share.caller_name;
	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_findcreate(g, name, netid, addr);
	if (host == NULL) {
		resp->stat = nlm4_denied_nolocks;
		return;
	}

	DTRACE_PROBE3(share__start, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_shareargs *, argp);

	/* Reclaim requests are allowed through during the grace period. */
	if (argp->reclaim == 0 && NLM_IN_GRACE(g)) {
		resp->stat = nlm4_denied_grace_period;
		goto out;
	}

	/*
	 * Get a vhold (held vnode) for the file.
	 * Only lock() and share() need vhold objects.
	 */
	nvp = nlm_fh_to_vhold(host, &argp->share.fh);
	if (nvp == NULL) {
		resp->stat = nlm4_stale_fh;
		goto out;
	}

	/* Convert to local form. */
	nlm_init_shrlock(&shr, &argp->share, host);
	error = VOP_SHRLOCK(nvp->nv_vp, F_SHARE, &shr,
	    FREAD | FWRITE, CRED(), NULL);

	if (error == 0) {
		resp->stat = nlm4_granted;
		/* Host now holds state here; start monitoring it. */
		nlm_host_monitor(g, host, 0);
	} else {
		resp->stat = nlm4_denied;
	}

out:
	DTRACE_PROBE3(share__end, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_shareres *, resp);

	nlm_vhold_release(host, nvp);
	nlm_host_release(g, host);
}
1148 
1149 /*
1150  * NLM_UNSHARE, NLM4_UNSHARE
1151  *
1152  * Release a DOS-style share reservation
1153  */
void
nlm_do_unshare(nlm4_shareargs *argp, nlm4_shareres *resp, struct svc_req *sr)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	struct netbuf *addr;
	vnode_t *vp = NULL;
	char *netid;
	int error;
	struct shrlock shr;

	/* Echo the client's cookie back in the response. */
	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);

	/*
	 * Unlike share(), use plain nlm_host_find() here: if the host
	 * is unknown it can't hold any shares to release.
	 */
	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_find(g, netid, addr);
	if (host == NULL) {
		resp->stat = nlm4_denied_nolocks;
		return;
	}

	DTRACE_PROBE3(unshare__start, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_shareargs *, argp);

	if (NLM_IN_GRACE(g)) {
		resp->stat = nlm4_denied_grace_period;
		goto out;
	}

	vp = nlm_fh_to_vp(&argp->share.fh);
	if (vp == NULL) {
		resp->stat = nlm4_stale_fh;
		goto out;
	}

	/* Convert to local form. */
	nlm_init_shrlock(&shr, &argp->share, host);
	error = VOP_SHRLOCK(vp, F_UNSHARE, &shr,
	    FREAD | FWRITE, CRED(), NULL);

	/*
	 * UNSHARE has no failure status other than the grace period,
	 * so the VOP_SHRLOCK error is deliberately discarded and we
	 * always report nlm4_granted.
	 */
	(void) error;
	resp->stat = nlm4_granted;

out:
	DTRACE_PROBE3(unshare__end, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_shareres *, resp);

	if (vp != NULL)
		VN_RELE(vp);

	nlm_host_release(g, host);
}
1208 
1209 /*
1210  * NLM wrapper to VOP_FRLOCK that checks the validity of the lock before
1211  * invoking the vnode operation.
1212  */
1213 static int
1214 nlm_vop_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset,
1215         struct flk_callback *flk_cbp, cred_t *cr, caller_context_t *ct)
1216 {
1217         if (bfp->l_len != 0 && bfp->l_start + (bfp->l_len - 1)
1218             < bfp->l_start) {
1219                 return (EOVERFLOW);
1220         }
1221 
1222         return (VOP_FRLOCK(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
1223 }