/*
 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
 * Authors: Doug Rabson <dfr@rabson.org>
 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

/*
 * NFS Lock Manager service functions (nlm_do_...)
 * Called from nlm_rpc_svc.c wrappers.
 *
 * Source code derived from FreeBSD nlm_prot_impl.c
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/thread.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/mount.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/share.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/taskq.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/queue.h>
#include <sys/sdt.h>
#include <netinet/in.h>

#include <rpc/rpc.h>
#include <rpc/xdr.h>
#include <rpc/pmap_prot.h>
#include <rpc/pmap_clnt.h>
#include <rpc/rpcb_prot.h>

#include <rpcsvc/nlm_prot.h>
#include <rpcsvc/sm_inter.h>

#include <nfs/nfs.h>
#include <nfs/nfs_clnt.h>
#include <nfs/export.h>
#include <nfs/rnode.h>

#include "nlm_impl.h"

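/*
 * NLM_IN_GRACE() is true while this zone's NLM is still in its
 * grace period, i.e. while ddi_get_lbolt() has not yet reached
 * the grace_threshold recorded in the per-zone globals.
 */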
#define	NLM_IN_GRACE(g) (ddi_get_lbolt() < (g)->grace_threshold)

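/*
 * Context passed from nlm_block() to nlm_block_callback()
 * while a blocking lock request waits in nlm_vop_frlock().
 */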
struct nlm_block_cb_data {
	struct nlm_host *hostp;
	struct nlm_vhold *nvp;
	struct flock64 *flp;
};

/*
 * Invoke an asynchronous RPC callback
 * (used when the NLM server needs to reply to an NLM _MSG_ procedure).
 */
#define	NLM_INVOKE_CALLBACK(descr, rpcp, resp, callb)			\
	do {								\
		enum clnt_stat _stat;					\
									\
		_stat = (*(callb))(resp, NULL, (rpcp)->nr_handle);	\
		if (_stat != RPC_SUCCESS && _stat != RPC_TIMEDOUT) {	\
			struct rpc_err _err;				\
									\
			CLNT_GETERR((rpcp)->nr_handle, &_err);		\
			NLM_ERR("NLM: %s callback failed: "		\
			    "stat %d, err %d\n", descr, _stat,		\
			    _err.re_errno);				\
		}							\
	_NOTE(CONSTCOND) } while (0)

static void nlm_block(
	nlm4_lockargs *lockargs,
	struct nlm_host *host,
	struct nlm_vhold *nvp,
	nlm_rpc_t *rpcp,
	struct flock64 *fl,
	nlm_testargs_cb grant_cb);

static vnode_t *nlm_fh_to_vp(struct netobj *);
static struct nlm_vhold *nlm_fh_to_vhold(struct nlm_host *, struct netobj *);
static void nlm_init_shrlock(struct shrlock *, nlm4_share *, struct nlm_host *);
static callb_cpr_t *nlm_block_callback(flk_cb_when_t, void *);
static int nlm_vop_frlock(vnode_t *, int, flock64_t *, int, offset_t,
    struct flk_callback *, cred_t *, caller_context_t *);

/*
 * Convert a lock from network to local form, and
 * check for valid range (no overflow).
 */
static int
nlm_init_flock(struct flock64 *fl, struct nlm4_lock *nl,
    struct nlm_host *host, rpcvers_t vers, short type)
{
	uint64_t off, len;

	bzero(fl, sizeof (*fl));
	off = nl->l_offset;
	len = nl->l_len;

	if (vers < NLM4_VERS) {
		/*
		 * Make sure range is valid for 32-bit client.
		 * Also allow len == ~0 to mean lock to EOF,
		 * which is supposed to be l_len == 0.
		 */
		if (len == MAX_UOFF32)
			len = 0;
		if (off > MAX_UOFF32 || len > MAX_UOFF32)
			return (EINVAL);
		if (off + len > MAX_UOFF32 + 1)
			return (EINVAL);
	} else {
		/*
		 * Check range for 64-bit client (no overflow).
		 * Again allow len == ~0 to mean lock to EOF.
		 */
		if (len == MAX_U_OFFSET_T)
			len = 0;
		if (len != 0 && off + (len - 1) < off)
			return (EINVAL);
	}

	fl->l_type = type;
	fl->l_whence = SEEK_SET;
	fl->l_start = off;
	fl->l_len = len;
	fl->l_sysid = host->nh_sysid;
	fl->l_pid = nl->svid;
	/* l_pad */

	return (0);
}

/*
 * Get a vnode from the client's file handle.
 * NOTE: The returned vnode is held; it _must_ be explicitly
 * released by VN_RELE().
 */
static vnode_t *
nlm_fh_to_vp(struct netobj *fh)
{
	fhandle_t *fhp;

	/*
	 * Get a vnode pointer for the given NFS file handle.
	 * Note that it could be an NFSv2 or NFSv3 handle,
	 * which means the size might vary. (don't copy)
	 */
	if (fh->n_len < sizeof (*fhp))
		return (NULL);

	/* We know this is aligned (kmem_alloc) */
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	fhp = (fhandle_t *)fh->n_bytes;
	return (lm_fhtovp(fhp));
}

/*
 * Get a vhold object for the client's file handle.  Like
 * nlm_fh_to_vp(), but returns a per-host vhold wrapper for
 * the vnode rather than a bare vnode pointer.
 *
 * NOTE: vhold object _must_ be explicitly released by
 * nlm_vhold_release().
 */
static struct nlm_vhold *
nlm_fh_to_vhold(struct nlm_host *hostp, struct netobj *fh)
{
	vnode_t *vp;
	struct nlm_vhold *nvp;

	vp = nlm_fh_to_vp(fh);
	if (vp == NULL)
		return (NULL);

	nvp = nlm_vhold_get(hostp, vp);

	/*
	 * Both nlm_fh_to_vp() and nlm_vhold_get()
	 * do VN_HOLD(), so we need to drop one
	 * reference on vnode.
	 */
	VN_RELE(vp);
	return (nvp);
}

/* ******************************************************************* */

/*
 * NLM implementation details, called from the RPC svc code.
 */

/*
 * Call-back from NFS statd, used to notify that one of our
 * hosts had a status change. The host can be either an
 * NFS client, NFS server or both.
 * According to the NSM protocol description, the state is a
 * number that increases monotonically each time the state of
 * the host changes. An even number indicates that the host
 * is down, while an odd number indicates that the host is up.
 *
 * Here we ignore this even/odd difference of the status number
 * reported by the NSM; we launch notification handlers
 * every time the state changes. The reason why we do so
 * is that client and server can talk to each other using
 * a connectionless transport, and it's easy to lose a packet
 * containing an NSM notification with a status number update.
 *
 * In nlm_host_monitor(), we put the sysid in the private data
 * that statd carries in this callback, so we can easily find
 * the host this call applies to.
 */
/* ARGSUSED */
void
nlm_do_notify1(nlm_sm_status *argp, void *res, struct svc_req *sr)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	uint16_t sysid;

	g = zone_getspecific(nlm_zone_key, curzone);
	bcopy(&argp->priv, &sysid, sizeof (sysid));

	DTRACE_PROBE2(nsm__notify, uint16_t, sysid,
	    int, argp->state);

	host = nlm_host_find_by_sysid(g, (sysid_t)sysid);
	if (host == NULL)
		return;

	nlm_host_notify_server(host, argp->state);
	nlm_host_notify_client(host, argp->state);
	nlm_host_release(g, host);
}

/*
 * Another available call-back for NFS statd.
 * Not currently used.
 */
/* ARGSUSED */
void
nlm_do_notify2(nlm_sm_status *argp, void *res, struct svc_req *sr)
{
	ASSERT(0);
}


/*
 * NLM_TEST, NLM_TEST_MSG,
 * NLM4_TEST, NLM4_TEST_MSG,
 * Client inquiry about locks, non-blocking.
 */
void
nlm_do_test(nlm4_testargs *argp, nlm4_testres *resp,
    struct svc_req *sr, nlm_testres_cb cb)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	struct nlm4_holder *lh;
	struct nlm_owner_handle *oh;
	nlm_rpc_t *rpcp = NULL;
	vnode_t *vp = NULL;
	struct netbuf *addr;
	char *netid;
	char *name;
	int error;
	struct flock64 fl;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	name = argp->alock.caller_name;
	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_findcreate(g, name, netid, addr);
	if (host == NULL) {
		resp->stat.stat = nlm4_denied_nolocks;
		return;
	}
	if (cb != NULL) {
		error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
		if (error != 0) {
			resp->stat.stat = nlm4_denied_nolocks;
			goto out;
		}
	}

	vp = nlm_fh_to_vp(&argp->alock.fh);
	if (vp == NULL) {
		resp->stat.stat = nlm4_stale_fh;
		goto out;
	}

	if (NLM_IN_GRACE(g)) {
		resp->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	/* Convert to local form. */
	error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers,
	    (argp->exclusive) ? F_WRLCK : F_RDLCK);
	if (error) {
		resp->stat.stat = nlm4_failed;
		goto out;
	}

	/* BSD: VOP_ADVLOCK(nv->nv_vp, NULL, F_GETLK, &fl, F_REMOTE); */
	error = nlm_vop_frlock(vp, F_GETLK, &fl,
	    F_REMOTELOCK | FREAD | FWRITE,
	    (u_offset_t)0, NULL, CRED(), NULL);
	if (error) {
		resp->stat.stat = nlm4_failed;
		goto out;
	}

	if (fl.l_type == F_UNLCK) {
		resp->stat.stat = nlm4_granted;
		goto out;
	}
	resp->stat.stat = nlm4_denied;

	/*
	 * This lock "test" fails due to a conflicting lock.
	 *
	 * If this is a v1 client, make sure the conflicting
	 * lock range we report can be expressed with 32-bit
	 * offsets. The lock range requested was expressed
	 * as 32-bit offset and length, so at least part of
	 * the conflicting lock should lie below MAX_UOFF32.
	 * If the conflicting lock extends past that, we'll
	 * trim the range to end at MAX_UOFF32 so this lock
	 * can be represented in a 32-bit response. Check
	 * the start also (paranoid, but a low cost check).
	 */
	if (sr->rq_vers < NLM4_VERS) {
		uint64 maxlen;
		if (fl.l_start > MAX_UOFF32)
			fl.l_start = MAX_UOFF32;
		maxlen = MAX_UOFF32 + 1 - fl.l_start;
		if (fl.l_len > maxlen)
			fl.l_len = maxlen;
	}

	/*
	 * Build the nlm4_holder result structure.
	 *
	 * Note that lh->oh is freed via xdr_free,
	 * xdr_nlm4_holder, xdr_netobj, xdr_bytes.
	 */
	oh = kmem_zalloc(sizeof (*oh), KM_SLEEP);
	oh->oh_sysid = (sysid_t)fl.l_sysid;
	lh = &resp->stat.nlm4_testrply_u.holder;
	lh->exclusive = (fl.l_type == F_WRLCK);
	lh->svid = fl.l_pid;
	lh->oh.n_len = sizeof (*oh);
	lh->oh.n_bytes = (void *)oh;
	lh->l_offset = fl.l_start;
	lh->l_len = fl.l_len;

out:
	/*
	 * If we have a callback function, use that to
	 * deliver the response via another RPC call.
	 */
	if (cb != NULL && rpcp != NULL)
		NLM_INVOKE_CALLBACK("test", rpcp, resp, cb);

	if (vp != NULL)
		VN_RELE(vp);
	if (rpcp != NULL)
		nlm_host_rele_rpc(host, rpcp);

	nlm_host_release(g, host);
}

/*
 * NLM_LOCK, NLM_LOCK_MSG, NLM_NM_LOCK
 * NLM4_LOCK, NLM4_LOCK_MSG, NLM4_NM_LOCK
 *
 * Client request to set a lock, possibly blocking.
 *
 * If the lock needs to block, we return status blocked to
 * this RPC call, and then later call back the client with
 * a "granted" callback. Tricky aspects of this include:
 * sending a reply before this function returns, and then
 * borrowing this thread from the RPC service pool for the
 * wait on the lock and doing the later granted callback.
 *
 * We also have to keep a list of locks (pending + granted)
 * both to handle retransmitted requests, and to keep the
 * vnodes for those locks active.
 */
void
nlm_do_lock(nlm4_lockargs *argp, nlm4_res *resp, struct svc_req *sr,
    nlm_reply_cb reply_cb, nlm_res_cb res_cb, nlm_testargs_cb grant_cb)
{
	struct nlm_globals *g;
	struct flock64 fl;
	struct nlm_host *host = NULL;
	struct netbuf *addr;
	struct nlm_vhold *nvp = NULL;
	nlm_rpc_t *rpcp = NULL;
	char *netid;
	char *name;
	int error, flags;
	bool_t do_blocking = FALSE;
	bool_t do_mon_req = FALSE;
	enum nlm4_stats status;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	name = argp->alock.caller_name;
	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_findcreate(g, name, netid, addr);
	if (host == NULL) {
		DTRACE_PROBE4(no__host, struct nlm_globals *, g,
		    char *, name, char *, netid, struct netbuf *, addr);
		status = nlm4_denied_nolocks;
		goto doreply;
	}

	DTRACE_PROBE3(start, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_lockargs *, argp);

	/*
	 * If we might need to make a _msg_ call that requires an
	 * RPC callback, get the RPC client handle now, so we know
	 * whether we can bind to the NLM service on this client.
	 *
	 * Note: the host object carries the transport type.
	 * One client using multiple transports gets
	 * separate sysids for each of its transports.
	 */
	if (res_cb != NULL || (grant_cb != NULL && argp->block == TRUE)) {
		error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
		if (error != 0) {
			status = nlm4_denied_nolocks;
			goto doreply;
		}
	}

	/*
	 * During the "grace period", only allow reclaim.
	 */
	if (argp->reclaim == 0 && NLM_IN_GRACE(g)) {
		status = nlm4_denied_grace_period;
		goto doreply;
	}

	/*
	 * Check whether we missed host shutdown event
	 */
	if (nlm_host_get_state(host) != argp->state)
		nlm_host_notify_server(host, argp->state);

	/*
	 * Get a hold on the vnode for a lock operation.
	 * Only lock() and share() need vhold objects.
	 */
	nvp = nlm_fh_to_vhold(host, &argp->alock.fh);
	if (nvp == NULL) {
		status = nlm4_stale_fh;
		goto doreply;
	}

	/* Convert to local form. */
	error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers,
	    (argp->exclusive) ? F_WRLCK : F_RDLCK);
	if (error) {
		status = nlm4_failed;
		goto doreply;
	}

	/*
	 * Try to lock non-blocking first. If we succeed
	 * getting the lock, we can reply with the granted
	 * status directly and avoid the complications of
	 * making the "granted" RPC callback later.
	 *
	 * This also lets us find out now about some
	 * possible errors like EROFS, etc.
	 */
	flags = F_REMOTELOCK | FREAD | FWRITE;
	error = nlm_vop_frlock(nvp->nv_vp, F_SETLK, &fl, flags,
	    (u_offset_t)0, NULL, CRED(), NULL);

	DTRACE_PROBE3(setlk__res, struct flock64 *, &fl,
	    int, flags, int, error);

	switch (error) {
	case 0:
		/* Got it without waiting! */
		status = nlm4_granted;
		do_mon_req = TRUE;
		break;

	/* EINPROGRESS too? */
	case EAGAIN:
		/* We did not get the lock. Should we block? */
		if (argp->block == FALSE || grant_cb == NULL) {
			status = nlm4_denied;
			break;
		}
		/*
		 * Should block. Try to reserve this thread
		 * so we can use it to wait for the lock and
		 * later send the granted message. If this
		 * reservation fails, say "no resources".
		 */
		if (!svc_reserve_thread(sr->rq_xprt)) {
			status = nlm4_denied_nolocks;
			break;
		}
		/*
		 * OK, can detach this thread, so this call
		 * will block below (after we reply).
		 */
		status = nlm4_blocked;
		do_blocking = TRUE;
		do_mon_req = TRUE;
		break;

	case ENOLCK:
		/* Failed for lack of resources. */
		status = nlm4_denied_nolocks;
		break;

	case EROFS:
		/* read-only file system */
		status = nlm4_rofs;
		break;

	case EFBIG:
		/* file too big */
		status = nlm4_fbig;
		break;

	case EDEADLK:
		/* dead lock condition */
		status = nlm4_deadlck;
		break;

	default:
		status = nlm4_denied;
		break;
	}

doreply:
	resp->stat.stat = status;

	/*
	 * We get one of two function pointers; one for a
	 * normal RPC reply, and another for doing an RPC
	 * "callback" _res reply for a _msg function.
	 * Use either of those to send the reply now.
	 *
	 * If sending this reply fails, just leave the
	 * lock in the list for retransmitted requests.
	 * Cleanup is via unlock or host rele (statmon).
	 */
	if (reply_cb != NULL) {
		/* i.e. nlm_lock_1_reply */
		if (!(*reply_cb)(sr->rq_xprt, resp))
			svcerr_systemerr(sr->rq_xprt);
	}
	if (res_cb != NULL && rpcp != NULL)
		NLM_INVOKE_CALLBACK("lock", rpcp, resp, res_cb);

	/*
	 * The reply has been sent to the client.
	 * Start monitoring this client (maybe).
	 *
	 * Note that the non-monitored (NM) calls pass grant_cb=NULL
	 * indicating that the client doesn't support RPC callbacks.
	 * No monitoring for these (lame) clients.
	 */
	if (do_mon_req && grant_cb != NULL)
		nlm_host_monitor(g, host, argp->state);

	if (do_blocking) {
		/*
		 * We need to block on this lock, and when that
		 * completes, do the granted RPC call. Note that
		 * we "reserved" this thread above, so we can now
		 * "detach" it from the RPC SVC pool, allowing it
		 * to block indefinitely if needed.
		 */
		ASSERT(rpcp != NULL);
		(void) svc_detach_thread(sr->rq_xprt);
		nlm_block(argp, host, nvp, rpcp, &fl, grant_cb);
	}

	DTRACE_PROBE3(lock__end, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_res *, resp);

	if (rpcp != NULL)
		nlm_host_rele_rpc(host, rpcp);

	nlm_vhold_release(host, nvp);
	nlm_host_release(g, host);
}

/*
 * Helper for nlm_do_lock(), split out partly for observability
 * (we'll see a call blocked in this function) and partly
 * because nlm_do_lock() was getting quite long.
 */
static void
nlm_block(nlm4_lockargs *lockargs,
    struct nlm_host *host,
    struct nlm_vhold *nvp,
    nlm_rpc_t *rpcp,
    struct flock64 *flp,
    nlm_testargs_cb grant_cb)
{
	nlm4_testargs args;
	int error;
	flk_callback_t flk_cb;
	struct nlm_block_cb_data cb_data;

	/*
	 * Keep a list of blocked locks on nh_pending, and use it
	 * to cancel these threads in nlm_destroy_client_pending.
	 *
	 * Check to see if this lock is already in the list
	 * and if not, add an entry for it. Allocate first,
	 * then if we don't insert, free the new one.
	 * Caller already has vp held.
	 */

	error = nlm_slreq_register(host, nvp, flp);
	if (error != 0) {
		/*
		 * A sleeping lock request with the given fl is already
		 * registered by someone else. This means that some
		 * other thread is handling the request; let it do
		 * its work.
		 */
		ASSERT(error == EEXIST);
		return;
	}

	cb_data.hostp = host;
	cb_data.nvp = nvp;
	cb_data.flp = flp;
	flk_init_callback(&flk_cb, nlm_block_callback, &cb_data);

	/* BSD: VOP_ADVLOCK(vp, NULL, F_SETLK, fl, F_REMOTE); */
	error = nlm_vop_frlock(nvp->nv_vp, F_SETLKW, flp,
	    F_REMOTELOCK | FREAD | FWRITE,
	    (u_offset_t)0, &flk_cb, CRED(), NULL);

	if (error != 0) {
		/*
		 * We failed getting the lock, but have no way to
		 * tell the client about that. Let 'em time out.
		 */
		(void) nlm_slreq_unregister(host, nvp, flp);
		return;
	}

	/*
	 * Do the "granted" call-back to the client.
	 */
	args.cookie = lockargs->cookie;
	args.exclusive = lockargs->exclusive;
	args.alock = lockargs->alock;

	NLM_INVOKE_CALLBACK("grant", rpcp, &args, grant_cb);
}

/*
 * The flk callback used when the NLM server sets a new sleeping
 * lock. It unregisters the NLM sleeping lock request (nlm_slreq)
 * associated with the sleeping lock _before_ the lock becomes
 * active, which prevents a potential race condition between
 * nlm_block() and nlm_do_cancel().
 */
static callb_cpr_t *
nlm_block_callback(flk_cb_when_t when, void *data)
{
	struct nlm_block_cb_data *cb_data;

	cb_data = (struct nlm_block_cb_data *)data;
	if (when == FLK_AFTER_SLEEP) {
		(void) nlm_slreq_unregister(cb_data->hostp,
		    cb_data->nvp, cb_data->flp);
	}

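	/* We have no callb_cpr_t to hand back to the flock code. */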
	return (0);
}

/*
 * NLM_CANCEL, NLM_CANCEL_MSG,
 * NLM4_CANCEL, NLM4_CANCEL_MSG,
 * Client gives up waiting for a blocking lock.
 */
void
nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *resp,
    struct svc_req *sr, nlm_res_cb cb)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	struct netbuf *addr;
	struct nlm_vhold *nvp = NULL;
	nlm_rpc_t *rpcp = NULL;
	char *netid;
	char *name;
	int error;
	struct flock64 fl;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);
	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);
	name = argp->alock.caller_name;

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_findcreate(g, name, netid, addr);
	if (host == NULL) {
		resp->stat.stat = nlm4_denied_nolocks;
		return;
	}
	if (cb != NULL) {
		error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
		if (error != 0) {
			resp->stat.stat = nlm4_denied_nolocks;
			return;
		}
	}

	DTRACE_PROBE3(start, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_cancargs *, argp);

	if (NLM_IN_GRACE(g)) {
		resp->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	nvp = nlm_fh_to_vhold(host, &argp->alock.fh);
	if (nvp == NULL) {
		resp->stat.stat = nlm4_stale_fh;
		goto out;
	}

	/* Convert to local form. */
	error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers,
	    (argp->exclusive) ? F_WRLCK : F_RDLCK);
	if (error) {
		resp->stat.stat = nlm4_failed;
		goto out;
	}

	error = nlm_slreq_unregister(host, nvp, &fl);
	if (error != 0) {
		/*
		 * There's no sleeping lock request corresponding
		 * to the lock, i.e. the sleeping lock the client
		 * asked us to cancel doesn't exist.
		 */
		resp->stat.stat = nlm4_denied;
		goto out;
	}

	fl.l_type = F_UNLCK;
	error = nlm_vop_frlock(nvp->nv_vp, F_SETLK, &fl,
	    F_REMOTELOCK | FREAD | FWRITE,
	    (u_offset_t)0, NULL, CRED(), NULL);

	resp->stat.stat = (error == 0) ?
	    nlm4_granted : nlm4_denied;

out:
	/*
	 * If we have a callback function, use that to
	 * deliver the response via another RPC call.
	 */
	if (cb != NULL && rpcp != NULL)
		NLM_INVOKE_CALLBACK("cancel", rpcp, resp, cb);

	DTRACE_PROBE3(cancel__end, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_res *, resp);

	if (rpcp != NULL)
		nlm_host_rele_rpc(host, rpcp);

	nlm_vhold_release(host, nvp);
	nlm_host_release(g, host);
}

/*
 * NLM_UNLOCK, NLM_UNLOCK_MSG,
 * NLM4_UNLOCK, NLM4_UNLOCK_MSG,
 * Client removes one of their locks.
 */
void
nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *resp,
    struct svc_req *sr, nlm_res_cb cb)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	struct netbuf *addr;
	nlm_rpc_t *rpcp = NULL;
	vnode_t *vp = NULL;
	char *netid;
	char *name;
	int error;
	struct flock64 fl;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);
	name = argp->alock.caller_name;

	/*
	 * The NLM_UNLOCK operation doesn't have an error code
	 * denoting that the operation failed, so we always
	 * return nlm4_granted except when the server is
	 * in its grace period.
	 */
	resp->stat.stat = nlm4_granted;

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_findcreate(g, name, netid, addr);
	if (host == NULL)
		return;

	if (cb != NULL) {
		error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
		if (error != 0)
			goto out;
	}

	DTRACE_PROBE3(start, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_unlockargs *, argp);

	if (NLM_IN_GRACE(g)) {
		resp->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	vp = nlm_fh_to_vp(&argp->alock.fh);
	if (vp == NULL)
		goto out;

	/* Convert to local form. */
	error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers, F_UNLCK);
	if (error)
		goto out;

	/* BSD: VOP_ADVLOCK(nv->nv_vp, NULL, F_UNLCK, &fl, F_REMOTE); */
	error = nlm_vop_frlock(vp, F_SETLK, &fl,
	    F_REMOTELOCK | FREAD | FWRITE,
	    (u_offset_t)0, NULL, CRED(), NULL);

	DTRACE_PROBE1(unlock__res, int, error);
out:
	/*
	 * If we have a callback function, use that to
	 * deliver the response via another RPC call.
	 */
	if (cb != NULL && rpcp != NULL)
		NLM_INVOKE_CALLBACK("unlock", rpcp, resp, cb);

	DTRACE_PROBE3(unlock__end, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_res *, resp);

	if (vp != NULL)
		VN_RELE(vp);
	if (rpcp != NULL)
		nlm_host_rele_rpc(host, rpcp);

	nlm_host_release(g, host);
}

/*
 * NLM_GRANTED, NLM_GRANTED_MSG,
 * NLM4_GRANTED, NLM4_GRANTED_MSG,
 *
 * This service routine is special. It's the only one that's
 * really part of our NLM _client_ support, used by _servers_
 * to "call back" when a blocking lock from this NLM client
 * is granted by the server. In this case, we _know_ there is
 * already an nlm_host allocated and held by the client code.
 * We want to find that nlm_host here.
 *
 * Over in nlm_call_lock(), the client encoded the sysid for this
 * server in the "owner handle" netbuf sent with our lock request.
 * We can now use that to find the nlm_host object we used there.
 * (NB: The owner handle is opaque to the server.)
 */
void
nlm_do_granted(nlm4_testargs *argp, nlm4_res *resp,
    struct svc_req *sr, nlm_res_cb cb)
{
	struct nlm_globals *g;
	struct nlm_owner_handle *oh;
	struct nlm_host *host;
	nlm_rpc_t *rpcp = NULL;
	int error;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);
	resp->stat.stat = nlm4_denied;

	g = zone_getspecific(nlm_zone_key, curzone);
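	/*
	 * The "owner handle" is the one we built in nlm_call_lock();
	 * it carries the sysid of the server's nlm_host entry.
	 */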
	oh = (void *) argp->alock.oh.n_bytes;
	if (oh == NULL)
		return;

	host = nlm_host_find_by_sysid(g, oh->oh_sysid);
	if (host == NULL)
		return;

	if (cb != NULL) {
		error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
		if (error != 0)
			goto out;
	}

	if (NLM_IN_GRACE(g)) {
		resp->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	error = nlm_slock_grant(g, host, &argp->alock);
	if (error == 0)
		resp->stat.stat = nlm4_granted;

out:
	/*
	 * If we have a callback function, use that to
	 * deliver the response via another RPC call.
	 */
	if (cb != NULL && rpcp != NULL)
		NLM_INVOKE_CALLBACK("do_granted", rpcp, resp, cb);

	if (rpcp != NULL)
		nlm_host_rele_rpc(host, rpcp);

	nlm_host_release(g, host);
}

/*
 * NLM_FREE_ALL, NLM4_FREE_ALL
 *
 * Destroy all lock state for the calling client.
 */
void
nlm_do_free_all(nlm4_notify *argp, void *res, struct svc_req *sr)
{
	struct nlm_globals *g;
	struct nlm_host_list host_list;
	struct nlm_host *hostp;

	TAILQ_INIT(&host_list);
	g = zone_getspecific(nlm_zone_key, curzone);

	/* Serialize calls to clean locks. */
	mutex_enter(&g->clean_lock);

	/*
	 * Find all hosts that have the given node name and put them on a
	 * local list.
	 */
	mutex_enter(&g->lock);
	for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL;
	    hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) {
		if (strcasecmp(hostp->nh_name, argp->name) == 0) {
			/*
			 * If needed take the host out of the idle list since
			 * we are taking a reference.
			 */
			if (hostp->nh_flags & NLM_NH_INIDLE) {
				TAILQ_REMOVE(&g->nlm_idle_hosts, hostp,
				    nh_link);
				hostp->nh_flags &= ~NLM_NH_INIDLE;
			}
			hostp->nh_refs++;

			TAILQ_INSERT_TAIL(&host_list, hostp, nh_link);
		}
	}
	mutex_exit(&g->lock);

	/* Free locks for all hosts on the local list. */
	while (!TAILQ_EMPTY(&host_list)) {
		hostp = TAILQ_FIRST(&host_list);
		TAILQ_REMOVE(&host_list, hostp, nh_link);

		/*
		 * Note that this does not do client-side cleanup.
		 * We want to do that ONLY if statd tells us the
		 * server has restarted.
		 */
		nlm_host_notify_server(hostp, argp->state);
		nlm_host_release(g, hostp);
	}

	mutex_exit(&g->clean_lock);

	(void) res;
	(void) sr;
}

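/*
 * Convert an NLM share request (nlm4_share) into the local
 * shrlock form used by VOP_SHRLOCK().
 */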
static void
nlm_init_shrlock(struct shrlock *shr,
    nlm4_share *nshare, struct nlm_host *host)
{

	switch (nshare->access) {
	default:
	case fsa_NONE:
		shr->s_access = 0;
		break;
	case fsa_R:
		shr->s_access = F_RDACC;
		break;
	case fsa_W:
		shr->s_access = F_WRACC;
		break;
	case fsa_RW:
		shr->s_access = F_RWACC;
		break;
	}

	switch (nshare->mode) {
	default:
	case fsm_DN:
		shr->s_deny = F_NODNY;
		break;
	case fsm_DR:
		shr->s_deny = F_RDDNY;
		break;
	case fsm_DW:
		shr->s_deny = F_WRDNY;
		break;
	case fsm_DRW:
		shr->s_deny = F_RWDNY;
		break;
	}

	shr->s_sysid = host->nh_sysid;
	shr->s_pid = 0;
	shr->s_own_len = nshare->oh.n_len;
	shr->s_owner = nshare->oh.n_bytes;
}

/*
 * NLM_SHARE, NLM4_SHARE
 *
 * Request a DOS-style share reservation
 */
void
nlm_do_share(nlm4_shareargs *argp, nlm4_shareres *resp, struct svc_req *sr)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	struct netbuf *addr;
	struct nlm_vhold *nvp = NULL;
	char *netid;
	char *name;
	int error;
	struct shrlock shr;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	name = argp->share.caller_name;
	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_findcreate(g, name, netid, addr);
	if (host == NULL) {
		resp->stat = nlm4_denied_nolocks;
		return;
	}

	DTRACE_PROBE3(share__start, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_shareargs *, argp);

	if (argp->reclaim == 0 && NLM_IN_GRACE(g)) {
		resp->stat = nlm4_denied_grace_period;
		goto out;
	}

	/*
	 * Get a hold on the vnode for the share operation.
	 * Only lock() and share() need vhold objects.
	 */
	nvp = nlm_fh_to_vhold(host, &argp->share.fh);
	if (nvp == NULL) {
		resp->stat = nlm4_stale_fh;
		goto out;
	}

	/* Convert to local form. */
	nlm_init_shrlock(&shr, &argp->share, host);
	error = VOP_SHRLOCK(nvp->nv_vp, F_SHARE, &shr,
	    FREAD | FWRITE, CRED(), NULL);

	if (error == 0) {
		resp->stat = nlm4_granted;
		nlm_host_monitor(g, host, 0);
	} else {
		resp->stat = nlm4_denied;
	}

out:
	DTRACE_PROBE3(share__end, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_shareres *, resp);

	nlm_vhold_release(host, nvp);
	nlm_host_release(g, host);
}

/*
 * NLM_UNSHARE, NLM4_UNSHARE
 *
 * Release a DOS-style share reservation
 */
void
nlm_do_unshare(nlm4_shareargs *argp, nlm4_shareres *resp, struct svc_req *sr)
{
	struct nlm_globals *g;
	struct nlm_host *host;
	struct netbuf *addr;
	vnode_t *vp = NULL;
	char *netid;
	int error;
	struct shrlock shr;

	nlm_copy_netobj(&resp->cookie, &argp->cookie);

	netid = svc_getnetid(sr->rq_xprt);
	addr = svc_getrpccaller(sr->rq_xprt);

	g = zone_getspecific(nlm_zone_key, curzone);
	host = nlm_host_find(g, netid, addr);
	if (host == NULL) {
		resp->stat = nlm4_denied_nolocks;
		return;
	}

	DTRACE_PROBE3(unshare__start, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_shareargs *, argp);

	if (NLM_IN_GRACE(g)) {
		resp->stat = nlm4_denied_grace_period;
		goto out;
	}

	vp = nlm_fh_to_vp(&argp->share.fh);
	if (vp == NULL) {
		resp->stat = nlm4_stale_fh;
		goto out;
	}

	/* Convert to local form. */
	nlm_init_shrlock(&shr, &argp->share, host);
	error = VOP_SHRLOCK(vp, F_UNSHARE, &shr,
	    FREAD | FWRITE, CRED(), NULL);

	(void) error;
	resp->stat = nlm4_granted;

out:
	DTRACE_PROBE3(unshare__end, struct nlm_globals *, g,
	    struct nlm_host *, host, nlm4_shareres *, resp);

	if (vp != NULL)
		VN_RELE(vp);

	nlm_host_release(g, host);
}

/*
 * NLM wrapper to VOP_FRLOCK that checks the validity of the lock before
 * invoking the vnode operation.
 */
static int
nlm_vop_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset,
    struct flk_callback *flk_cbp, cred_t *cr, caller_context_t *ct)
{
	if (bfp->l_len != 0 && bfp->l_start + (bfp->l_len - 1)
	    < bfp->l_start) {
		return (EOVERFLOW);
	}

	return (VOP_FRLOCK(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
}