1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29
30 #include <sys/flock_impl.h>
31 #include <sys/vfs.h>
32 #include <sys/t_lock.h> /* for <sys/callb.h> */
33 #include <sys/callb.h>
34 #include <sys/clconf.h>
35 #include <sys/cladm.h>
36 #include <sys/nbmlock.h>
37 #include <sys/cred.h>
38 #include <sys/policy.h>
39
40 /*
 * The following variables are for statistics purposes and they are not
 * protected by locks.  They may not be accurate but will at least be
 * close to the actual values.
44 */
45
46 int flk_lock_allocs;
47 int flk_lock_frees;
48 int edge_allocs;
49 int edge_frees;
50 int flk_proc_vertex_allocs;
51 int flk_proc_edge_allocs;
52 int flk_proc_vertex_frees;
53 int flk_proc_edge_frees;
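
/*
 * These are simple event counters; allocations minus frees gives an
 * approximate count of live objects of each kind.  As an illustrative
 * example (not part of the implementation), they can typically be read on
 * a live system with the kernel debugger:
 *
 *	# mdb -k
 *	> flk_lock_allocs/D
 *	> flk_lock_frees/D
 */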
54
55 static kmutex_t flock_lock;
56
57 #ifdef DEBUG
58 int check_debug = 0;
59 #define CHECK_ACTIVE_LOCKS(gp) if (check_debug) \
60 check_active_locks(gp);
61 #define CHECK_SLEEPING_LOCKS(gp) if (check_debug) \
62 check_sleeping_locks(gp);
63 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp) \
64 if (check_debug) \
65 check_owner_locks(gp, pid, sysid, vp);
66 #define CHECK_LOCK_TRANSITION(old_state, new_state) \
67 { \
68 if (check_lock_transition(old_state, new_state)) { \
69 cmn_err(CE_PANIC, "Illegal lock transition \
70 from %d to %d", old_state, new_state); \
71 } \
72 }
73 #else
74
75 #define CHECK_ACTIVE_LOCKS(gp)
76 #define CHECK_SLEEPING_LOCKS(gp)
77 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp)
78 #define CHECK_LOCK_TRANSITION(old_state, new_state)
79
80 #endif /* DEBUG */
81
82 struct kmem_cache *flk_edge_cache;
83
84 graph_t *lock_graph[HASH_SIZE];
85 proc_graph_t pgraph;
86
87 /*
88 * Clustering.
89 *
90 * NLM REGISTRY TYPE IMPLEMENTATION
91 *
92 * Assumptions:
 *  1. Nodes in a cluster are numbered starting at 1; node ids are
 *     always positive integers; the maximum node id is returned by
 *     clconf_maximum_nodeid().
95 * 2. We use this node id to identify the node an NLM server runs on.
96 */
97
98 /*
99 * NLM registry object keeps track of NLM servers via their
 * nlmids (which are the node ids of the cluster nodes they run on)
 * that have requested locks at the LLM with which this registry is
102 * associated.
103 *
104 * Representation of abstraction:
105 * rep = record[ states: array[nlm_state],
106 * lock: mutex]
107 *
108 * Representation invariants:
 *	1. index i of rep.states is between 0 and n - 1 where n is the
 *	   number of elements in the array, which happens to be the
 *	   maximum node id in the cluster configuration + 1.
 *	2. map nlmid to index i of rep.states
 *		0   -> 0
 *		1   -> 1
 *		2   -> 2
 *		n-1 -> clconf_maximum_nodeid()
 *	3. This 1-1 mapping is quite convenient and it avoids errors
 *	   resulting from forgetting to subtract 1 from the index.
119 * 4. The reason we keep the 0th index is the following. A legitimate
120 * cluster configuration includes making a UFS file system NFS
121 * exportable. The code is structured so that if you're in a cluster
122 * you do one thing; otherwise, you do something else. The problem
123 * is what to do if you think you're in a cluster with PXFS loaded,
124 * but you're using UFS not PXFS? The upper two bytes of the sysid
125 * encode the node id of the node where NLM server runs; these bytes
126 * are zero for UFS. Since the nodeid is used to index into the
127 * registry, we can record the NLM server state information at index
128 * 0 using the same mechanism used for PXFS file locks!
129 */
130 static flk_nlm_status_t *nlm_reg_status = NULL; /* state array 0..N-1 */
static	kmutex_t nlm_reg_lock;			/* lock to protect array */
132 static uint_t nlm_status_size; /* size of state array */
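
/*
 * Illustrative sketch (not part of the implementation): per the comment
 * above, the upper two bytes of the 32-bit sysid carry the node id of the
 * node where the NLM server runs, and the lower bytes hold the sysid
 * proper, so the registry index is obtained roughly as GETNLMID() does
 * (the exact bit layout and macros presumably live in sys/flock.h and
 * flock_impl.h):
 *
 *	sysid = 0x00020007  ->  nlmid 2 (registry index 2)
 *	sysid = 0x00000007  ->  nlmid 0, i.e. a local/UFS lock, index 0
 */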
133
134 /*
135 * Although we need a global lock dependency graph (and associated data
136 * structures), we also need a per-zone notion of whether the lock manager is
137 * running, and so whether to allow lock manager requests or not.
138 *
139 * Thus, on a per-zone basis we maintain a ``global'' variable
140 * (flk_lockmgr_status), protected by flock_lock, and set when the lock
141 * manager is determined to be changing state (starting or stopping).
142 *
143 * Each graph/zone pair also has a copy of this variable, which is protected by
144 * the graph's mutex.
145 *
146 * The per-graph copies are used to synchronize lock requests with shutdown
147 * requests. The global copy is used to initialize the per-graph field when a
148 * new graph is created.
149 */
150 struct flock_globals {
151 flk_lockmgr_status_t flk_lockmgr_status;
152 flk_lockmgr_status_t lockmgr_status[HASH_SIZE];
153 };
154
155 zone_key_t flock_zone_key;
156
157 static void create_flock(lock_descriptor_t *, flock64_t *);
158 static lock_descriptor_t *flk_get_lock(void);
159 static void flk_free_lock(lock_descriptor_t *lock);
160 static void flk_get_first_blocking_lock(lock_descriptor_t *request);
161 static int flk_process_request(lock_descriptor_t *);
162 static int flk_add_edge(lock_descriptor_t *, lock_descriptor_t *, int, int);
163 static edge_t *flk_get_edge(void);
164 static int flk_wait_execute_request(lock_descriptor_t *);
165 static int flk_relation(lock_descriptor_t *, lock_descriptor_t *);
166 static void flk_insert_active_lock(lock_descriptor_t *);
167 static void flk_delete_active_lock(lock_descriptor_t *, int);
168 static void flk_insert_sleeping_lock(lock_descriptor_t *);
169 static void flk_graph_uncolor(graph_t *);
170 static void flk_wakeup(lock_descriptor_t *, int);
171 static void flk_free_edge(edge_t *);
172 static void flk_recompute_dependencies(lock_descriptor_t *,
173 lock_descriptor_t **, int, int);
174 static int flk_find_barriers(lock_descriptor_t *);
175 static void flk_update_barriers(lock_descriptor_t *);
176 static int flk_color_reachables(lock_descriptor_t *);
177 static int flk_canceled(lock_descriptor_t *);
178 static void flk_delete_locks_by_sysid(lock_descriptor_t *);
179 static void report_blocker(lock_descriptor_t *, lock_descriptor_t *);
180 static void wait_for_lock(lock_descriptor_t *);
181 static void unlock_lockmgr_granted(struct flock_globals *);
182 static void wakeup_sleeping_lockmgr_locks(struct flock_globals *);
183
184 /* Clustering hooks */
185 static void cl_flk_change_nlm_state_all_locks(int, flk_nlm_status_t);
186 static void cl_flk_wakeup_sleeping_nlm_locks(int);
187 static void cl_flk_unlock_nlm_granted(int);
188
189 #ifdef DEBUG
190 static int check_lock_transition(int, int);
191 static void check_sleeping_locks(graph_t *);
192 static void check_active_locks(graph_t *);
193 static int no_path(lock_descriptor_t *, lock_descriptor_t *);
194 static void path(lock_descriptor_t *, lock_descriptor_t *);
195 static void check_owner_locks(graph_t *, pid_t, int, vnode_t *);
196 static int level_one_path(lock_descriptor_t *, lock_descriptor_t *);
197 static int level_two_path(lock_descriptor_t *, lock_descriptor_t *, int);
198 #endif
199
200 /* proc_graph function definitions */
201 static int flk_check_deadlock(lock_descriptor_t *);
202 static void flk_proc_graph_uncolor(void);
203 static proc_vertex_t *flk_get_proc_vertex(lock_descriptor_t *);
204 static proc_edge_t *flk_get_proc_edge(void);
205 static void flk_proc_release(proc_vertex_t *);
206 static void flk_free_proc_edge(proc_edge_t *);
207 static void flk_update_proc_graph(edge_t *, int);
208
209 /* Non-blocking mandatory locking */
210 static int lock_blocks_io(nbl_op_t, u_offset_t, ssize_t, int, u_offset_t,
211 u_offset_t);
212
213 static struct flock_globals *
214 flk_get_globals(void)
215 {
216 /*
217 * The KLM module had better be loaded if we're attempting to handle
218 * lockmgr requests.
219 */
220 ASSERT(flock_zone_key != ZONE_KEY_UNINITIALIZED);
221 return (zone_getspecific(flock_zone_key, curproc->p_zone));
222 }
223
224 static flk_lockmgr_status_t
225 flk_get_lockmgr_status(void)
226 {
227 struct flock_globals *fg;
228
229 ASSERT(MUTEX_HELD(&flock_lock));
230
231 if (flock_zone_key == ZONE_KEY_UNINITIALIZED) {
232 /*
233 * KLM module not loaded; lock manager definitely not running.
234 */
235 return (FLK_LOCKMGR_DOWN);
236 }
237 fg = flk_get_globals();
238 return (fg->flk_lockmgr_status);
239 }
240
241 /*
242 * Routine called from fs_frlock in fs/fs_subr.c
243 */
244
245 int
246 reclock(vnode_t *vp,
247 flock64_t *lckdat,
248 int cmd,
249 int flag,
250 u_offset_t offset,
251 flk_callback_t *flk_cbp)
252 {
253 lock_descriptor_t stack_lock_request;
254 lock_descriptor_t *lock_request;
255 int error = 0;
256 graph_t *gp;
257 int nlmid;
258
259 /*
260 * Check access permissions
261 */
262 if ((cmd & SETFLCK) &&
263 ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) ||
264 (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0)))
265 return (EBADF);
266
267 /*
268 * for query and unlock we use the stack_lock_request
269 */
270
271 if ((lckdat->l_type == F_UNLCK) ||
272 !((cmd & INOFLCK) || (cmd & SETFLCK))) {
273 lock_request = &stack_lock_request;
274 (void) bzero((caddr_t)lock_request,
275 sizeof (lock_descriptor_t));
276
277 /*
		 * The following is added so that the assertions in
		 * flk_execute_request() pass.
280 */
281
282 lock_request->l_edge.edge_in_next = &lock_request->l_edge;
283 lock_request->l_edge.edge_in_prev = &lock_request->l_edge;
284 lock_request->l_edge.edge_adj_next = &lock_request->l_edge;
285 lock_request->l_edge.edge_adj_prev = &lock_request->l_edge;
286 lock_request->l_status = FLK_INITIAL_STATE;
287 } else {
288 lock_request = flk_get_lock();
289 }
290 lock_request->l_state = 0;
291 lock_request->l_vnode = vp;
292 lock_request->l_zoneid = getzoneid();
293
294 /*
295 * Convert the request range into the canonical start and end
296 * values. The NLM protocol supports locking over the entire
297 * 32-bit range, so there's no range checking for remote requests,
298 * but we still need to verify that local requests obey the rules.
299 */
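	/*
	 * Worked example (matching the arithmetic below): with l_whence
	 * already normalized to 0, l_start = 100 and l_len = 50 yield the
	 * canonical range [100, 149]; l_len = 0 means "to end of file" and
	 * yields [100, MAX_U_OFFSET_T].
	 */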
300 /* Clustering */
301 if ((cmd & (RCMDLCK | PCMDLCK)) != 0) {
302 ASSERT(lckdat->l_whence == 0);
303 lock_request->l_start = lckdat->l_start;
304 lock_request->l_end = (lckdat->l_len == 0) ? MAX_U_OFFSET_T :
305 lckdat->l_start + (lckdat->l_len - 1);
306 } else {
307 /* check the validity of the lock range */
308 error = flk_convert_lock_data(vp, lckdat,
309 &lock_request->l_start, &lock_request->l_end,
310 offset);
311 if (error) {
312 goto done;
313 }
314 error = flk_check_lock_data(lock_request->l_start,
315 lock_request->l_end, MAXEND);
316 if (error) {
317 goto done;
318 }
319 }
320
321 ASSERT(lock_request->l_end >= lock_request->l_start);
322
323 lock_request->l_type = lckdat->l_type;
324 if (cmd & INOFLCK)
325 lock_request->l_state |= IO_LOCK;
326 if (cmd & SLPFLCK)
327 lock_request->l_state |= WILLING_TO_SLEEP_LOCK;
328 if (cmd & RCMDLCK)
329 lock_request->l_state |= LOCKMGR_LOCK;
330 if (cmd & NBMLCK)
331 lock_request->l_state |= NBMAND_LOCK;
332 /*
333 * Clustering: set flag for PXFS locks
334 * We do not _only_ check for the PCMDLCK flag because PXFS locks could
335 * also be of type 'RCMDLCK'.
336 * We do not _only_ check the GETPXFSID() macro because local PXFS
337 * clients use a pxfsid of zero to permit deadlock detection in the LLM.
338 */
339
340 if ((cmd & PCMDLCK) || (GETPXFSID(lckdat->l_sysid) != 0)) {
341 lock_request->l_state |= PXFS_LOCK;
342 }
343 if (!((cmd & SETFLCK) || (cmd & INOFLCK))) {
344 if (lock_request->l_type == F_RDLCK ||
345 lock_request->l_type == F_WRLCK)
346 lock_request->l_state |= QUERY_LOCK;
347 }
348 lock_request->l_flock = (*lckdat);
349 lock_request->l_callbacks = flk_cbp;
350
351 /*
	 * We are now ready to process the request
353 */
354 if (IS_LOCKMGR(lock_request)) {
355 /*
356 * If the lock request is an NLM server request ....
357 */
358 if (nlm_status_size == 0) { /* not booted as cluster */
359 mutex_enter(&flock_lock);
360 /*
361 * Bail out if this is a lock manager request and the
362 * lock manager is not supposed to be running.
363 */
364 if (flk_get_lockmgr_status() != FLK_LOCKMGR_UP) {
365 mutex_exit(&flock_lock);
366 error = ENOLCK;
367 goto done;
368 }
369 mutex_exit(&flock_lock);
370 } else { /* booted as a cluster */
371 nlmid = GETNLMID(lock_request->l_flock.l_sysid);
372 ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
373
374 mutex_enter(&nlm_reg_lock);
375 /*
376 * If the NLM registry does not know about this
377 * NLM server making the request, add its nlmid
378 * to the registry.
379 */
380 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status,
381 nlmid)) {
382 FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid);
383 } else if (!FLK_REGISTRY_IS_NLM_UP(nlm_reg_status,
384 nlmid)) {
385 /*
386 * If the NLM server is already known (has made
387 * previous lock requests) and its state is
				 * not NLM_UP (meaning that the NLM server is
				 * shutting down), then bail out with an
390 * error to deny the lock request.
391 */
392 mutex_exit(&nlm_reg_lock);
393 error = ENOLCK;
394 goto done;
395 }
396 mutex_exit(&nlm_reg_lock);
397 }
398 }
399
400 /* Now get the lock graph for a particular vnode */
401 gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH);
402
403 /*
	 * We drop the rwlock here; otherwise this might end up causing a
	 * deadlock if this IO_LOCK sleeps.  (bugid # 1183392)
406 */
407
408 if (IS_IO_LOCK(lock_request)) {
409 VOP_RWUNLOCK(vp,
410 (lock_request->l_type == F_RDLCK) ?
411 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
412 }
413 mutex_enter(&gp->gp_mutex);
414
415 lock_request->l_state |= REFERENCED_LOCK;
416 lock_request->l_graph = gp;
417
418 switch (lock_request->l_type) {
419 case F_RDLCK:
420 case F_WRLCK:
421 if (IS_QUERY_LOCK(lock_request)) {
422 flk_get_first_blocking_lock(lock_request);
423 (*lckdat) = lock_request->l_flock;
424 break;
425 }
426
427 /* process the request now */
428
429 error = flk_process_request(lock_request);
430 break;
431
432 case F_UNLCK:
433 /* unlock request will not block so execute it immediately */
434
435 if (IS_LOCKMGR(lock_request) &&
436 flk_canceled(lock_request)) {
437 error = 0;
438 } else {
439 error = flk_execute_request(lock_request);
440 }
441 break;
442
443 case F_UNLKSYS:
444 /*
		 * Recovery mechanism to release lock manager locks when
		 * an NFS client crashes and restarts.  The NFS server will
		 * clear the old locks and grant new ones.
448 */
449
450 if (lock_request->l_flock.l_sysid == 0) {
451 mutex_exit(&gp->gp_mutex);
452 return (EINVAL);
453 }
454 if (secpolicy_nfs(CRED()) != 0) {
455 mutex_exit(&gp->gp_mutex);
456 return (EPERM);
457 }
458 flk_delete_locks_by_sysid(lock_request);
459 lock_request->l_state &= ~REFERENCED_LOCK;
460 flk_set_state(lock_request, FLK_DEAD_STATE);
461 flk_free_lock(lock_request);
462 mutex_exit(&gp->gp_mutex);
463 return (0);
464
465 default:
466 error = EINVAL;
467 break;
468 }
469
470 /* Clustering: For blocked PXFS locks, return */
471 if (error == PXFS_LOCK_BLOCKED) {
472 lock_request->l_state &= ~REFERENCED_LOCK;
473 mutex_exit(&gp->gp_mutex);
474 return (error);
475 }
476
477 /*
478 * Now that we have seen the status of locks in the system for
479 * this vnode we acquire the rwlock if it is an IO_LOCK.
480 */
481
482 if (IS_IO_LOCK(lock_request)) {
483 (void) VOP_RWLOCK(vp,
484 (lock_request->l_type == F_RDLCK) ?
485 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
486 if (!error) {
487 lckdat->l_type = F_UNLCK;
488
489 /*
			 * This wakeup is needed; otherwise, if the IO_LOCK
			 * slept, its dependents would never be woken up
			 * at all.  (bugid # 1185482)
493 */
494
495 flk_wakeup(lock_request, 1);
496 flk_set_state(lock_request, FLK_DEAD_STATE);
497 flk_free_lock(lock_request);
498 }
499 /*
		 * Otherwise, if an error occurred, either
		 * flk_process_request() returned EDEADLK, in which case
		 * there will be no dependents for this lock, or
		 * flk_wait_execute_request() returned EINTR, in which case
		 * flk_cancel_sleeping_lock() has already been done.  The
		 * same is true for EBADF.
505 */
506 }
507
508 if (lock_request == &stack_lock_request) {
509 flk_set_state(lock_request, FLK_DEAD_STATE);
510 } else {
511 lock_request->l_state &= ~REFERENCED_LOCK;
512 if ((error != 0) || IS_DELETED(lock_request)) {
513 flk_set_state(lock_request, FLK_DEAD_STATE);
514 flk_free_lock(lock_request);
515 }
516 }
517
518 mutex_exit(&gp->gp_mutex);
519 return (error);
520
521 done:
522 flk_set_state(lock_request, FLK_DEAD_STATE);
523 if (lock_request != &stack_lock_request)
524 flk_free_lock(lock_request);
525 return (error);
526 }
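
/*
 * For reference, reclock() is normally reached from user level via
 * fcntl(2) -> VOP_FRLOCK() -> fs_frlock().  A minimal (hypothetical)
 * user-level sketch of a request that ends up here as a willing-to-sleep
 * write lock over a whole file:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int
 *	lock_whole_file(int fd)
 *	{
 *		struct flock fl;
 *
 *		fl.l_type = F_WRLCK;
 *		fl.l_whence = SEEK_SET;
 *		fl.l_start = 0;
 *		fl.l_len = 0;		(zero length means "to end of file")
 *		return (fcntl(fd, F_SETLKW, &fl));
 *	}
 */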
527
528 /*
529 * Invoke the callbacks in the given list. If before sleeping, invoke in
530 * list order. If after sleeping, invoke in reverse order.
531 *
532 * CPR (suspend/resume) support: if one of the callbacks returns a
533 * callb_cpr_t, return it. This will be used to make the thread CPR-safe
534 * while it is sleeping. There should be at most one callb_cpr_t for the
535 * thread.
536 * XXX This is unnecessarily complicated. The CPR information should just
537 * get passed in directly through VOP_FRLOCK and reclock, rather than
538 * sneaking it in via a callback.
539 */
540
541 callb_cpr_t *
542 flk_invoke_callbacks(flk_callback_t *cblist, flk_cb_when_t when)
543 {
544 callb_cpr_t *cpr_callbackp = NULL;
545 callb_cpr_t *one_result;
546 flk_callback_t *cb;
547
548 if (cblist == NULL)
549 return (NULL);
550
551 if (when == FLK_BEFORE_SLEEP) {
552 cb = cblist;
553 do {
554 one_result = (*cb->cb_callback)(when, cb->cb_data);
555 if (one_result != NULL) {
556 ASSERT(cpr_callbackp == NULL);
557 cpr_callbackp = one_result;
558 }
559 cb = cb->cb_next;
560 } while (cb != cblist);
561 } else {
562 cb = cblist->cb_prev;
563 do {
564 one_result = (*cb->cb_callback)(when, cb->cb_data);
565 if (one_result != NULL) {
566 cpr_callbackp = one_result;
567 }
568 cb = cb->cb_prev;
569 } while (cb != cblist->cb_prev);
570 }
571
572 return (cpr_callbackp);
573 }
574
575 /*
576 * Initialize a flk_callback_t to hold the given callback.
577 */
578
579 void
580 flk_init_callback(flk_callback_t *flk_cb,
581 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata)
582 {
583 flk_cb->cb_next = flk_cb;
584 flk_cb->cb_prev = flk_cb;
585 flk_cb->cb_callback = cb_fcn;
586 flk_cb->cb_data = cbdata;
587 }
588
589 /*
590 * Initialize an flk_callback_t and then link it into the head of an
591 * existing list (which may be NULL).
592 */
593
594 void
595 flk_add_callback(flk_callback_t *newcb,
596 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *),
597 void *cbdata, flk_callback_t *cblist)
598 {
599 flk_init_callback(newcb, cb_fcn, cbdata);
600
601 if (cblist == NULL)
602 return;
603
604 newcb->cb_prev = cblist->cb_prev;
605 newcb->cb_next = cblist;
606 cblist->cb_prev->cb_next = newcb;
607 cblist->cb_prev = newcb;
608 }
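
/*
 * Usage sketch (hypothetical caller, not part of this file): a file system
 * that needs to be notified around the sleep in reclock() builds a
 * one-element callback list and passes it in as the flk_callback_t
 * argument; my_frlock_callback, my_data and bf are made-up names.
 *
 *	static callb_cpr_t *
 *	my_frlock_callback(flk_cb_when_t when, void *data)
 *	{
 *		if (when == FLK_BEFORE_SLEEP) {
 *			(do pre-sleep work)
 *		} else {
 *			(FLK_AFTER_SLEEP: undo it)
 *		}
 *		return (NULL);	(or a callb_cpr_t * to make the sleep CPR-safe)
 *	}
 *
 *	flk_callback_t flk_cb;
 *	flk_init_callback(&flk_cb, my_frlock_callback, my_data);
 *	error = reclock(vp, &bf, cmd, flag, offset, &flk_cb);
 *
 * flk_add_callback() is used instead when an existing list (e.g. one
 * passed down from a caller) must be extended.
 */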
609
610 /*
611 * Initialize the flk_edge_cache data structure and create the
612 * nlm_reg_status array.
613 */
614
615 void
616 flk_init(void)
617 {
618 uint_t i;
619
620 flk_edge_cache = kmem_cache_create("flk_edges",
621 sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0);
622 if (flk_edge_cache == NULL) {
623 cmn_err(CE_PANIC, "Couldn't create flk_edge_cache\n");
624 }
625 /*
626 * Create the NLM registry object.
627 */
628
629 if (cluster_bootflags & CLUSTER_BOOTED) {
630 /*
631 * This routine tells you the maximum node id that will be used
632 * in the cluster. This number will be the size of the nlm
633 * registry status array. We add 1 because we will be using
634 * all entries indexed from 0 to maxnodeid; e.g., from 0
635 * to 64, for a total of 65 entries.
636 */
637 nlm_status_size = clconf_maximum_nodeid() + 1;
638 } else {
639 nlm_status_size = 0;
640 }
641
642 if (nlm_status_size != 0) { /* booted as a cluster */
643 nlm_reg_status = (flk_nlm_status_t *)
644 kmem_alloc(sizeof (flk_nlm_status_t) * nlm_status_size,
645 KM_SLEEP);
646
647 /* initialize all NLM states in array to NLM_UNKNOWN */
648 for (i = 0; i < nlm_status_size; i++) {
649 nlm_reg_status[i] = FLK_NLM_UNKNOWN;
650 }
651 }
652 }
653
654 /*
655 * Zone constructor/destructor callbacks to be executed when a zone is
656 * created/destroyed.
657 */
658 /* ARGSUSED */
659 void *
660 flk_zone_init(zoneid_t zoneid)
661 {
662 struct flock_globals *fg;
663 uint_t i;
664
665 fg = kmem_alloc(sizeof (*fg), KM_SLEEP);
666 fg->flk_lockmgr_status = FLK_LOCKMGR_UP;
667 for (i = 0; i < HASH_SIZE; i++)
668 fg->lockmgr_status[i] = FLK_LOCKMGR_UP;
669 return (fg);
670 }
671
672 /* ARGSUSED */
673 void
674 flk_zone_fini(zoneid_t zoneid, void *data)
675 {
676 struct flock_globals *fg = data;
677
678 kmem_free(fg, sizeof (*fg));
679 }
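
/*
 * These callbacks are registered against flock_zone_key elsewhere (by the
 * KLM module, per the comments in flk_get_globals() and
 * flk_get_lockmgr_status()).  A sketch of that registration, assuming the
 * standard zone_key_create() interface, looks roughly like:
 *
 *	zone_key_create(&flock_zone_key, flk_zone_init, NULL, flk_zone_fini);
 *
 * after which flk_get_globals() can look up the per-zone state with
 * zone_getspecific(flock_zone_key, curproc->p_zone).
 */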
680
681 /*
682 * Get a lock_descriptor structure with initialization of edge lists.
683 */
684
685 static lock_descriptor_t *
686 flk_get_lock(void)
687 {
688 lock_descriptor_t *l;
689
690 l = kmem_zalloc(sizeof (lock_descriptor_t), KM_SLEEP);
691
692 cv_init(&l->l_cv, NULL, CV_DRIVER, NULL);
693 l->l_edge.edge_in_next = &l->l_edge;
694 l->l_edge.edge_in_prev = &l->l_edge;
695 l->l_edge.edge_adj_next = &l->l_edge;
696 l->l_edge.edge_adj_prev = &l->l_edge;
697 l->pvertex = -1;
698 l->l_status = FLK_INITIAL_STATE;
699 flk_lock_allocs++;
700 return (l);
701 }
702
703 /*
 * Free a lock_descriptor structure.  If some thread still holds a
 * reference to it (as in reclock()), just set the DELETED_LOCK flag.
706 */
707
708 void
709 flk_free_lock(lock_descriptor_t *lock)
710 {
711 ASSERT(IS_DEAD(lock));
712 if (IS_REFERENCED(lock)) {
713 lock->l_state |= DELETED_LOCK;
714 return;
715 }
716 flk_lock_frees++;
717 kmem_free((void *)lock, sizeof (lock_descriptor_t));
718 }
719
720 void
721 flk_set_state(lock_descriptor_t *lock, int new_state)
722 {
723 /*
724 * Locks in the sleeping list may be woken up in a number of ways,
725 * and more than once. If a sleeping lock is signaled awake more
726 * than once, then it may or may not change state depending on its
727 * current state.
728 * Also note that NLM locks that are sleeping could be moved to an
729 * interrupted state more than once if the unlock request is
730 * retransmitted by the NLM client - the second time around, this is
731 * just a nop.
732 * The ordering of being signaled awake is:
733 * INTERRUPTED_STATE > CANCELLED_STATE > GRANTED_STATE.
734 * The checks below implement this ordering.
735 */
736 if (IS_INTERRUPTED(lock)) {
737 if ((new_state == FLK_CANCELLED_STATE) ||
738 (new_state == FLK_GRANTED_STATE) ||
739 (new_state == FLK_INTERRUPTED_STATE)) {
740 return;
741 }
742 }
743 if (IS_CANCELLED(lock)) {
744 if ((new_state == FLK_GRANTED_STATE) ||
745 (new_state == FLK_CANCELLED_STATE)) {
746 return;
747 }
748 }
749 CHECK_LOCK_TRANSITION(lock->l_status, new_state);
750 if (IS_PXFS(lock)) {
751 cl_flk_state_transition_notify(lock, lock->l_status, new_state);
752 }
753 lock->l_status = new_state;
754 }
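
/*
 * Example of the ordering above: if a sleeping lock is first interrupted
 * by a signal (INTERRUPTED_STATE) and a racing unlock then tries to grant
 * it (GRANTED_STATE), the second transition is ignored and the lock stays
 * interrupted, so the waiter sees EINTR rather than a granted lock.
 */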
755
756 /*
757 * Routine that checks whether there are any blocking locks in the system.
758 *
 * The policy followed is: if a write lock is sleeping, we don't allow read
 * locks before this write lock, even though there may not be any active
 * locks corresponding to the read locks' region.
 *
 * The flk_add_edge() function adds an edge between l1 and l2 iff there
 * is no path between l1 and l2.  This is done to have a "minimum
 * storage representation" of the dependency graph.
 *
 * Another property of the graph is that, since only the new request throws
 * edges to the existing locks in the graph, the graph is always
 * topologically ordered.
770 */
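
/*
 * Illustration of the policy above (not code): suppose an active read
 * lock covers [0, 9] and a writer W is asleep waiting for [0, 99].
 * A new read request for [50, 99] conflicts with no active lock, but it
 * overlaps the sleeping writer's range, so it is queued behind W rather
 * than granted.  This keeps a steady stream of readers from starving W.
 */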
771
772 static int
773 flk_process_request(lock_descriptor_t *request)
774 {
775 graph_t *gp = request->l_graph;
776 lock_descriptor_t *lock;
777 int request_blocked_by_active = 0;
778 int request_blocked_by_granted = 0;
779 int request_blocked_by_sleeping = 0;
780 vnode_t *vp = request->l_vnode;
781 int error = 0;
782 int request_will_wait = 0;
783 int found_covering_lock = 0;
784 lock_descriptor_t *covered_by = NULL;
785
786 ASSERT(MUTEX_HELD(&gp->gp_mutex));
787 request_will_wait = IS_WILLING_TO_SLEEP(request);
788
789 /*
790 * check active locks
791 */
792
793 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
794
795
796 if (lock) {
797 do {
798 if (BLOCKS(lock, request)) {
799 if (!request_will_wait)
800 return (EAGAIN);
801 request_blocked_by_active = 1;
802 break;
803 }
804 /*
			 * Grant the lock if it is for the same owner holding
			 * an active lock that covers the request.
807 */
808
809 if (SAME_OWNER(lock, request) &&
810 COVERS(lock, request) &&
811 (request->l_type == F_RDLCK))
812 return (flk_execute_request(request));
813 lock = lock->l_next;
814 } while (lock->l_vnode == vp);
815 }
816
817 if (!request_blocked_by_active) {
818 lock_descriptor_t *lk[1];
819 lock_descriptor_t *first_glock = NULL;
820 /*
821 * Shall we grant this?! NO!!
		 * What about those locks that were just granted and are
		 * still on the sleep queue?  Their threads have been woken
		 * up, so the locks are almost active.
825 */
826 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
827 if (lock) {
828 do {
829 if (BLOCKS(lock, request)) {
830 if (IS_GRANTED(lock)) {
831 request_blocked_by_granted = 1;
832 } else {
833 request_blocked_by_sleeping = 1;
834 }
835 }
836
837 lock = lock->l_next;
838 } while ((lock->l_vnode == vp));
839 first_glock = lock->l_prev;
840 ASSERT(first_glock->l_vnode == vp);
841 }
842
843 if (request_blocked_by_granted)
844 goto block;
845
846 if (!request_blocked_by_sleeping) {
847 /*
848 * If the request isn't going to be blocked by a
849 * sleeping request, we know that it isn't going to
850 * be blocked; we can just execute the request --
851 * without performing costly deadlock detection.
852 */
853 ASSERT(!request_blocked_by_active);
854 return (flk_execute_request(request));
855 } else if (request->l_type == F_RDLCK) {
856 /*
857 * If we have a sleeping writer in the requested
858 * lock's range, block.
859 */
860 goto block;
861 }
862
863 lk[0] = request;
864 request->l_state |= RECOMPUTE_LOCK;
865 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
866 if (lock) {
867 do {
868 flk_recompute_dependencies(lock, lk, 1, 0);
869 lock = lock->l_next;
870 } while (lock->l_vnode == vp);
871 }
872 lock = first_glock;
873 if (lock) {
874 do {
875 if (IS_GRANTED(lock)) {
876 flk_recompute_dependencies(lock, lk, 1, 0);
877 }
878 lock = lock->l_prev;
879 } while ((lock->l_vnode == vp));
880 }
881 request->l_state &= ~RECOMPUTE_LOCK;
882 if (!NO_DEPENDENTS(request) && flk_check_deadlock(request))
883 return (EDEADLK);
884 return (flk_execute_request(request));
885 }
886
887 block:
888 if (request_will_wait)
889 flk_graph_uncolor(gp);
890
891 /* check sleeping locks */
892
893 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
894
895 /*
	 * If we find a sleeping write lock that is a superset of the
	 * region wanted by request, we can be assured that by adding an
	 * edge to this write lock we have paths to all locks in the
	 * graph that block the request, except in one case; that is why
	 * there is another check for SAME_OWNER in the loop below.  The
	 * exception is when the process that owns the sleeping write lock
	 * 'l1' has other locks l2, l3, l4 in the system that arrived
	 * before l1.  l1 does not have a path to these locks as they are
	 * from the same process.  We break when we find a second covering
	 * sleeping lock l5 owned by a process different from the one that
	 * owns l1, because none of l2, l3, l4, etc. can have arrived
	 * before l5; if one had, l1 would have produced a deadlock already.
908 */
909
910 if (lock) {
911 do {
912 if (BLOCKS(lock, request)) {
913 if (!request_will_wait)
914 return (EAGAIN);
915 if (COVERS(lock, request) &&
916 lock->l_type == F_WRLCK) {
917 if (found_covering_lock &&
918 !SAME_OWNER(lock, covered_by)) {
919 found_covering_lock++;
920 break;
921 }
922 found_covering_lock = 1;
923 covered_by = lock;
924 }
925 if (found_covering_lock &&
926 !SAME_OWNER(lock, covered_by)) {
927 lock = lock->l_next;
928 continue;
929 }
930 if ((error = flk_add_edge(request, lock,
931 !found_covering_lock, 0)))
932 return (error);
933 }
934 lock = lock->l_next;
935 } while (lock->l_vnode == vp);
936 }
937
938 /*
 * found_covering_lock == 2 iff at this point 'request' has paths to
 * all locks that block 'request'.  found_covering_lock == 1 iff at this
 * point 'request' has paths to all blocking locks whose owners are not
 * the same as the one that covers 'request' (covered_by above), and
 * there may be locks owned by covered_by's owner in the active list.
944 */
945
946 if (request_blocked_by_active && found_covering_lock != 2) {
947 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
948 ASSERT(lock != NULL);
949 do {
950 if (BLOCKS(lock, request)) {
951 if (found_covering_lock &&
952 !SAME_OWNER(lock, covered_by)) {
953 lock = lock->l_next;
954 continue;
955 }
956 if ((error = flk_add_edge(request, lock,
957 CHECK_CYCLE, 0)))
958 return (error);
959 }
960 lock = lock->l_next;
961 } while (lock->l_vnode == vp);
962 }
963
964 if (NOT_BLOCKED(request)) {
965 /*
966 * request not dependent on any other locks
967 * so execute this request
968 */
969 return (flk_execute_request(request));
970 } else {
971 /*
972 * check for deadlock
973 */
974 if (flk_check_deadlock(request))
975 return (EDEADLK);
976 /*
977 * this thread has to sleep
978 */
979 return (flk_wait_execute_request(request));
980 }
981 }
982
983 /*
 * In the simple case, the actual execution of the request is only to
 * insert the 'request' into the list of active locks if it is not an
 * UNLOCK.
 * We have to consider the existing active locks' relation to
 * this 'request' if they are owned by the same process.  flk_relation()
 * does this job and sees to it that the dependency graph information
 * is maintained properly.
991 */
992
993 int
994 flk_execute_request(lock_descriptor_t *request)
995 {
996 graph_t *gp = request->l_graph;
997 vnode_t *vp = request->l_vnode;
998 lock_descriptor_t *lock, *lock1;
999 int done_searching = 0;
1000
1001 CHECK_SLEEPING_LOCKS(gp);
1002 CHECK_ACTIVE_LOCKS(gp);
1003
1004 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1005
1006 flk_set_state(request, FLK_START_STATE);
1007
1008 ASSERT(NOT_BLOCKED(request));
1009
1010 /* IO_LOCK requests are only to check status */
1011
1012 if (IS_IO_LOCK(request))
1013 return (0);
1014
1015 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1016
1017 if (lock == NULL && request->l_type == F_UNLCK)
1018 return (0);
1019 if (lock == NULL) {
1020 flk_insert_active_lock(request);
1021 return (0);
1022 }
1023
1024 do {
1025 lock1 = lock->l_next;
1026 if (SAME_OWNER(request, lock)) {
1027 done_searching = flk_relation(lock, request);
1028 }
1029 lock = lock1;
1030 } while (lock->l_vnode == vp && !done_searching);
1031
1032 /*
1033 * insert in active queue
1034 */
1035
1036 if (request->l_type != F_UNLCK)
1037 flk_insert_active_lock(request);
1038
1039 return (0);
1040 }
1041
1042 /*
 * 'request' is blocked by someone, so we put it on the sleep queue.
1044 */
1045 static int
1046 flk_wait_execute_request(lock_descriptor_t *request)
1047 {
1048 graph_t *gp = request->l_graph;
1049 callb_cpr_t *cprp; /* CPR info from callback */
1050 struct flock_globals *fg;
1051 int index;
1052
1053 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1054 ASSERT(IS_WILLING_TO_SLEEP(request));
1055
1056 flk_insert_sleeping_lock(request);
1057
1058 if (IS_LOCKMGR(request)) {
1059 index = HASH_INDEX(request->l_vnode);
1060 fg = flk_get_globals();
1061
1062 if (nlm_status_size == 0) { /* not booted as a cluster */
1063 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP) {
1064 flk_cancel_sleeping_lock(request, 1);
1065 return (ENOLCK);
1066 }
1067 } else { /* booted as a cluster */
1068 /*
1069 * If the request is an NLM server lock request,
1070 * and the NLM state of the lock request is not
1071 * NLM_UP (because the NLM server is shutting
1072 * down), then cancel the sleeping lock and
1073 * return error ENOLCK that will encourage the
1074 * client to retransmit.
1075 */
1076 if (!IS_NLM_UP(request)) {
1077 flk_cancel_sleeping_lock(request, 1);
1078 return (ENOLCK);
1079 }
1080 }
1081 }
1082
1083 /* Clustering: For blocking PXFS locks, return */
1084 if (IS_PXFS(request)) {
1085 /*
1086 * PXFS locks sleep on the client side.
1087 * The callback argument is used to wake up the sleeper
1088 * when the lock is granted.
		 * We return PXFS_LOCK_BLOCKED (rather than an errno value)
		 * to indicate that the client side should sleep.
1091 */
1092 return (PXFS_LOCK_BLOCKED);
1093 }
1094
1095 if (request->l_callbacks != NULL) {
1096 /*
1097 * To make sure the shutdown code works correctly, either
1098 * the callback must happen after putting the lock on the
1099 * sleep list, or we must check the shutdown status after
1100 * returning from the callback (and before sleeping). At
1101 * least for now, we'll use the first option. If a
1102 * shutdown or signal or whatever happened while the graph
1103 * mutex was dropped, that will be detected by
1104 * wait_for_lock().
1105 */
1106 mutex_exit(&gp->gp_mutex);
1107
1108 cprp = flk_invoke_callbacks(request->l_callbacks,
1109 FLK_BEFORE_SLEEP);
1110
1111 mutex_enter(&gp->gp_mutex);
1112
1113 if (cprp == NULL) {
1114 wait_for_lock(request);
1115 } else {
1116 mutex_enter(cprp->cc_lockp);
1117 CALLB_CPR_SAFE_BEGIN(cprp);
1118 mutex_exit(cprp->cc_lockp);
1119 wait_for_lock(request);
1120 mutex_enter(cprp->cc_lockp);
1121 CALLB_CPR_SAFE_END(cprp, cprp->cc_lockp);
1122 mutex_exit(cprp->cc_lockp);
1123 }
1124
1125 mutex_exit(&gp->gp_mutex);
1126 (void) flk_invoke_callbacks(request->l_callbacks,
1127 FLK_AFTER_SLEEP);
1128 mutex_enter(&gp->gp_mutex);
1129 } else {
1130 wait_for_lock(request);
1131 }
1132
1133 if (IS_LOCKMGR(request)) {
1134 /*
1135 * If the lock manager is shutting down, return an
1136 * error that will encourage the client to retransmit.
1137 */
1138 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP &&
1139 !IS_GRANTED(request)) {
1140 flk_cancel_sleeping_lock(request, 1);
1141 return (ENOLCK);
1142 }
1143 }
1144
1145 if (IS_INTERRUPTED(request)) {
1146 /* we got a signal, or act like we did */
1147 flk_cancel_sleeping_lock(request, 1);
1148 return (EINTR);
1149 }
1150
1151 /* Cancelled if some other thread has closed the file */
1152
1153 if (IS_CANCELLED(request)) {
1154 flk_cancel_sleeping_lock(request, 1);
1155 return (EBADF);
1156 }
1157
1158 request->l_state &= ~GRANTED_LOCK;
1159 REMOVE_SLEEP_QUEUE(request);
1160 return (flk_execute_request(request));
1161 }
1162
1163 /*
 * This routine adds an edge between from and to because from depends
 * on to.  If asked to check for a deadlock it checks whether there are
 * any locks reachable from "from_lock" that are owned by the same process
 * as "from_lock".
 * NOTE: It is the caller's responsibility to make sure that the color
 * of the graph is consistent between the calls to flk_add_edge as done
 * in flk_process_request.  This routine does not color and check for
 * deadlock explicitly.
1172 */
1173
1174 static int
1175 flk_add_edge(lock_descriptor_t *from_lock, lock_descriptor_t *to_lock,
1176 int check_cycle, int update_graph)
1177 {
1178 edge_t *edge;
1179 edge_t *ep;
1180 lock_descriptor_t *vertex;
1181 lock_descriptor_t *vertex_stack;
1182
1183 STACK_INIT(vertex_stack);
1184
1185 /*
	 * If the to vertex is already colored, just return without adding
	 * an edge, as it is already reachable from the from vertex.
1189 */
1190
1191 if (COLORED(to_lock))
1192 return (0);
1193
1194 edge = flk_get_edge();
1195
1196 /*
1197 * set the from and to vertex
1198 */
1199
1200 edge->from_vertex = from_lock;
1201 edge->to_vertex = to_lock;
1202
1203 /*
1204 * put in adjacency list of from vertex
1205 */
1206
1207 from_lock->l_edge.edge_adj_next->edge_adj_prev = edge;
1208 edge->edge_adj_next = from_lock->l_edge.edge_adj_next;
1209 edge->edge_adj_prev = &from_lock->l_edge;
1210 from_lock->l_edge.edge_adj_next = edge;
1211
1212 /*
	 * put on the in-list of the to vertex
1214 */
1215
1216 to_lock->l_edge.edge_in_next->edge_in_prev = edge;
1217 edge->edge_in_next = to_lock->l_edge.edge_in_next;
1218 to_lock->l_edge.edge_in_next = edge;
1219 edge->edge_in_prev = &to_lock->l_edge;
1220
1221
1222 if (update_graph) {
1223 flk_update_proc_graph(edge, 0);
1224 return (0);
1225 }
1226 if (!check_cycle) {
1227 return (0);
1228 }
1229
1230 STACK_PUSH(vertex_stack, from_lock, l_stack);
1231
1232 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
1233
1234 STACK_POP(vertex_stack, l_stack);
1235
1236 for (ep = FIRST_ADJ(vertex);
1237 ep != HEAD(vertex);
1238 ep = NEXT_ADJ(ep)) {
1239 if (COLORED(ep->to_vertex))
1240 continue;
1241 COLOR(ep->to_vertex);
1242 if (SAME_OWNER(ep->to_vertex, from_lock))
1243 goto dead_lock;
1244 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
1245 }
1246 }
1247 return (0);
1248
1249 dead_lock:
1250
1251 /*
1252 * remove all edges
1253 */
1254
1255 ep = FIRST_ADJ(from_lock);
1256
1257 while (ep != HEAD(from_lock)) {
1258 IN_LIST_REMOVE(ep);
1259 from_lock->l_sedge = NEXT_ADJ(ep);
1260 ADJ_LIST_REMOVE(ep);
1261 flk_free_edge(ep);
1262 ep = from_lock->l_sedge;
1263 }
1264 return (EDEADLK);
1265 }
1266
1267 /*
1268 * Get an edge structure for representing the dependency between two locks.
1269 */
1270
1271 static edge_t *
1272 flk_get_edge()
1273 {
1274 edge_t *ep;
1275
1276 ASSERT(flk_edge_cache != NULL);
1277
1278 ep = kmem_cache_alloc(flk_edge_cache, KM_SLEEP);
1279 edge_allocs++;
1280 return (ep);
1281 }
1282
1283 /*
1284 * Free the edge structure.
1285 */
1286
1287 static void
1288 flk_free_edge(edge_t *ep)
1289 {
1290 edge_frees++;
1291 kmem_cache_free(flk_edge_cache, (void *)ep);
1292 }
1293
1294 /*
 * Check the relationship of request with lock and perform the
 * recomputation of dependencies, break the lock if required, and return
 * 1 if request cannot have any more relationship with the next
 * active locks.
 * The 'lock' and 'request' are compared, and in case of overlap we
 * delete 'lock' and form new locks to represent the non-overlapping
 * portion of the original 'lock'.  This function has side effects:
 * 'lock' will be freed and new locks will be added to the active list.
1303 */
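
/*
 * Worked example (not code): if 'lock' covers [0, 99] and 'request' is an
 * unlock of [40, 59], the code below deletes 'lock' and creates two new
 * active locks for the remainders, lock1 = [0, 39] and lock2 = [60, 99];
 * the unlocked middle simply disappears.  If 'request' were instead an
 * upgrade to a write lock over [40, 59], the same two remainder locks are
 * created and 'request' itself becomes the third piece, inserted into the
 * active queue by the caller.
 */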
1304
1305 static int
1306 flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request)
1307 {
1308 int lock_effect;
1309 lock_descriptor_t *lock1, *lock2;
1310 lock_descriptor_t *topology[3];
1311 int nvertex = 0;
1312 int i;
1313 edge_t *ep;
1314 graph_t *gp = (lock->l_graph);
1315
1316
1317 CHECK_SLEEPING_LOCKS(gp);
1318 CHECK_ACTIVE_LOCKS(gp);
1319
1320 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1321
1322 topology[0] = topology[1] = topology[2] = NULL;
1323
1324 if (request->l_type == F_UNLCK)
1325 lock_effect = FLK_UNLOCK;
1326 else if (request->l_type == F_RDLCK &&
1327 lock->l_type == F_WRLCK)
1328 lock_effect = FLK_DOWNGRADE;
1329 else if (request->l_type == F_WRLCK &&
1330 lock->l_type == F_RDLCK)
1331 lock_effect = FLK_UPGRADE;
1332 else
1333 lock_effect = FLK_STAY_SAME;
1334
1335 if (lock->l_end < request->l_start) {
1336 if (lock->l_end == request->l_start - 1 &&
1337 lock_effect == FLK_STAY_SAME) {
1338 topology[0] = request;
1339 request->l_start = lock->l_start;
1340 nvertex = 1;
1341 goto recompute;
1342 } else {
1343 return (0);
1344 }
1345 }
1346
1347 if (lock->l_start > request->l_end) {
1348 if (request->l_end == lock->l_start - 1 &&
1349 lock_effect == FLK_STAY_SAME) {
1350 topology[0] = request;
1351 request->l_end = lock->l_end;
1352 nvertex = 1;
1353 goto recompute;
1354 } else {
1355 return (1);
1356 }
1357 }
1358
1359 if (request->l_end < lock->l_end) {
1360 if (request->l_start > lock->l_start) {
1361 if (lock_effect == FLK_STAY_SAME) {
1362 request->l_start = lock->l_start;
1363 request->l_end = lock->l_end;
1364 topology[0] = request;
1365 nvertex = 1;
1366 } else {
1367 lock1 = flk_get_lock();
1368 lock2 = flk_get_lock();
1369 COPY(lock1, lock);
1370 COPY(lock2, lock);
1371 lock1->l_start = lock->l_start;
1372 lock1->l_end = request->l_start - 1;
1373 lock2->l_start = request->l_end + 1;
1374 lock2->l_end = lock->l_end;
1375 topology[0] = lock1;
1376 topology[1] = lock2;
1377 topology[2] = request;
1378 nvertex = 3;
1379 }
1380 } else if (request->l_start < lock->l_start) {
1381 if (lock_effect == FLK_STAY_SAME) {
1382 request->l_end = lock->l_end;
1383 topology[0] = request;
1384 nvertex = 1;
1385 } else {
1386 lock1 = flk_get_lock();
1387 COPY(lock1, lock);
1388 lock1->l_start = request->l_end + 1;
1389 topology[0] = lock1;
1390 topology[1] = request;
1391 nvertex = 2;
1392 }
1393 } else {
1394 if (lock_effect == FLK_STAY_SAME) {
1395 request->l_start = lock->l_start;
1396 request->l_end = lock->l_end;
1397 topology[0] = request;
1398 nvertex = 1;
1399 } else {
1400 lock1 = flk_get_lock();
1401 COPY(lock1, lock);
1402 lock1->l_start = request->l_end + 1;
1403 topology[0] = lock1;
1404 topology[1] = request;
1405 nvertex = 2;
1406 }
1407 }
1408 } else if (request->l_end > lock->l_end) {
1409 if (request->l_start > lock->l_start) {
1410 if (lock_effect == FLK_STAY_SAME) {
1411 request->l_start = lock->l_start;
1412 topology[0] = request;
1413 nvertex = 1;
1414 } else {
1415 lock1 = flk_get_lock();
1416 COPY(lock1, lock);
1417 lock1->l_end = request->l_start - 1;
1418 topology[0] = lock1;
1419 topology[1] = request;
1420 nvertex = 2;
1421 }
1422 } else if (request->l_start < lock->l_start) {
1423 topology[0] = request;
1424 nvertex = 1;
1425 } else {
1426 topology[0] = request;
1427 nvertex = 1;
1428 }
1429 } else {
1430 if (request->l_start > lock->l_start) {
1431 if (lock_effect == FLK_STAY_SAME) {
1432 request->l_start = lock->l_start;
1433 topology[0] = request;
1434 nvertex = 1;
1435 } else {
1436 lock1 = flk_get_lock();
1437 COPY(lock1, lock);
1438 lock1->l_end = request->l_start - 1;
1439 topology[0] = lock1;
1440 topology[1] = request;
1441 nvertex = 2;
1442 }
1443 } else if (request->l_start < lock->l_start) {
1444 topology[0] = request;
1445 nvertex = 1;
1446 } else {
1447 if (lock_effect != FLK_UNLOCK) {
1448 topology[0] = request;
1449 nvertex = 1;
1450 } else {
1451 flk_delete_active_lock(lock, 0);
1452 flk_wakeup(lock, 1);
1453 flk_free_lock(lock);
1454 CHECK_SLEEPING_LOCKS(gp);
1455 CHECK_ACTIVE_LOCKS(gp);
1456 return (1);
1457 }
1458 }
1459 }
1460
1461 recompute:
1462
1463 /*
	 * For an unlock we don't send the 'request' in for recomputing
	 * dependencies because no lock will add an edge to it.
1466 */
1467
1468 if (lock_effect == FLK_UNLOCK) {
1469 topology[nvertex-1] = NULL;
1470 nvertex--;
1471 }
1472 for (i = 0; i < nvertex; i++) {
1473 topology[i]->l_state |= RECOMPUTE_LOCK;
1474 topology[i]->l_color = NO_COLOR;
1475 }
1476
1477 ASSERT(FIRST_ADJ(lock) == HEAD(lock));
1478
1479 /*
	 * We remove each in-edge of this vertex 'lock' from the adjacency
	 * list of its from vertex.
1482 */
1483
1484 ep = FIRST_IN(lock);
1485 while (ep != HEAD(lock)) {
1486 ADJ_LIST_REMOVE(ep);
1487 ep = NEXT_IN(ep);
1488 }
1489
1490 flk_delete_active_lock(lock, 0);
1491
1492 /* We are ready for recomputing the dependencies now */
1493
1494 flk_recompute_dependencies(lock, topology, nvertex, 1);
1495
1496 for (i = 0; i < nvertex; i++) {
1497 topology[i]->l_state &= ~RECOMPUTE_LOCK;
1498 topology[i]->l_color = NO_COLOR;
1499 }
1500
1501
1502 if (lock_effect == FLK_UNLOCK) {
1503 nvertex++;
1504 }
1505 for (i = 0; i < nvertex - 1; i++) {
1506 flk_insert_active_lock(topology[i]);
1507 }
1508
1509
1510 if (lock_effect == FLK_DOWNGRADE || lock_effect == FLK_UNLOCK) {
1511 flk_wakeup(lock, 0);
1512 } else {
1513 ep = FIRST_IN(lock);
1514 while (ep != HEAD(lock)) {
1515 lock->l_sedge = NEXT_IN(ep);
1516 IN_LIST_REMOVE(ep);
1517 flk_update_proc_graph(ep, 1);
1518 flk_free_edge(ep);
1519 ep = lock->l_sedge;
1520 }
1521 }
1522 flk_free_lock(lock);
1523
1524 CHECK_SLEEPING_LOCKS(gp);
1525 CHECK_ACTIVE_LOCKS(gp);
1526 return (0);
1527 }
1528
1529 /*
1530 * Insert a lock into the active queue.
1531 */
1532
1533 static void
1534 flk_insert_active_lock(lock_descriptor_t *new_lock)
1535 {
1536 graph_t *gp = new_lock->l_graph;
1537 vnode_t *vp = new_lock->l_vnode;
1538 lock_descriptor_t *first_lock, *lock;
1539
1540 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1541
1542 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1543 first_lock = lock;
1544
1545 if (first_lock != NULL) {
1546 for (; (lock->l_vnode == vp &&
1547 lock->l_start < new_lock->l_start); lock = lock->l_next)
1548 ;
1549 } else {
1550 lock = ACTIVE_HEAD(gp);
1551 }
1552
1553 lock->l_prev->l_next = new_lock;
1554 new_lock->l_next = lock;
1555 new_lock->l_prev = lock->l_prev;
1556 lock->l_prev = new_lock;
1557
1558 if (first_lock == NULL || (new_lock->l_start <= first_lock->l_start)) {
1559 vp->v_filocks = (struct filock *)new_lock;
1560 }
1561 flk_set_state(new_lock, FLK_ACTIVE_STATE);
1562 new_lock->l_state |= ACTIVE_LOCK;
1563
1564 CHECK_ACTIVE_LOCKS(gp);
1565 CHECK_SLEEPING_LOCKS(gp);
1566 }
1567
1568 /*
 * Delete the active lock.  Performs one of two functions depending on the
 * value of the second parameter: either remove the lock from the active
 * lists only, or both remove and free the lock.
1572 */
1573
1574 static void
1575 flk_delete_active_lock(lock_descriptor_t *lock, int free_lock)
1576 {
1577 vnode_t *vp = lock->l_vnode;
1578 graph_t *gp = lock->l_graph;
1579
1580 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1581 if (free_lock)
1582 ASSERT(NO_DEPENDENTS(lock));
1583 ASSERT(NOT_BLOCKED(lock));
1584 ASSERT(IS_ACTIVE(lock));
1585
1586 ASSERT((vp->v_filocks != NULL));
1587
1588 if (vp->v_filocks == (struct filock *)lock) {
1589 vp->v_filocks = (struct filock *)
1590 ((lock->l_next->l_vnode == vp) ? lock->l_next :
1591 NULL);
1592 }
1593 lock->l_next->l_prev = lock->l_prev;
1594 lock->l_prev->l_next = lock->l_next;
1595 lock->l_next = lock->l_prev = NULL;
1596 flk_set_state(lock, FLK_DEAD_STATE);
1597 lock->l_state &= ~ACTIVE_LOCK;
1598
1599 if (free_lock)
1600 flk_free_lock(lock);
1601 CHECK_ACTIVE_LOCKS(gp);
1602 CHECK_SLEEPING_LOCKS(gp);
1603 }
1604
1605 /*
1606 * Insert into the sleep queue.
1607 */
1608
1609 static void
1610 flk_insert_sleeping_lock(lock_descriptor_t *request)
1611 {
1612 graph_t *gp = request->l_graph;
1613 vnode_t *vp = request->l_vnode;
1614 lock_descriptor_t *lock;
1615
1616 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1617 ASSERT(IS_INITIAL(request));
1618
1619 for (lock = gp->sleeping_locks.l_next; (lock != &gp->sleeping_locks &&
1620 lock->l_vnode < vp); lock = lock->l_next)
1621 ;
1622
1623 lock->l_prev->l_next = request;
1624 request->l_prev = lock->l_prev;
1625 lock->l_prev = request;
1626 request->l_next = lock;
1627 flk_set_state(request, FLK_SLEEPING_STATE);
1628 request->l_state |= SLEEPING_LOCK;
1629 }
1630
1631 /*
1632 * Cancelling a sleeping lock implies removing a vertex from the
1633 * dependency graph and therefore we should recompute the dependencies
1634 * of all vertices that have a path to this vertex, w.r.t. all
1635 * vertices reachable from this vertex.
1636 */
1637
1638 void
1639 flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue)
1640 {
1641 graph_t *gp = request->l_graph;
1642 vnode_t *vp = request->l_vnode;
1643 lock_descriptor_t **topology = NULL;
1644 edge_t *ep;
1645 lock_descriptor_t *vertex, *lock;
1646 int nvertex = 0;
1647 int i;
1648 lock_descriptor_t *vertex_stack;
1649
1650 STACK_INIT(vertex_stack);
1651
1652 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1653 /*
	 * Count the number of vertex pointers that have to be allocated:
	 * all vertices that are reachable from request.
1656 */
1657
1658 STACK_PUSH(vertex_stack, request, l_stack);
1659
1660 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
1661 STACK_POP(vertex_stack, l_stack);
1662 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
1663 ep = NEXT_ADJ(ep)) {
1664 if (IS_RECOMPUTE(ep->to_vertex))
1665 continue;
1666 ep->to_vertex->l_state |= RECOMPUTE_LOCK;
1667 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
1668 nvertex++;
1669 }
1670 }
1671
1672 /*
1673 * allocate memory for holding the vertex pointers
1674 */
1675
1676 if (nvertex) {
1677 topology = kmem_zalloc(nvertex * sizeof (lock_descriptor_t *),
1678 KM_SLEEP);
1679 }
1680
1681 /*
1682 * one more pass to actually store the vertices in the
1683 * allocated array.
	 * We first check sleeping locks and then active locks
	 * so that the topology array will be in topological
	 * order.
1687 */
1688
1689 nvertex = 0;
1690 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
1691
1692 if (lock) {
1693 do {
1694 if (IS_RECOMPUTE(lock)) {
1695 lock->l_index = nvertex;
1696 topology[nvertex++] = lock;
1697 }
1698 lock->l_color = NO_COLOR;
1699 lock = lock->l_next;
1700 } while (lock->l_vnode == vp);
1701 }
1702
1703 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1704
1705 if (lock) {
1706 do {
1707 if (IS_RECOMPUTE(lock)) {
1708 lock->l_index = nvertex;
1709 topology[nvertex++] = lock;
1710 }
1711 lock->l_color = NO_COLOR;
1712 lock = lock->l_next;
1713 } while (lock->l_vnode == vp);
1714 }
1715
1716 /*
	 * Remove the in and out edges of request.
	 * They are freed after updating proc_graph below.
1719 */
1720
1721 for (ep = FIRST_IN(request); ep != HEAD(request); ep = NEXT_IN(ep)) {
1722 ADJ_LIST_REMOVE(ep);
1723 }
1724
1725
1726 if (remove_from_queue)
1727 REMOVE_SLEEP_QUEUE(request);
1728
1729 /* we are ready to recompute */
1730
1731 flk_recompute_dependencies(request, topology, nvertex, 1);
1732
1733 ep = FIRST_ADJ(request);
1734 while (ep != HEAD(request)) {
1735 IN_LIST_REMOVE(ep);
1736 request->l_sedge = NEXT_ADJ(ep);
1737 ADJ_LIST_REMOVE(ep);
1738 flk_update_proc_graph(ep, 1);
1739 flk_free_edge(ep);
1740 ep = request->l_sedge;
1741 }
1742
1743
1744 /*
1745 * unset the RECOMPUTE flag in those vertices
1746 */
1747
1748 for (i = 0; i < nvertex; i++) {
1749 topology[i]->l_state &= ~RECOMPUTE_LOCK;
1750 }
1751
1752 /*
1753 * free the topology
1754 */
1755 if (nvertex)
1756 kmem_free((void *)topology,
1757 (nvertex * sizeof (lock_descriptor_t *)));
1758 /*
1759 * Possibility of some locks unblocked now
1760 */
1761
1762 flk_wakeup(request, 0);
1763
1764 /*
1765 * we expect to have a correctly recomputed graph now.
1766 */
1767 flk_set_state(request, FLK_DEAD_STATE);
1768 flk_free_lock(request);
1769 CHECK_SLEEPING_LOCKS(gp);
1770 CHECK_ACTIVE_LOCKS(gp);
1771
1772 }
1773
1774 /*
 * Uncoloring the graph is simply incrementing the graph's mark value;
 * only when wraparound takes place do we explicitly reset the color of
 * every vertex in the graph.
1778 */
1779
1780 static void
1781 flk_graph_uncolor(graph_t *gp)
1782 {
1783 lock_descriptor_t *lock;
1784
1785 if (gp->mark == UINT_MAX) {
1786 gp->mark = 1;
1787 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
1788 lock = lock->l_next)
1789 lock->l_color = 0;
1790
1791 for (lock = SLEEPING_HEAD(gp)->l_next; lock != SLEEPING_HEAD(gp);
1792 lock = lock->l_next)
1793 lock->l_color = 0;
1794 } else {
1795 gp->mark++;
1796 }
1797 }
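
/*
 * A sketch of the coloring scheme this relies on (the real macros live in
 * flock_impl.h; this is an assumption about their shape): a vertex counts
 * as colored iff its l_color equals the graph's current mark, roughly
 *
 *	COLORED(v)  ->  (v)->l_color == (v)->l_graph->mark
 *	COLOR(v)    ->  (v)->l_color = (v)->l_graph->mark
 *
 * so bumping gp->mark instantly "uncolors" every vertex, and the explicit
 * reset to 0 above is needed only when the mark counter wraps around.
 */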
1798
1799 /*
1800 * Wake up locks that are blocked on the given lock.
1801 */
1802
1803 static void
1804 flk_wakeup(lock_descriptor_t *lock, int adj_list_remove)
1805 {
1806 edge_t *ep;
1807 graph_t *gp = lock->l_graph;
1808 lock_descriptor_t *lck;
1809
1810 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1811 if (NO_DEPENDENTS(lock))
1812 return;
1813 ep = FIRST_IN(lock);
1814 do {
1815 /*
		 * Delete the edge from the adjacency list of the from
		 * vertex.  If that vertex has no more adjacent edges,
		 * wake up its process.
1819 */
1820 lck = ep->from_vertex;
1821 if (adj_list_remove)
1822 ADJ_LIST_REMOVE(ep);
1823 flk_update_proc_graph(ep, 1);
1824 if (NOT_BLOCKED(lck)) {
1825 GRANT_WAKEUP(lck);
1826 }
1827 lock->l_sedge = NEXT_IN(ep);
1828 IN_LIST_REMOVE(ep);
1829 flk_free_edge(ep);
1830 ep = lock->l_sedge;
1831 } while (ep != HEAD(lock));
1832 ASSERT(NO_DEPENDENTS(lock));
1833 }
1834
1835 /*
 * Each dependent of request is checked for its dependency against the
 * locks in topology (called topology because the array is, and should be,
 * in topological order for this algorithm; if it is not in topological
 * order, the inner loop below might add more edges than necessary.
 * A topological ordering of vertices satisfies the property that all
 * edges go from left to right, i.e., topology[i] can have an edge to
 * topology[j] only if i < j).
 * If lock l1 in the dependent set of request is dependent on (blocked by)
 * lock l2 in topology but does not have a path to it, we add an edge
 * in the inner loop below.
 *
 * We don't want to add an edge between l1 and l2 if there already exists
 * a path from l1 to l2, so care has to be taken for those vertices
 * that have two paths to 'request'.  These vertices are referred to here
 * as barrier locks.
 *
 * The barriers have to be found (those vertices that originally had two
 * paths to request) because otherwise we may end up adding edges
 * unnecessarily to vertices in topology, and then a barrier vertex could
 * have both an edge to a vertex in topology and a path to it.
1855 */
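
/*
 * Barrier example (not code): if the edges l1 -> l2 -> request and
 * l1 -> request both exist, then l1 has two paths to request and is
 * marked as a barrier with l_index (barrier count) = 1.  During the
 * recomputation, the walk up the in-edges reaches l1 twice; the first
 * visit only decrements the barrier count, and only the second visit
 * actually recomputes l1's edges, so l1 is processed exactly once and no
 * redundant edge from l1 to a topology vertex is added.
 */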
1856
1857 static void
1858 flk_recompute_dependencies(lock_descriptor_t *request,
1859 lock_descriptor_t **topology,
1860 int nvertex, int update_graph)
1861 {
1862 lock_descriptor_t *vertex, *lock;
1863 graph_t *gp = request->l_graph;
1864 int i, count;
1865 int barrier_found = 0;
1866 edge_t *ep;
1867 lock_descriptor_t *vertex_stack;
1868
1869 STACK_INIT(vertex_stack);
1870
1871 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1872 if (nvertex == 0)
1873 return;
1874 flk_graph_uncolor(request->l_graph);
1875 barrier_found = flk_find_barriers(request);
1876 request->l_state |= RECOMPUTE_DONE;
1877
1878 STACK_PUSH(vertex_stack, request, l_stack);
1879 request->l_sedge = FIRST_IN(request);
1880
1881
1882 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
1883 if (vertex->l_state & RECOMPUTE_DONE) {
1884 count = 0;
1885 goto next_in_edge;
1886 }
1887 if (IS_BARRIER(vertex)) {
1888 /* decrement the barrier count */
1889 if (vertex->l_index) {
1890 vertex->l_index--;
1891 /* this guy will be pushed again anyway ? */
1892 STACK_POP(vertex_stack, l_stack);
1893 if (vertex->l_index == 0) {
1894 /*
1895 * barrier is over we can recompute
1896 * dependencies for this lock in the
1897 * next stack pop
1898 */
1899 vertex->l_state &= ~BARRIER_LOCK;
1900 }
1901 continue;
1902 }
1903 }
1904 vertex->l_state |= RECOMPUTE_DONE;
1905 flk_graph_uncolor(gp);
1906 count = flk_color_reachables(vertex);
1907 for (i = 0; i < nvertex; i++) {
1908 lock = topology[i];
1909 if (COLORED(lock))
1910 continue;
1911 if (BLOCKS(lock, vertex)) {
1912 (void) flk_add_edge(vertex, lock,
1913 NO_CHECK_CYCLE, update_graph);
1914 COLOR(lock);
1915 count++;
1916 count += flk_color_reachables(lock);
1917 }
1918
1919 }
1920
1921 next_in_edge:
1922 if (count == nvertex ||
1923 vertex->l_sedge == HEAD(vertex)) {
1924 /* prune the tree below this */
1925 STACK_POP(vertex_stack, l_stack);
1926 vertex->l_state &= ~RECOMPUTE_DONE;
1927 /* update the barrier locks below this! */
1928 if (vertex->l_sedge != HEAD(vertex) && barrier_found) {
1929 flk_graph_uncolor(gp);
1930 flk_update_barriers(vertex);
1931 }
1932 continue;
1933 }
1934
1935 ep = vertex->l_sedge;
1936 lock = ep->from_vertex;
1937 STACK_PUSH(vertex_stack, lock, l_stack);
1938 lock->l_sedge = FIRST_IN(lock);
1939 vertex->l_sedge = NEXT_IN(ep);
1940 }
1941
1942 }
1943
1944 /*
 * Color all not-yet-colored vertices reachable from 'vertex', and count
 * those that belong to topology (i.e., have RECOMPUTE_LOCK set in their
 * state).
1947 *
1948 * Note: we need to use a different stack_link l_stack1 because this is
1949 * called from flk_recompute_dependencies() that already uses a stack with
1950 * l_stack as stack_link.
1951 */
1952
1953 static int
1954 flk_color_reachables(lock_descriptor_t *vertex)
1955 {
1956 lock_descriptor_t *ver, *lock;
1957 int count;
1958 edge_t *ep;
1959 lock_descriptor_t *vertex_stack;
1960
1961 STACK_INIT(vertex_stack);
1962
1963 STACK_PUSH(vertex_stack, vertex, l_stack1);
1964 count = 0;
1965 while ((ver = STACK_TOP(vertex_stack)) != NULL) {
1966
1967 STACK_POP(vertex_stack, l_stack1);
1968 for (ep = FIRST_ADJ(ver); ep != HEAD(ver);
1969 ep = NEXT_ADJ(ep)) {
1970 lock = ep->to_vertex;
1971 if (COLORED(lock))
1972 continue;
1973 COLOR(lock);
1974 if (IS_RECOMPUTE(lock))
1975 count++;
1976 STACK_PUSH(vertex_stack, lock, l_stack1);
1977 }
1978
1979 }
1980 return (count);
1981 }
1982
1983 /*
1984 * Called from flk_recompute_dependencies() this routine decrements
1985 * the barrier count of barrier vertices that are reachable from lock.
1986 */
1987
1988 static void
1989 flk_update_barriers(lock_descriptor_t *lock)
1990 {
1991 lock_descriptor_t *vertex, *lck;
1992 edge_t *ep;
1993 lock_descriptor_t *vertex_stack;
1994
1995 STACK_INIT(vertex_stack);
1996
1997 STACK_PUSH(vertex_stack, lock, l_stack1);
1998
1999 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
2000 STACK_POP(vertex_stack, l_stack1);
2001 for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
2002 ep = NEXT_IN(ep)) {
2003 lck = ep->from_vertex;
2004 if (COLORED(lck)) {
2005 if (IS_BARRIER(lck)) {
2006 ASSERT(lck->l_index > 0);
2007 lck->l_index--;
2008 if (lck->l_index == 0)
2009 lck->l_state &= ~BARRIER_LOCK;
2010 }
2011 continue;
2012 }
2013 COLOR(lck);
2014 if (IS_BARRIER(lck)) {
2015 ASSERT(lck->l_index > 0);
2016 lck->l_index--;
2017 if (lck->l_index == 0)
2018 lck->l_state &= ~BARRIER_LOCK;
2019 }
2020 STACK_PUSH(vertex_stack, lck, l_stack1);
2021 }
2022 }
2023 }
2024
2025 /*
2026 * Find all vertices that are reachable from 'lock' through more than one
2027 * path, mark them as barrier vertices, and track their barrier count.
2028 * The barrier count is one less than the total number of paths from 'lock'
2029 * to that vertex.
2030 */
2031
2032 static int
2033 flk_find_barriers(lock_descriptor_t *lock)
2034 {
2035 lock_descriptor_t *vertex, *lck;
2036 int found = 0;
2037 edge_t *ep;
2038 lock_descriptor_t *vertex_stack;
2039
2040 STACK_INIT(vertex_stack);
2041
2042 STACK_PUSH(vertex_stack, lock, l_stack1);
2043
2044 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
2045 STACK_POP(vertex_stack, l_stack1);
2046 for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
2047 ep = NEXT_IN(ep)) {
2048 lck = ep->from_vertex;
2049 if (COLORED(lck)) {
2050 /* this is a barrier */
2051 lck->l_state |= BARRIER_LOCK;
2052 /* index will have barrier count */
2053 lck->l_index++;
2054 if (!found)
2055 found = 1;
2056 continue;
2057 }
2058 COLOR(lck);
2059 lck->l_index = 0;
2060 STACK_PUSH(vertex_stack, lck, l_stack1);
2061 }
2062 }
2063 return (found);
2064 }
2065
2066 /*
2067 * Finds the first lock that is mainly responsible for blocking this
2068 * request. If there is no such lock, request->l_flock.l_type is set to
2069 * F_UNLCK. Otherwise, request->l_flock is filled in with the particulars
2070 * of the blocking lock.
2071 *
2072 * Note: It is possible a request is blocked by a sleeping lock because
2073 * of the fairness policy used in flk_process_request() to construct the
2074 * dependencies. (see comments before flk_process_request()).
2075 */
2076
2077 static void
2078 flk_get_first_blocking_lock(lock_descriptor_t *request)
2079 {
2080 graph_t *gp = request->l_graph;
2081 vnode_t *vp = request->l_vnode;
2082 lock_descriptor_t *lock, *blocker;
2083
2084 ASSERT(MUTEX_HELD(&gp->gp_mutex));
2085 blocker = NULL;
2086 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2087
2088 if (lock) {
2089 do {
2090 if (BLOCKS(lock, request)) {
2091 blocker = lock;
2092 break;
2093 }
2094 lock = lock->l_next;
2095 } while (lock->l_vnode == vp);
2096 }
2097
2098 if (blocker) {
2099 report_blocker(blocker, request);
2100 } else
2101 request->l_flock.l_type = F_UNLCK;
2102 }
2103
2104 /*
2105 * Get the graph_t structure associated with a vnode.
2106 * If 'initialize' is non-zero, and the graph_t structure for this vnode has
2107 * not yet been initialized, then a new element is allocated and returned.
2108 */
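/*
 * Illustrative usage sketch (vp is a hypothetical held vnode; callers in
 * this file follow the same pattern):
 *
 *	graph_t *gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
 *
 *	if (gp == NULL)
 *		return (0);	(no graph means no locks for this vnode)
 *	mutex_enter(&gp->gp_mutex);
 *	(walk ACTIVE_HEAD(gp) or SLEEPING_HEAD(gp) here)
 *	mutex_exit(&gp->gp_mutex);
 */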
2109 graph_t *
2110 flk_get_lock_graph(vnode_t *vp, int initialize)
2111 {
2112 graph_t *gp;
2113 graph_t *gp_alloc = NULL;
2114 int index = HASH_INDEX(vp);
2115
2116 if (initialize == FLK_USE_GRAPH) {
2117 mutex_enter(&flock_lock);
2118 gp = lock_graph[index];
2119 mutex_exit(&flock_lock);
2120 return (gp);
2121 }
2122
2123 ASSERT(initialize == FLK_INIT_GRAPH);
2124
2125 if (lock_graph[index] == NULL) {
2126
2127 gp_alloc = kmem_zalloc(sizeof (graph_t), KM_SLEEP);
2128
2129 /* Initialize the graph */
2130
2131 gp_alloc->active_locks.l_next =
2132 gp_alloc->active_locks.l_prev =
2133 (lock_descriptor_t *)ACTIVE_HEAD(gp_alloc);
2134 gp_alloc->sleeping_locks.l_next =
2135 gp_alloc->sleeping_locks.l_prev =
2136 (lock_descriptor_t *)SLEEPING_HEAD(gp_alloc);
2137 gp_alloc->index = index;
2138 mutex_init(&gp_alloc->gp_mutex, NULL, MUTEX_DEFAULT, NULL);
2139 }
2140
2141 mutex_enter(&flock_lock);
2142
2143 gp = lock_graph[index];
2144
2145 /* Recheck the value within flock_lock */
2146 if (gp == NULL) {
2147 struct flock_globals *fg;
2148
2149 /* We must have previously allocated the graph_t structure */
2150 ASSERT(gp_alloc != NULL);
2151 lock_graph[index] = gp = gp_alloc;
2152 /*
2153 * The lockmgr status is only needed if KLM is loaded.
2154 */
2155 if (flock_zone_key != ZONE_KEY_UNINITIALIZED) {
2156 fg = flk_get_globals();
2157 fg->lockmgr_status[index] = fg->flk_lockmgr_status;
2158 }
2159 }
2160
2161 mutex_exit(&flock_lock);
2162
2163 if ((gp_alloc != NULL) && (gp != gp_alloc)) {
2164 /* There was a race to allocate the graph_t and we lost */
2165 mutex_destroy(&gp_alloc->gp_mutex);
2166 kmem_free(gp_alloc, sizeof (graph_t));
2167 }
2168
2169 return (gp);
2170 }
2171
2172 /*
2173 * PSARC case 1997/292
2174 */
2175 int
2176 cl_flk_has_remote_locks_for_nlmid(vnode_t *vp, int nlmid)
2177 {
2178 lock_descriptor_t *lock;
2179 int result = 0;
2180 graph_t *gp;
2181 int lock_nlmid;
2182
2183 /*
2184 * Check to see if node is booted as a cluster. If not, return.
2185 */
2186 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
2187 return (0);
2188 }
2189
2190 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2191 if (gp == NULL) {
2192 return (0);
2193 }
2194
2195 mutex_enter(&gp->gp_mutex);
2196
2197 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2198
2199 if (lock) {
2200 while (lock->l_vnode == vp) {
2201 /* get NLM id from sysid */
2202 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
2203
2204 /*
2205 * If NLM server request _and_ nlmid of lock matches
2206 * nlmid of argument, then we've found a remote lock.
2207 */
2208 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
2209 result = 1;
2210 goto done;
2211 }
2212 lock = lock->l_next;
2213 }
2214 }
2215
2216 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2217
2218 if (lock) {
2219 while (lock->l_vnode == vp) {
2220 /* get NLM id from sysid */
2221 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
2222
2223 /*
2224 * If NLM server request _and_ nlmid of lock matches
2225 * nlmid of argument, then we've found a remote lock.
2226 */
2227 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
2228 result = 1;
2229 goto done;
2230 }
2231 lock = lock->l_next;
2232 }
2233 }
2234
2235 done:
2236 mutex_exit(&gp->gp_mutex);
2237 return (result);
2238 }
2239
2240 /*
2241 * Determine whether there are any locks for the given vnode with a remote
2242 * sysid. Returns zero if not, non-zero if there are.
2243 *
2244 * Note that the return value from this function is potentially invalid
2245 * once it has been returned. The caller is responsible for providing its
2246 * own synchronization mechanism to ensure that the return value is useful
2247 * (e.g., see nfs_lockcompletion()).
2248 */
2249 int
2250 flk_has_remote_locks(vnode_t *vp)
2251 {
2252 lock_descriptor_t *lock;
2253 int result = 0;
2254 graph_t *gp;
2255
2256 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2257 if (gp == NULL) {
2258 return (0);
2259 }
2260
2261 mutex_enter(&gp->gp_mutex);
2262
2263 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2264
2265 if (lock) {
2266 while (lock->l_vnode == vp) {
2267 if (IS_REMOTE(lock)) {
2268 result = 1;
2269 goto done;
2270 }
2271 lock = lock->l_next;
2272 }
2273 }
2274
2275 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2276
2277 if (lock) {
2278 while (lock->l_vnode == vp) {
2279 if (IS_REMOTE(lock)) {
2280 result = 1;
2281 goto done;
2282 }
2283 lock = lock->l_next;
2284 }
2285 }
2286
2287 done:
2288 mutex_exit(&gp->gp_mutex);
2289 return (result);
2290 }
2291
2292 /*
2293 * Determine if there are any locks owned by the given sysid.
2294 * Returns zero if not, non-zero if there are. Note that this return code
2295 * could be derived from flk_get_{sleeping,active}_locks, but this routine
2296 * avoids all the memory allocations of those routines.
2297 *
2298 * This routine has the same synchronization issues as
2299 * flk_has_remote_locks.
2300 */
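/*
 * Illustrative usage sketch (the sysid value is hypothetical):
 *
 *	if (flk_sysid_has_locks(sysid, FLK_QUERY_ACTIVE | FLK_QUERY_SLEEPING))
 *		(the client identified by sysid still holds, or is waiting
 *		 for, at least one lock somewhere in the system)
 */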
2301
2302 int
2303 flk_sysid_has_locks(int sysid, int lck_type)
2304 {
2305 int has_locks = 0;
2306 lock_descriptor_t *lock;
2307 graph_t *gp;
2308 int i;
2309
2310 for (i = 0; i < HASH_SIZE && !has_locks; i++) {
2311 mutex_enter(&flock_lock);
2312 gp = lock_graph[i];
2313 mutex_exit(&flock_lock);
2314 if (gp == NULL) {
2315 continue;
2316 }
2317
2318 mutex_enter(&gp->gp_mutex);
2319
2320 if (lck_type & FLK_QUERY_ACTIVE) {
2321 for (lock = ACTIVE_HEAD(gp)->l_next;
2322 lock != ACTIVE_HEAD(gp) && !has_locks;
2323 lock = lock->l_next) {
2324 if (lock->l_flock.l_sysid == sysid)
2325 has_locks = 1;
2326 }
2327 }
2328
2329 if (lck_type & FLK_QUERY_SLEEPING) {
2330 for (lock = SLEEPING_HEAD(gp)->l_next;
2331 lock != SLEEPING_HEAD(gp) && !has_locks;
2332 lock = lock->l_next) {
2333 if (lock->l_flock.l_sysid == sysid)
2334 has_locks = 1;
2335 }
2336 }
2337 mutex_exit(&gp->gp_mutex);
2338 }
2339
2340 return (has_locks);
2341 }
2342
2343
2344 /*
2345 * PSARC case 1997/292
2346 *
2347 * Requires: "sysid" is a pair [nlmid, sysid]. The lower half is a 16-bit
2348 * quantity, the real sysid generated by the NLM server; the upper half
2349 * identifies the node of the cluster where the NLM server ran.
2350 * This routine is only called by an NLM server running in a cluster.
2351 * Effects: Remove all locks held on behalf of the client identified
2352 * by "sysid."
2353 */
2354 void
2355 cl_flk_remove_locks_by_sysid(int sysid)
2356 {
2357 graph_t *gp;
2358 int i;
2359 lock_descriptor_t *lock, *nlock;
2360
2361 /*
2362 * Check to see if node is booted as a cluster. If not, return.
2363 */
2364 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
2365 return;
2366 }
2367
2368 ASSERT(sysid != 0);
2369 for (i = 0; i < HASH_SIZE; i++) {
2370 mutex_enter(&flock_lock);
2371 gp = lock_graph[i];
2372 mutex_exit(&flock_lock);
2373
2374 if (gp == NULL)
2375 continue;
2376
2377 mutex_enter(&gp->gp_mutex); /* get mutex on lock graph */
2378
2379 /* signal sleeping requests so that they bail out */
2380 lock = SLEEPING_HEAD(gp)->l_next;
2381 while (lock != SLEEPING_HEAD(gp)) {
2382 nlock = lock->l_next;
2383 if (lock->l_flock.l_sysid == sysid) {
2384 INTERRUPT_WAKEUP(lock);
2385 }
2386 lock = nlock;
2387 }
2388
2389 /* delete active locks */
2390 lock = ACTIVE_HEAD(gp)->l_next;
2391 while (lock != ACTIVE_HEAD(gp)) {
2392 nlock = lock->l_next;
2393 if (lock->l_flock.l_sysid == sysid) {
2394 flk_delete_active_lock(lock, 0);
2395 flk_wakeup(lock, 1);
2396 flk_free_lock(lock);
2397 }
2398 lock = nlock;
2399 }
2400 mutex_exit(&gp->gp_mutex); /* release mutex on lock graph */
2401 }
2402 }
2403
2404 /*
2405 * Delete all locks in the system that belong to the sysid of the request.
2406 */
2407
2408 static void
2409 flk_delete_locks_by_sysid(lock_descriptor_t *request)
2410 {
2411 int sysid = request->l_flock.l_sysid;
2412 lock_descriptor_t *lock, *nlock;
2413 graph_t *gp;
2414 int i;
2415
2416 ASSERT(MUTEX_HELD(&request->l_graph->gp_mutex));
2417 ASSERT(sysid != 0);
2418
2419 mutex_exit(&request->l_graph->gp_mutex);
2420
2421 for (i = 0; i < HASH_SIZE; i++) {
2422 mutex_enter(&flock_lock);
2423 gp = lock_graph[i];
2424 mutex_exit(&flock_lock);
2425
2426 if (gp == NULL)
2427 continue;
2428
2429 mutex_enter(&gp->gp_mutex);
2430
2431 /* signal sleeping requests so that they bail out */
2432 lock = SLEEPING_HEAD(gp)->l_next;
2433 while (lock != SLEEPING_HEAD(gp)) {
2434 nlock = lock->l_next;
2435 if (lock->l_flock.l_sysid == sysid) {
2436 INTERRUPT_WAKEUP(lock);
2437 }
2438 lock = nlock;
2439 }
2440
2441 /* delete active locks */
2442 lock = ACTIVE_HEAD(gp)->l_next;
2443 while (lock != ACTIVE_HEAD(gp)) {
2444 nlock = lock->l_next;
2445 if (lock->l_flock.l_sysid == sysid) {
2446 flk_delete_active_lock(lock, 0);
2447 flk_wakeup(lock, 1);
2448 flk_free_lock(lock);
2449 }
2450 lock = nlock;
2451 }
2452 mutex_exit(&gp->gp_mutex);
2453 }
2454
2455 mutex_enter(&request->l_graph->gp_mutex);
2456 }
2457
2458 /*
2459 * Clustering: Deletes PXFS locks
2460 * Effects: Delete all locks on files in the given file system and with the
2461 * given PXFS id.
2462 */
2463 void
2464 cl_flk_delete_pxfs_locks(struct vfs *vfsp, int pxfsid)
2465 {
2466 lock_descriptor_t *lock, *nlock;
2467 graph_t *gp;
2468 int i;
2469
2470 for (i = 0; i < HASH_SIZE; i++) {
2471 mutex_enter(&flock_lock);
2472 gp = lock_graph[i];
2473 mutex_exit(&flock_lock);
2474
2475 if (gp == NULL)
2476 continue;
2477
2478 mutex_enter(&gp->gp_mutex);
2479
2480 /* signal sleeping requests so that they bail out */
2481 lock = SLEEPING_HEAD(gp)->l_next;
2482 while (lock != SLEEPING_HEAD(gp)) {
2483 nlock = lock->l_next;
2484 if (lock->l_vnode->v_vfsp == vfsp) {
2485 ASSERT(IS_PXFS(lock));
2486 if (GETPXFSID(lock->l_flock.l_sysid) ==
2487 pxfsid) {
2488 flk_set_state(lock,
2489 FLK_CANCELLED_STATE);
2490 flk_cancel_sleeping_lock(lock, 1);
2491 }
2492 }
2493 lock = nlock;
2494 }
2495
2496 /* delete active locks */
2497 lock = ACTIVE_HEAD(gp)->l_next;
2498 while (lock != ACTIVE_HEAD(gp)) {
2499 nlock = lock->l_next;
2500 if (lock->l_vnode->v_vfsp == vfsp) {
2501 ASSERT(IS_PXFS(lock));
2502 if (GETPXFSID(lock->l_flock.l_sysid) ==
2503 pxfsid) {
2504 flk_delete_active_lock(lock, 0);
2505 flk_wakeup(lock, 1);
2506 flk_free_lock(lock);
2507 }
2508 }
2509 lock = nlock;
2510 }
2511 mutex_exit(&gp->gp_mutex);
2512 }
2513 }
2514
2515 /*
2516 * Search for a sleeping lock manager lock which matches exactly this lock
2517 * request; if one is found, fake a signal to cancel it.
2518 *
2519 * Return 1 if a matching lock was found, 0 otherwise.
2520 */
2521
2522 static int
2523 flk_canceled(lock_descriptor_t *request)
2524 {
2525 lock_descriptor_t *lock, *nlock;
2526 graph_t *gp = request->l_graph;
2527 vnode_t *vp = request->l_vnode;
2528
2529 ASSERT(MUTEX_HELD(&gp->gp_mutex));
2530 ASSERT(IS_LOCKMGR(request));
2531 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2532
2533 if (lock) {
2534 while (lock->l_vnode == vp) {
2535 nlock = lock->l_next;
2536 if (SAME_OWNER(lock, request) &&
2537 lock->l_start == request->l_start &&
2538 lock->l_end == request->l_end) {
2539 INTERRUPT_WAKEUP(lock);
2540 return (1);
2541 }
2542 lock = nlock;
2543 }
2544 }
2545 return (0);
2546 }
2547
2548 /*
2549 * Remove all the locks for the vnode belonging to the given pid and sysid.
2550 */
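/*
 * Illustrative sketch (the caller shown is hypothetical): a file system
 * dropping the current process's local locks on a vnode at last close
 * might do
 *
 *	cleanlocks(vp, curproc->p_pid, 0);
 *
 * while passing pid == IGN_PID removes the locks of every process that
 * matches the given sysid.
 */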
2551
2552 void
2553 cleanlocks(vnode_t *vp, pid_t pid, int sysid)
2554 {
2555 graph_t *gp;
2556 lock_descriptor_t *lock, *nlock;
2557 lock_descriptor_t *link_stack;
2558
2559 STACK_INIT(link_stack);
2560
2561 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2562
2563 if (gp == NULL)
2564 return;
2565 mutex_enter(&gp->gp_mutex);
2566
2567 CHECK_SLEEPING_LOCKS(gp);
2568 CHECK_ACTIVE_LOCKS(gp);
2569
2570 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2571
2572 if (lock) {
2573 do {
2574 nlock = lock->l_next;
2575 if ((lock->l_flock.l_pid == pid ||
2576 pid == IGN_PID) &&
2577 lock->l_flock.l_sysid == sysid) {
2578 CANCEL_WAKEUP(lock);
2579 }
2580 lock = nlock;
2581 } while (lock->l_vnode == vp);
2582 }
2583
2584 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2585
2586 if (lock) {
2587 do {
2588 nlock = lock->l_next;
2589 if ((lock->l_flock.l_pid == pid ||
2590 pid == IGN_PID) &&
2591 lock->l_flock.l_sysid == sysid) {
2592 flk_delete_active_lock(lock, 0);
2593 STACK_PUSH(link_stack, lock, l_stack);
2594 }
2595 lock = nlock;
2596 } while (lock->l_vnode == vp);
2597 }
2598
2599 while ((lock = STACK_TOP(link_stack)) != NULL) {
2600 STACK_POP(link_stack, l_stack);
2601 flk_wakeup(lock, 1);
2602 flk_free_lock(lock);
2603 }
2604
2605 CHECK_SLEEPING_LOCKS(gp);
2606 CHECK_ACTIVE_LOCKS(gp);
2607 CHECK_OWNER_LOCKS(gp, pid, sysid, vp);
2608 mutex_exit(&gp->gp_mutex);
2609 }
2610
2611
2612 /*
2613 * Called from 'fs' read and write routines for files that have mandatory
2614 * locking enabled.
2615 */
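/*
 * Illustrative sketch of how a file system's read path might call this
 * routine (the vp, va, uiop, and ct names are hypothetical; real callers
 * differ in detail):
 *
 *	if (MANDLOCK(vp, va.va_mode)) {
 *		error = chklock(vp, FREAD, uiop->uio_loffset,
 *		    uiop->uio_resid, uiop->uio_fmode, ct);
 *		if (error != 0)
 *			return (error);
 *	}
 */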
2616
2617 int
2618 chklock(
2619 struct vnode *vp,
2620 int iomode,
2621 u_offset_t offset,
2622 ssize_t len,
2623 int fmode,
2624 caller_context_t *ct)
2625 {
2626 register int i;
2627 struct flock64 bf;
2628 int error = 0;
2629
2630 bf.l_type = (iomode & FWRITE) ? F_WRLCK : F_RDLCK;
2631 bf.l_whence = 0;
2632 bf.l_start = offset;
2633 bf.l_len = len;
2634 if (ct == NULL) {
2635 bf.l_pid = curproc->p_pid;
2636 bf.l_sysid = 0;
2637 } else {
2638 bf.l_pid = ct->cc_pid;
2639 bf.l_sysid = ct->cc_sysid;
2640 }
2641 i = (fmode & (FNDELAY|FNONBLOCK)) ? INOFLCK : INOFLCK|SLPFLCK;
2642 if ((i = reclock(vp, &bf, i, 0, offset, NULL)) != 0 ||
2643 bf.l_type != F_UNLCK)
2644 error = i ? i : EAGAIN;
2645 return (error);
2646 }
2647
2648 /*
2649 * convoff - convert the (l_start, l_whence) pair in 'lckdat' to be
2650 * relative to the given whence.
2651 */
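/*
 * Illustrative example (hypothetical values): for a lock description with
 * l_whence == 1 (SEEK_CUR) and l_start == 100, and a current file offset
 * of 1000,
 *
 *	error = convoff(vp, &bf, 0, 1000);
 *
 * leaves bf.l_start == 1100 and bf.l_whence == 0 (SEEK_SET).
 */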
2652 int
2653 convoff(vp, lckdat, whence, offset)
2654 struct vnode *vp;
2655 struct flock64 *lckdat;
2656 int whence;
2657 offset_t offset;
2658 {
2659 int error;
2660 struct vattr vattr;
2661
2662 if ((lckdat->l_whence == 2) || (whence == 2)) {
2663 vattr.va_mask = AT_SIZE;
2664 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
2665 return (error);
2666 }
2667
2668 switch (lckdat->l_whence) {
2669 case 1:
2670 lckdat->l_start += offset;
2671 break;
2672 case 2:
2673 lckdat->l_start += vattr.va_size;
2674 /* FALLTHRU */
2675 case 0:
2676 break;
2677 default:
2678 return (EINVAL);
2679 }
2680
2681 if (lckdat->l_start < 0)
2682 return (EINVAL);
2683
2684 switch (whence) {
2685 case 1:
2686 lckdat->l_start -= offset;
2687 break;
2688 case 2:
2689 lckdat->l_start -= vattr.va_size;
2690 /* FALLTHRU */
2691 case 0:
2692 break;
2693 default:
2694 return (EINVAL);
2695 }
2696
2697 lckdat->l_whence = (short)whence;
2698 return (0);
2699 }
2700
2701
2702 /* proc_graph function definitions */
2703
2704 /*
2705 * Check for a deadlock caused by the new 'lock'. If a deadlock is found,
2706 * the edges of this lock are freed and 1 is returned.
2707 */
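/*
 * Illustrative example (hypothetical processes P1 and P2): suppose P1's
 * new request 'lock' is blocked by a lock held by P2, while a pending
 * request from P2 is already blocked by a lock held by P1. The process
 * graph then contains the cycle P1 -> P2 -> P1 (an edge points from the
 * waiting process to the process it waits on), the depth-first search
 * below finds it, and the caller can fail the request with a deadlock
 * error instead of putting it to sleep.
 */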
2708
2709 static int
2710 flk_check_deadlock(lock_descriptor_t *lock)
2711 {
2712 proc_vertex_t *start_vertex, *pvertex;
2713 proc_vertex_t *dvertex;
2714 proc_edge_t *pep, *ppep;
2715 edge_t *ep, *nep;
2716 proc_vertex_t *process_stack;
2717
2718 STACK_INIT(process_stack);
2719
2720 mutex_enter(&flock_lock);
2721 start_vertex = flk_get_proc_vertex(lock);
2722 ASSERT(start_vertex != NULL);
2723
2724 /* construct the edges from this process to other processes */
2725
2726 ep = FIRST_ADJ(lock);
2727 while (ep != HEAD(lock)) {
2728 proc_vertex_t *adj_proc;
2729
2730 adj_proc = flk_get_proc_vertex(ep->to_vertex);
2731 for (pep = start_vertex->edge; pep != NULL; pep = pep->next) {
2732 if (pep->to_proc == adj_proc) {
2733 ASSERT(pep->refcount);
2734 pep->refcount++;
2735 break;
2736 }
2737 }
2738 if (pep == NULL) {
2739 pep = flk_get_proc_edge();
2740 pep->to_proc = adj_proc;
2741 pep->refcount = 1;
2742 adj_proc->incount++;
2743 pep->next = start_vertex->edge;
2744 start_vertex->edge = pep;
2745 }
2746 ep = NEXT_ADJ(ep);
2747 }
2748
2749 ep = FIRST_IN(lock);
2750
2751 while (ep != HEAD(lock)) {
2752 proc_vertex_t *in_proc;
2753
2754 in_proc = flk_get_proc_vertex(ep->from_vertex);
2755
2756 for (pep = in_proc->edge; pep != NULL; pep = pep->next) {
2757 if (pep->to_proc == start_vertex) {
2758 ASSERT(pep->refcount);
2759 pep->refcount++;
2760 break;
2761 }
2762 }
2763 if (pep == NULL) {
2764 pep = flk_get_proc_edge();
2765 pep->to_proc = start_vertex;
2766 pep->refcount = 1;
2767 start_vertex->incount++;
2768 pep->next = in_proc->edge;
2769 in_proc->edge = pep;
2770 }
2771 ep = NEXT_IN(ep);
2772 }
2773
2774 if (start_vertex->incount == 0) {
2775 mutex_exit(&flock_lock);
2776 return (0);
2777 }
2778
2779 flk_proc_graph_uncolor();
2780
2781 start_vertex->p_sedge = start_vertex->edge;
2782
2783 STACK_PUSH(process_stack, start_vertex, p_stack);
2784
2785 while ((pvertex = STACK_TOP(process_stack)) != NULL) {
2786 for (pep = pvertex->p_sedge; pep != NULL; pep = pep->next) {
2787 dvertex = pep->to_proc;
2788 if (!PROC_ARRIVED(dvertex)) {
2789 STACK_PUSH(process_stack, dvertex, p_stack);
2790 dvertex->p_sedge = dvertex->edge;
2791 PROC_ARRIVE(pvertex);
2792 pvertex->p_sedge = pep->next;
2793 break;
2794 }
2795 if (!PROC_DEPARTED(dvertex))
2796 goto deadlock;
2797 }
2798 if (pep == NULL) {
2799 PROC_DEPART(pvertex);
2800 STACK_POP(process_stack, p_stack);
2801 }
2802 }
2803 mutex_exit(&flock_lock);
2804 return (0);
2805
2806 deadlock:
2807
2808 /* we remove all lock edges and proc edges */
2809
2810 ep = FIRST_ADJ(lock);
2811 while (ep != HEAD(lock)) {
2812 proc_vertex_t *adj_proc;
2813 adj_proc = flk_get_proc_vertex(ep->to_vertex);
2814 nep = NEXT_ADJ(ep);
2815 IN_LIST_REMOVE(ep);
2816 ADJ_LIST_REMOVE(ep);
2817 flk_free_edge(ep);
2818 ppep = start_vertex->edge;
2819 for (pep = start_vertex->edge; pep != NULL; ppep = pep,
2820 pep = ppep->next) {
2821 if (pep->to_proc == adj_proc) {
2822 pep->refcount--;
2823 if (pep->refcount == 0) {
2824 if (pep == ppep) {
2825 start_vertex->edge = pep->next;
2826 } else {
2827 ppep->next = pep->next;
2828 }
2829 adj_proc->incount--;
2830 flk_proc_release(adj_proc);
2831 flk_free_proc_edge(pep);
2832 }
2833 break;
2834 }
2835 }
2836 ep = nep;
2837 }
2838 ep = FIRST_IN(lock);
2839 while (ep != HEAD(lock)) {
2840 proc_vertex_t *in_proc;
2841 in_proc = flk_get_proc_vertex(ep->from_vertex);
2842 nep = NEXT_IN(ep);
2843 IN_LIST_REMOVE(ep);
2844 ADJ_LIST_REMOVE(ep);
2845 flk_free_edge(ep);
2846 ppep = in_proc->edge;
2847 for (pep = in_proc->edge; pep != NULL; ppep = pep,
2848 pep = ppep->next) {
2849 if (pep->to_proc == start_vertex) {
2850 pep->refcount--;
2851 if (pep->refcount == 0) {
2852 if (pep == ppep) {
2853 in_proc->edge = pep->next;
2854 } else {
2855 ppep->next = pep->next;
2856 }
2857 start_vertex->incount--;
2858 flk_proc_release(in_proc);
2859 flk_free_proc_edge(pep);
2860 }
2861 break;
2862 }
2863 }
2864 ep = nep;
2865 }
2866 flk_proc_release(start_vertex);
2867 mutex_exit(&flock_lock);
2868 return (1);
2869 }
2870
2871 /*
2872 * Get a proc vertex. If the lock's pvertex value indexes a matching proc
2873 * vertex in the list we return that; otherwise we allocate one, growing
2874 * the list of vertices if necessary.
2875 */
2876
2877 static proc_vertex_t *
2878 flk_get_proc_vertex(lock_descriptor_t *lock)
2879 {
2880 int i;
2881 proc_vertex_t *pv;
2882 proc_vertex_t **palloc;
2883
2884 ASSERT(MUTEX_HELD(&flock_lock));
2885 if (lock->pvertex != -1) {
2886 ASSERT(lock->pvertex >= 0);
2887 pv = pgraph.proc[lock->pvertex];
2888 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
2889 return (pv);
2890 }
2891 }
2892 for (i = 0; i < pgraph.gcount; i++) {
2893 pv = pgraph.proc[i];
2894 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
2895 lock->pvertex = pv->index = i;
2896 return (pv);
2897 }
2898 }
2899 pv = kmem_zalloc(sizeof (struct proc_vertex), KM_SLEEP);
2900 pv->pid = lock->l_flock.l_pid;
2901 pv->sysid = lock->l_flock.l_sysid;
2902 flk_proc_vertex_allocs++;
2903 if (pgraph.free != 0) {
2904 for (i = 0; i < pgraph.gcount; i++) {
2905 if (pgraph.proc[i] == NULL) {
2906 pgraph.proc[i] = pv;
2907 lock->pvertex = pv->index = i;
2908 pgraph.free--;
2909 return (pv);
2910 }
2911 }
2912 }
2913 palloc = kmem_zalloc((pgraph.gcount + PROC_CHUNK) *
2914 sizeof (proc_vertex_t *), KM_SLEEP);
2915
2916 if (pgraph.proc) {
2917 bcopy(pgraph.proc, palloc,
2918 pgraph.gcount * sizeof (proc_vertex_t *));
2919
2920 kmem_free(pgraph.proc,
2921 pgraph.gcount * sizeof (proc_vertex_t *));
2922 }
2923 pgraph.proc = palloc;
2924 pgraph.free += (PROC_CHUNK - 1);
2925 pv->index = lock->pvertex = pgraph.gcount;
2926 pgraph.gcount += PROC_CHUNK;
2927 pgraph.proc[pv->index] = pv;
2928 return (pv);
2929 }
2930
2931 /*
2932 * Allocate a proc edge.
2933 */
2934
2935 static proc_edge_t *
2936 flk_get_proc_edge()
2937 {
2938 proc_edge_t *pep;
2939
2940 pep = kmem_zalloc(sizeof (proc_edge_t), KM_SLEEP);
2941 flk_proc_edge_allocs++;
2942 return (pep);
2943 }
2944
2945 /*
2946 * Free the proc edge. Called whenever its reference count goes to zero.
2947 */
2948
2949 static void
2950 flk_free_proc_edge(proc_edge_t *pep)
2951 {
2952 ASSERT(pep->refcount == 0);
2953 kmem_free((void *)pep, sizeof (proc_edge_t));
2954 flk_proc_edge_frees++;
2955 }
2956
2957 /*
2958 * Uncolor the proc graph; a full reset is done only when the mark hits UINT_MAX.
2959 */
2960
2961 static void
2962 flk_proc_graph_uncolor()
2963 {
2964 int i;
2965
2966 if (pgraph.mark == UINT_MAX) {
2967 for (i = 0; i < pgraph.gcount; i++)
2968 if (pgraph.proc[i] != NULL) {
2969 pgraph.proc[i]->atime = 0;
2970 pgraph.proc[i]->dtime = 0;
2971 }
2972 pgraph.mark = 1;
2973 } else {
2974 pgraph.mark++;
2975 }
2976 }
2977
2978 /*
2979 * Release the proc vertex iff it has neither in edges nor out edges.
2980 */
2981
2982 static void
2983 flk_proc_release(proc_vertex_t *proc)
2984 {
2985 ASSERT(MUTEX_HELD(&flock_lock));
2986 if (proc->edge == NULL && proc->incount == 0) {
2987 pgraph.proc[proc->index] = NULL;
2988 pgraph.free++;
2989 kmem_free(proc, sizeof (proc_vertex_t));
2990 flk_proc_vertex_frees++;
2991 }
2992 }
2993
2994 /*
2995 * Update the process graph to reflect a change in a lock graph.
2996 * Note: this function should be called only after the lock graph has been
2997 * correctly recomputed; otherwise we might miss a deadlock.
2998 * E.g., flk_relation() calls this function after flk_recompute_dependencies();
2999 * otherwise a process trying to lock a vnode hashed into another graph
3000 * might sleep forever.
3001 */
3002
3003 static void
3004 flk_update_proc_graph(edge_t *ep, int delete)
3005 {
3006 proc_vertex_t *toproc, *fromproc;
3007 proc_edge_t *pep, *prevpep;
3008
3009 mutex_enter(&flock_lock);
3010 toproc = flk_get_proc_vertex(ep->to_vertex);
3011 fromproc = flk_get_proc_vertex(ep->from_vertex);
3012
3013 if (!delete)
3014 goto add;
3015 pep = prevpep = fromproc->edge;
3016
3017 ASSERT(pep != NULL);
3018 while (pep != NULL) {
3019 if (pep->to_proc == toproc) {
3020 ASSERT(pep->refcount > 0);
3021 pep->refcount--;
3022 if (pep->refcount == 0) {
3023 if (pep == prevpep) {
3024 fromproc->edge = pep->next;
3025 } else {
3026 prevpep->next = pep->next;
3027 }
3028 toproc->incount--;
3029 flk_proc_release(toproc);
3030 flk_free_proc_edge(pep);
3031 }
3032 break;
3033 }
3034 prevpep = pep;
3035 pep = pep->next;
3036 }
3037 flk_proc_release(fromproc);
3038 mutex_exit(&flock_lock);
3039 return;
3040 add:
3041
3042 pep = fromproc->edge;
3043
3044 while (pep != NULL) {
3045 if (pep->to_proc == toproc) {
3046 ASSERT(pep->refcount > 0);
3047 pep->refcount++;
3048 break;
3049 }
3050 pep = pep->next;
3051 }
3052 if (pep == NULL) {
3053 pep = flk_get_proc_edge();
3054 pep->to_proc = toproc;
3055 pep->refcount = 1;
3056 toproc->incount++;
3057 pep->next = fromproc->edge;
3058 fromproc->edge = pep;
3059 }
3060 mutex_exit(&flock_lock);
3061 }
3062
3063 /*
3064 * Set the control status for lock manager requests.
3065 */
3067
3068 /*
3069 * PSARC case 1997/292
3070 *
3071 * Requires: "nlmid" must be >= 1 and <= clconf_maximum_nodeid().
3072 * Effects: Set the state of the NLM server identified by "nlmid"
3073 * in the NLM registry to state "nlm_state."
3074 * Raises exception no_such_nlm if "nlmid" doesn't identify a known
3075 * NLM server to this LLM.
3076 * Note that when this routine is called with NLM_SHUTTING_DOWN there
3077 * may be lock requests that have gotten started but not finished. In
3078 * particular, there may be blocking requests that are in the callback code
3079 * before sleeping (so they're not holding the lock for the graph). If
3080 * such a thread reacquires the graph's lock (to go to sleep) after
3081 * NLM state in the NLM registry is set to a non-up value,
3082 * it will notice the status and bail out. If the request gets
3083 * granted before the thread can check the NLM registry, let it
3084 * continue normally. It will get flushed when we are called with NLM_DOWN.
3085 *
3086 * Modifies: nlm_reg_obj (global)
3087 * Arguments:
3088 * nlmid (IN): id uniquely identifying an NLM server
3089 * nlm_state (IN): NLM server state to change "nlmid" to
3090 */
3091 void
3092 cl_flk_set_nlm_status(int nlmid, flk_nlm_status_t nlm_state)
3093 {
3094 /*
3095 * Check to see if node is booted as a cluster. If not, return.
3096 */
3097 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
3098 return;
3099 }
3100
3101 /*
3102 * Check for development/debugging. It is possible to boot a node
3103 * in non-cluster mode, and then run a special script, currently
3104 * available only to developers, to bring up the node as part of a
3105 * cluster. The problem is that running such a script does not
3106 * result in the routine flk_init() being called and hence global array
3107 * nlm_reg_status is NULL. The NLM thinks it's in cluster mode,
3108 * but the LLM needs to do an additional check to see if the global
3109 * array has been created or not. If nlm_reg_status is NULL, then
3110 * return, else continue.
3111 */
3112 if (nlm_reg_status == NULL) {
3113 return;
3114 }
3115
3116 ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
3117 mutex_enter(&nlm_reg_lock);
3118
3119 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, nlmid)) {
3120 /*
3121 * If the NLM server "nlmid" is unknown in the NLM registry,
3122 * add it to the registry in the nlm shutting down state.
3123 */
3124 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
3125 FLK_NLM_SHUTTING_DOWN);
3126 } else {
3127 /*
3128 * Change the state of the NLM server identified by "nlmid"
3129 * in the NLM registry to the argument "nlm_state."
3130 */
3131 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
3132 nlm_state);
3133 }
3134
3135 /*
3136 * The reason we must register the NLM server that is shutting down
3137 * with an LLM that doesn't already know about it (never sent a lock
3138 * request) is to handle correctly a race between shutdown and a new
3139 * lock request. Suppose that a shutdown request from the NLM server
3140 * invokes this routine at the LLM, and a thread is spawned to
3141 * service the request. Now suppose a new lock request is in
3142 * progress and has already passed the first line of defense in
3143 * reclock(), which denies new lock requests from NLM servers
3144 * that are not in the NLM_UP state. After the current routine
3145 * is invoked for both phases of shutdown, the routine will return,
3146 * having done nothing, and the lock request will proceed and
3147 * probably be granted. The problem is that the shutdown was ignored
3148 * by the lock request because there was no record of that NLM server
3149 * shutting down. We will be in the peculiar position of thinking
3150 * that we've shut down the NLM server and all locks at all LLMs have
3151 * been discarded, but in fact there's still one lock held.
3152 * The solution is to record the existence of NLM server and change
3153 * its state immediately to NLM_SHUTTING_DOWN. The lock request in
3154 * progress may proceed because the next phase NLM_DOWN will catch
3155 * this lock and discard it.
3156 */
3157 mutex_exit(&nlm_reg_lock);
3158
3159 switch (nlm_state) {
3160 case FLK_NLM_UP:
3161 /*
3162 * Change the NLM state of all locks still held on behalf of
3163 * the NLM server identified by "nlmid" to NLM_UP.
3164 */
3165 cl_flk_change_nlm_state_all_locks(nlmid, FLK_NLM_UP);
3166 break;
3167
3168 case FLK_NLM_SHUTTING_DOWN:
3169 /*
3170 * Wake up all sleeping locks for the NLM server identified
3171 * by "nlmid." Note that eventually all woken threads will
3172 * have their lock requests cancelled and descriptors
3173 * removed from the sleeping lock list. Note that the NLM
3174 * server state associated with each lock descriptor is
3175 * changed to FLK_NLM_SHUTTING_DOWN.
3176 */
3177 cl_flk_wakeup_sleeping_nlm_locks(nlmid);
3178 break;
3179
3180 case FLK_NLM_DOWN:
3181 /*
3182 * Discard all active, granted locks for this NLM server
3183 * identified by "nlmid."
3184 */
3185 cl_flk_unlock_nlm_granted(nlmid);
3186 break;
3187
3188 default:
3189 panic("cl_set_nlm_status: bad status (%d)", nlm_state);
3190 }
3191 }
3192
3193 /*
3194 * Set the control status for lock manager requests.
3195 *
3196 * Note that when this routine is called with FLK_WAKEUP_SLEEPERS, there
3197 * may be lock requests that have gotten started but not finished. In
3198 * particular, there may be blocking requests that are in the callback code
3199 * before sleeping (so they're not holding the lock for the graph). If
3200 * such a thread reacquires the graph's lock (to go to sleep) after
3201 * flk_lockmgr_status is set to a non-up value, it will notice the status
3202 * and bail out. If the request gets granted before the thread can check
3203 * flk_lockmgr_status, let it continue normally. It will get flushed when
3204 * we are called with FLK_LOCKMGR_DOWN.
3205 */
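/*
 * Illustrative sketch of the expected calling sequence (the caller is
 * assumed to be the lock manager teardown/startup path):
 *
 *	flk_set_lockmgr_status(FLK_WAKEUP_SLEEPERS);
 *	(wait for the woken requests to drain)
 *	flk_set_lockmgr_status(FLK_LOCKMGR_DOWN);
 *
 * and, when the lock manager is restarted:
 *
 *	flk_set_lockmgr_status(FLK_LOCKMGR_UP);
 */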
3206
3207 void
3208 flk_set_lockmgr_status(flk_lockmgr_status_t status)
3209 {
3210 int i;
3211 graph_t *gp;
3212 struct flock_globals *fg;
3213
3214 fg = flk_get_globals();
3215 ASSERT(fg != NULL);
3216
3217 mutex_enter(&flock_lock);
3218 fg->flk_lockmgr_status = status;
3219 mutex_exit(&flock_lock);
3220
3221 /*
3222 * If the lock manager is coming back up, all that's needed is to
3223 * propagate this information to the graphs. If the lock manager
3224 * is going down, additional action is required, and each graph's
3225 * copy of the state is updated atomically with this other action.
3226 */
3227 switch (status) {
3228 case FLK_LOCKMGR_UP:
3229 for (i = 0; i < HASH_SIZE; i++) {
3230 mutex_enter(&flock_lock);
3231 gp = lock_graph[i];
3232 mutex_exit(&flock_lock);
3233 if (gp == NULL)
3234 continue;
3235 mutex_enter(&gp->gp_mutex);
3236 fg->lockmgr_status[i] = status;
3237 mutex_exit(&gp->gp_mutex);
3238 }
3239 break;
3240 case FLK_WAKEUP_SLEEPERS:
3241 wakeup_sleeping_lockmgr_locks(fg);
3242 break;
3243 case FLK_LOCKMGR_DOWN:
3244 unlock_lockmgr_granted(fg);
3245 break;
3246 default:
3247 panic("flk_set_lockmgr_status: bad status (%d)", status);
3248 break;
3249 }
3250 }
3251
3252 /*
3253 * This routine returns all the locks that are active or sleeping and are
3254 * associated with a particular set of identifiers. If lock_state != 0, then
3255 * only locks that match the lock_state are returned. If lock_state == 0, then
3256 * all locks are returned. If pid == NOPID, the pid is ignored. If
3257 * use_sysid is FALSE, then the sysid is ignored. If vp is NULL, then the
3258 * vnode pointer is ignored.
3259 *
3260 * A list containing the vnode pointer and an flock structure
3261 * describing the lock is returned. Each element in the list is
3262 * dynamically allocated and must be freed by the caller. The
3263 * last item in the list is denoted by a NULL value in the ll_next
3264 * field.
3265 *
3266 * The vnode pointers returned are held. The caller is responsible
3267 * for releasing these. Note that the returned list is only a snapshot of
3268 * the current lock information, and that it is a snapshot of a moving
3269 * target (only one graph is locked at a time).
3270 */
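/*
 * Illustrative sketch of consuming the returned list (the caller shown is
 * hypothetical): each entry holds a vnode reference, so the caller must
 * release it and free the entry, or simply call flk_free_locklist():
 *
 *	locklist_t *llp, *next_llp;
 *
 *	llp = flk_active_locks_for_vp(vp);
 *	while (llp != NULL) {
 *		next_llp = llp->ll_next;
 *		(examine llp->ll_vp and llp->ll_flock here)
 *		VN_RELE(llp->ll_vp);
 *		kmem_free(llp, sizeof (*llp));
 *		llp = next_llp;
 *	}
 */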
3271
3272 locklist_t *
3273 get_lock_list(int list_type, int lock_state, int sysid, boolean_t use_sysid,
3274 pid_t pid, const vnode_t *vp, zoneid_t zoneid)
3275 {
3276 lock_descriptor_t *lock;
3277 lock_descriptor_t *graph_head;
3278 locklist_t listhead;
3279 locklist_t *llheadp;
3280 locklist_t *llp;
3281 locklist_t *lltp;
3282 graph_t *gp;
3283 int i;
3284 int first_index; /* graph index */
3285 int num_indexes; /* graph index */
3286
3287 ASSERT((list_type == FLK_ACTIVE_STATE) ||
3288 (list_type == FLK_SLEEPING_STATE));
3289
3290 /*
3291 * Get a pointer to something to use as a list head while building
3292 * the rest of the list.
3293 */
3294 llheadp = &listhead;
3295 lltp = llheadp;
3296 llheadp->ll_next = (locklist_t *)NULL;
3297
3298 /* Figure out which graphs we want to look at. */
3299 if (vp == NULL) {
3300 first_index = 0;
3301 num_indexes = HASH_SIZE;
3302 } else {
3303 first_index = HASH_INDEX(vp);
3304 num_indexes = 1;
3305 }
3306
3307 for (i = first_index; i < first_index + num_indexes; i++) {
3308 mutex_enter(&flock_lock);
3309 gp = lock_graph[i];
3310 mutex_exit(&flock_lock);
3311 if (gp == NULL) {
3312 continue;
3313 }
3314
3315 mutex_enter(&gp->gp_mutex);
3316 graph_head = (list_type == FLK_ACTIVE_STATE) ?
3317 ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp);
3318 for (lock = graph_head->l_next;
3319 lock != graph_head;
3320 lock = lock->l_next) {
3321 if (use_sysid && lock->l_flock.l_sysid != sysid)
3322 continue;
3323 if (pid != NOPID && lock->l_flock.l_pid != pid)
3324 continue;
3325 if (vp != NULL && lock->l_vnode != vp)
3326 continue;
3327 if (lock_state && !(lock_state & lock->l_state))
3328 continue;
3329 if (zoneid != lock->l_zoneid && zoneid != ALL_ZONES)
3330 continue;
3331 /*
3332 * A matching lock was found. Allocate
3333 * space for a new locklist entry and fill
3334 * it in.
3335 */
3336 llp = kmem_alloc(sizeof (locklist_t), KM_SLEEP);
3337 lltp->ll_next = llp;
3338 VN_HOLD(lock->l_vnode);
3339 llp->ll_vp = lock->l_vnode;
3340 create_flock(lock, &(llp->ll_flock));
3341 llp->ll_next = (locklist_t *)NULL;
3342 lltp = llp;
3343 }
3344 mutex_exit(&gp->gp_mutex);
3345 }
3346
3347 llp = llheadp->ll_next;
3348 return (llp);
3349 }
3350
3351 /*
3352 * These two functions are simply interfaces to get_lock_list. They return
3353 * a list of sleeping or active locks for the given sysid and pid. See
3354 * get_lock_list for details.
3355 *
3356 * In either case we don't particularly care to specify the zone of interest;
3357 * the sysid-space is global across zones, so the sysid will map to exactly one
3358 * zone, and we'll return information for that zone.
3359 */
3360
3361 locklist_t *
3362 flk_get_sleeping_locks(int sysid, pid_t pid)
3363 {
3364 return (get_lock_list(FLK_SLEEPING_STATE, 0, sysid, B_TRUE, pid, NULL,
3365 ALL_ZONES));
3366 }
3367
3368 locklist_t *
3369 flk_get_active_locks(int sysid, pid_t pid)
3370 {
3371 return (get_lock_list(FLK_ACTIVE_STATE, 0, sysid, B_TRUE, pid, NULL,
3372 ALL_ZONES));
3373 }
3374
3375 /*
3376 * Another interface to get_lock_list. This one returns all the active
3377 * locks for a given vnode. Again, see get_lock_list for details.
3378 *
3379 * We don't need to specify which zone's locks we're interested in. The matter
3380 * would only be interesting if the vnode belonged to NFS, and NFS vnodes can't
3381 * be used by multiple zones, so the list of locks will all be from the right
3382 * zone.
3383 */
3384
3385 locklist_t *
3386 flk_active_locks_for_vp(const vnode_t *vp)
3387 {
3388 return (get_lock_list(FLK_ACTIVE_STATE, 0, 0, B_FALSE, NOPID, vp,
3389 ALL_ZONES));
3390 }
3391
3392 /*
3393 * Another interface to get_lock_list. This one returns all the active
3394 * nbmand locks for a given vnode. Again, see get_lock_list for details.
3395 *
3396 * See the comment for flk_active_locks_for_vp() for why we don't care to
3397 * specify the particular zone of interest.
3398 */
3399 locklist_t *
3400 flk_active_nbmand_locks_for_vp(const vnode_t *vp)
3401 {
3402 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
3403 NOPID, vp, ALL_ZONES));
3404 }
3405
3406 /*
3407 * Another interface to get_lock_list. This one returns all the active
3408 * nbmand locks for a given pid. Again, see get_lock_list for details.
3409 *
3410 * The zone doesn't need to be specified here; the locks held by a
3411 * particular process will either be local (ie, non-NFS) or from the zone
3412 * the process is executing in. This is because other parts of the system
3413 * ensure that an NFS vnode can't be used in a zone other than that in
3414 * which it was opened.
3415 */
3416 locklist_t *
3417 flk_active_nbmand_locks(pid_t pid)
3418 {
3419 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
3420 pid, NULL, ALL_ZONES));
3421 }
3422
3423 /*
3424 * Free up all entries in the locklist.
3425 */
3426 void
3427 flk_free_locklist(locklist_t *llp)
3428 {
3429 locklist_t *next_llp;
3430
3431 while (llp) {
3432 next_llp = llp->ll_next;
3433 VN_RELE(llp->ll_vp);
3434 kmem_free(llp, sizeof (*llp));
3435 llp = next_llp;
3436 }
3437 }
3438
3439 static void
3440 cl_flk_change_nlm_state_all_locks(int nlmid, flk_nlm_status_t nlm_state)
3441 {
3442 /*
3443 * For each graph "lg" in the hash table lock_graph do
3444 * a. Get the list of sleeping locks
3445 * b. For each lock descriptor in the list do
3446 * i. If the requested lock is an NLM server request AND
3447 * the nlmid is the same as the routine argument then
3448 * change the lock descriptor's state field to
3449 * "nlm_state."
3450 * c. Get the list of active locks
3451 * d. For each lock descriptor in the list do
3452 * i. If the requested lock is an NLM server request AND
3453 * the nlmid is the same as the routine argument then
3454 * change the lock descriptor's state field to
3455 * "nlm_state."
3456 */
3457
3458 int i;
3459 graph_t *gp; /* lock graph */
3460 lock_descriptor_t *lock; /* lock */
3461 lock_descriptor_t *nlock = NULL; /* next lock */
3462 int lock_nlmid;
3463
3464 for (i = 0; i < HASH_SIZE; i++) {
3465 mutex_enter(&flock_lock);
3466 gp = lock_graph[i];
3467 mutex_exit(&flock_lock);
3468 if (gp == NULL) {
3469 continue;
3470 }
3471
3472 /* Get list of sleeping locks in current lock graph. */
3473 mutex_enter(&gp->gp_mutex);
3474 for (lock = SLEEPING_HEAD(gp)->l_next;
3475 lock != SLEEPING_HEAD(gp);
3476 lock = nlock) {
3477 nlock = lock->l_next;
3478 /* get NLM id */
3479 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
3480
3481 /*
3482 * If NLM server request AND nlmid of lock matches
3483 * nlmid of argument, then set the NLM state of the
3484 * lock to "nlm_state."
3485 */
3486 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
3487 SET_NLM_STATE(lock, nlm_state);
3488 }
3489 }
3490
3491 /* Get list of active locks in current lock graph. */
3492 for (lock = ACTIVE_HEAD(gp)->l_next;
3493 lock != ACTIVE_HEAD(gp);
3494 lock = nlock) {
3495 nlock = lock->l_next;
3496 /* get NLM id */
3497 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
3498
3499 /*
3500 * If NLM server request AND nlmid of lock matches
3501 * nlmid of argument, then set the NLM state of the
3502 * lock to "nlm_state."
3503 */
3504 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
3505 ASSERT(IS_ACTIVE(lock));
3506 SET_NLM_STATE(lock, nlm_state);
3507 }
3508 }
3509 mutex_exit(&gp->gp_mutex);
3510 }
3511 }
3512
3513 /*
3514 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid().
3515 * Effects: Find all sleeping lock manager requests _only_ for the NLM server
3516 * identified by "nlmid." Poke those lock requests.
3517 */
3518 static void
3519 cl_flk_wakeup_sleeping_nlm_locks(int nlmid)
3520 {
3521 lock_descriptor_t *lock;
3522 lock_descriptor_t *nlock = NULL; /* next lock */
3523 int i;
3524 graph_t *gp;
3525 int lock_nlmid;
3526
3527 for (i = 0; i < HASH_SIZE; i++) {
3528 mutex_enter(&flock_lock);
3529 gp = lock_graph[i];
3530 mutex_exit(&flock_lock);
3531 if (gp == NULL) {
3532 continue;
3533 }
3534
3535 mutex_enter(&gp->gp_mutex);
3536 for (lock = SLEEPING_HEAD(gp)->l_next;
3537 lock != SLEEPING_HEAD(gp);
3538 lock = nlock) {
3539 nlock = lock->l_next;
3540 /*
3541 * If NLM server request _and_ nlmid of lock matches
3542 * nlmid of argument, then set the NLM state of the
3543 * lock to NLM_SHUTTING_DOWN, and wake up sleeping
3544 * request.
3545 */
3546 if (IS_LOCKMGR(lock)) {
3547 /* get NLM id */
3548 lock_nlmid =
3549 GETNLMID(lock->l_flock.l_sysid);
3550 if (nlmid == lock_nlmid) {
3551 SET_NLM_STATE(lock,
3552 FLK_NLM_SHUTTING_DOWN);
3553 INTERRUPT_WAKEUP(lock);
3554 }
3555 }
3556 }
3557 mutex_exit(&gp->gp_mutex);
3558 }
3559 }
3560
3561 /*
3562 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid()
3563 * Effects: Find all active (granted) lock manager locks _only_ for the
3564 * NLM server identified by "nlmid" and release them.
3565 */
3566 static void
3567 cl_flk_unlock_nlm_granted(int nlmid)
3568 {
3569 lock_descriptor_t *lock;
3570 lock_descriptor_t *nlock = NULL; /* next lock */
3571 int i;
3572 graph_t *gp;
3573 int lock_nlmid;
3574
3575 for (i = 0; i < HASH_SIZE; i++) {
3576 mutex_enter(&flock_lock);
3577 gp = lock_graph[i];
3578 mutex_exit(&flock_lock);
3579 if (gp == NULL) {
3580 continue;
3581 }
3582
3583 mutex_enter(&gp->gp_mutex);
3584 for (lock = ACTIVE_HEAD(gp)->l_next;
3585 lock != ACTIVE_HEAD(gp);
3586 lock = nlock) {
3587 nlock = lock->l_next;
3588 ASSERT(IS_ACTIVE(lock));
3589
3590 /*
3591 * If it's an NLM server request _and_ nlmid of
3592 * the lock matches nlmid of argument, then
3593 * remove the active lock from the list, wake up blocked
3594 * threads, and free the storage for the lock.
3595 * Note that there's no need to mark the NLM state
3596 * of this lock to NLM_DOWN because the lock will
3597 * be deleted anyway and its storage freed.
3598 */
3599 if (IS_LOCKMGR(lock)) {
3600 /* get NLM id */
3601 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
3602 if (nlmid == lock_nlmid) {
3603 flk_delete_active_lock(lock, 0);
3604 flk_wakeup(lock, 1);
3605 flk_free_lock(lock);
3606 }
3607 }
3608 }
3609 mutex_exit(&gp->gp_mutex);
3610 }
3611 }
3612
3613 /*
3614 * Find all sleeping lock manager requests and poke them.
3615 */
3616 static void
3617 wakeup_sleeping_lockmgr_locks(struct flock_globals *fg)
3618 {
3619 lock_descriptor_t *lock;
3620 lock_descriptor_t *nlock = NULL; /* next lock */
3621 int i;
3622 graph_t *gp;
3623 zoneid_t zoneid = getzoneid();
3624
3625 for (i = 0; i < HASH_SIZE; i++) {
3626 mutex_enter(&flock_lock);
3627 gp = lock_graph[i];
3628 mutex_exit(&flock_lock);
3629 if (gp == NULL) {
3630 continue;
3631 }
3632
3633 mutex_enter(&gp->gp_mutex);
3634 fg->lockmgr_status[i] = FLK_WAKEUP_SLEEPERS;
3635 for (lock = SLEEPING_HEAD(gp)->l_next;
3636 lock != SLEEPING_HEAD(gp);
3637 lock = nlock) {
3638 nlock = lock->l_next;
3639 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
3640 INTERRUPT_WAKEUP(lock);
3641 }
3642 }
3643 mutex_exit(&gp->gp_mutex);
3644 }
3645 }
3646
3647
3648 /*
3649 * Find all active (granted) lock manager locks and release them.
3650 */
3651 static void
3652 unlock_lockmgr_granted(struct flock_globals *fg)
3653 {
3654 lock_descriptor_t *lock;
3655 lock_descriptor_t *nlock = NULL; /* next lock */
3656 int i;
3657 graph_t *gp;
3658 zoneid_t zoneid = getzoneid();
3659
3660 for (i = 0; i < HASH_SIZE; i++) {
3661 mutex_enter(&flock_lock);
3662 gp = lock_graph[i];
3663 mutex_exit(&flock_lock);
3664 if (gp == NULL) {
3665 continue;
3666 }
3667
3668 mutex_enter(&gp->gp_mutex);
3669 fg->lockmgr_status[i] = FLK_LOCKMGR_DOWN;
3670 for (lock = ACTIVE_HEAD(gp)->l_next;
3671 lock != ACTIVE_HEAD(gp);
3672 lock = nlock) {
3673 nlock = lock->l_next;
3674 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
3675 ASSERT(IS_ACTIVE(lock));
3676 flk_delete_active_lock(lock, 0);
3677 flk_wakeup(lock, 1);
3678 flk_free_lock(lock);
3679 }
3680 }
3681 mutex_exit(&gp->gp_mutex);
3682 }
3683 }
3684
3685
3686 /*
3687 * Wait until a lock is granted, cancelled, or interrupted.
3688 */
3689
3690 static void
3691 wait_for_lock(lock_descriptor_t *request)
3692 {
3693 graph_t *gp = request->l_graph;
3694
3695 ASSERT(MUTEX_HELD(&gp->gp_mutex));
3696
3697 while (!(IS_GRANTED(request)) && !(IS_CANCELLED(request)) &&
3698 !(IS_INTERRUPTED(request))) {
3699 if (!cv_wait_sig(&request->l_cv, &gp->gp_mutex)) {
3700 flk_set_state(request, FLK_INTERRUPTED_STATE);
3701 request->l_state |= INTERRUPTED_LOCK;
3702 }
3703 }
3704 }
3705
3706 /*
3707 * Create an flock structure from the existing lock information
3708 *
3709 * This routine is used to create flock structures for the lock manager
3710 * to use in a reclaim request. Since the lock originated on this
3711 * host, it must conform to UNIX semantics, so no checking is
3712 * done to make sure it falls within the lower half of the 32-bit range.
3713 */
3714
3715 static void
3716 create_flock(lock_descriptor_t *lp, flock64_t *flp)
3717 {
3718 ASSERT(lp->l_end == MAX_U_OFFSET_T || lp->l_end <= MAXEND);
3719 ASSERT(lp->l_end >= lp->l_start);
3720
3721 flp->l_type = lp->l_type;
3722 flp->l_whence = 0;
3723 flp->l_start = lp->l_start;
3724 flp->l_len = (lp->l_end == MAX_U_OFFSET_T) ? 0 :
3725 (lp->l_end - lp->l_start + 1);
3726 flp->l_sysid = lp->l_flock.l_sysid;
3727 flp->l_pid = lp->l_flock.l_pid;
3728 }
3729
3730 /*
3731 * Convert flock64_t data describing a lock range into u_offset_t starting
3732 * and ending points, which are returned through 'start' and 'end'. Returns
3733 * 0 or an errno value.
3734 * Large Files: max is passed by the caller and we return EOVERFLOW
3735 * as defined by LFS API.
3736 */
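/*
 * Illustrative examples (hypothetical values; file size 1000 where it
 * matters):
 *
 *	l_whence = 0, l_start = 100, l_len = 100
 *		=> *start = 100, *end = 199
 *	l_whence = 0, l_start = 0, l_len = 0
 *		=> *start = 0, *end = MAX_U_OFFSET_T (through end of file)
 *	l_whence = 2, l_start = -100, l_len = 100
 *		=> *start = 900, *end = 999 (the last 100 bytes of the file)
 */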
3737
3738 int
3739 flk_convert_lock_data(vnode_t *vp, flock64_t *flp,
3740 u_offset_t *start, u_offset_t *end, offset_t offset)
3741 {
3742 struct vattr vattr;
3743 int error;
3744
3745 /*
3746 * Determine the starting point of the request
3747 */
3748 switch (flp->l_whence) {
3749 case 0: /* SEEK_SET */
3750 *start = (u_offset_t)flp->l_start;
3751 break;
3752 case 1: /* SEEK_CUR */
3753 *start = (u_offset_t)(flp->l_start + offset);
3754 break;
3755 case 2: /* SEEK_END */
3756 vattr.va_mask = AT_SIZE;
3757 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
3758 return (error);
3759 *start = (u_offset_t)(flp->l_start + vattr.va_size);
3760 break;
3761 default:
3762 return (EINVAL);
3763 }
3764
3765 /*
3766 * Determine the range covered by the request.
3767 */
3768 if (flp->l_len == 0)
3769 *end = MAX_U_OFFSET_T;
3770 else if ((offset_t)flp->l_len > 0) {
3771 *end = (u_offset_t)(*start + (flp->l_len - 1));
3772 } else {
3773 /*
3774 * Negative length; why do we even allow this ?
3775 * Because this allows easy specification of
3776 * the last n bytes of the file.
3777 */
3778 *end = *start;
3779 *start += (u_offset_t)flp->l_len;
3780 (*start)++;
3781 }
3782 return (0);
3783 }
3784
3785 /*
3786 * Check the validity of lock data. This can be used by the NFS
3787 * frlock routines to check data before contacting the server. The
3788 * server must support semantics that aren't as restrictive as
3789 * the UNIX API, so the NFS client is required to check.
3790 * The maximum is now passed in by the caller.
3791 */
3792
3793 int
3794 flk_check_lock_data(u_offset_t start, u_offset_t end, offset_t max)
3795 {
3796 /*
3797 * The end (length) for local locking should never be greater
3798 * than MAXEND. However, the representation for
3799 * the entire file is MAX_U_OFFSET_T.
3800 */
3801 if ((start > max) ||
3802 ((end > max) && (end != MAX_U_OFFSET_T))) {
3803 return (EINVAL);
3804 }
3805 if (start > end) {
3806 return (EINVAL);
3807 }
3808 return (0);
3809 }
3810
3811 /*
3812 * Fill in request->l_flock with information about the lock blocking the
3813 * request. The complexity here is that lock manager requests are allowed
3814 * to see into the upper part of the 32-bit address range, whereas local
3815 * requests are only allowed to see signed values.
3816 *
3817 * What should be done when "blocker" is a lock manager lock that uses the
3818 * upper portion of the 32-bit range, but "request" is local? Since the
3819 * request has already been determined to have been blocked by the blocker,
3820 * at least some portion of "blocker" must be in the range of the request,
3821 * or the request extends to the end of file. For the first case, the
3822 * portion in the lower range is returned with the indication that it goes
3823 * "to EOF." For the second case, the last byte of the lower range is
3824 * returned with the indication that it goes "to EOF."
3825 */
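/*
 * Illustrative example (hypothetical values): if a local (non-lockmgr)
 * request is blocked by a lock manager lock spanning
 * [MAXEND + 10, MAX_U_OFFSET_T], the range reported to the local request
 * is clamped to l_start = MAXEND, l_len = 0, i.e., "from MAXEND to EOF."
 */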
3826
3827 static void
3828 report_blocker(lock_descriptor_t *blocker, lock_descriptor_t *request)
3829 {
3830 flock64_t *flrp; /* l_flock portion of request */
3831
3832 ASSERT(blocker != NULL);
3833
3834 flrp = &request->l_flock;
3835 flrp->l_whence = 0;
3836 flrp->l_type = blocker->l_type;
3837 flrp->l_pid = blocker->l_flock.l_pid;
3838 flrp->l_sysid = blocker->l_flock.l_sysid;
3839
3840 if (IS_LOCKMGR(request)) {
3841 flrp->l_start = blocker->l_start;
3842 if (blocker->l_end == MAX_U_OFFSET_T)
3843 flrp->l_len = 0;
3844 else
3845 flrp->l_len = blocker->l_end - blocker->l_start + 1;
3846 } else {
3847 if (blocker->l_start > MAXEND) {
3848 flrp->l_start = MAXEND;
3849 flrp->l_len = 0;
3850 } else {
3851 flrp->l_start = blocker->l_start;
3852 if (blocker->l_end == MAX_U_OFFSET_T)
3853 flrp->l_len = 0;
3854 else
3855 flrp->l_len = blocker->l_end -
3856 blocker->l_start + 1;
3857 }
3858 }
3859 }
3860
3861 /*
3862 * PSARC case 1997/292
3863 */
3864 /*
3865 * This is the public routine exported by flock.h.
3866 */
3867 void
3868 cl_flk_change_nlm_state_to_unknown(int nlmid)
3869 {
3870 /*
3871 * Check to see if node is booted as a cluster. If not, return.
3872 */
3873 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
3874 return;
3875 }
3876
3877 /*
3878 * See comment in cl_flk_set_nlm_status().
3879 */
3880 if (nlm_reg_status == NULL) {
3881 return;
3882 }
3883
3884 /*
3885 * protect NLM registry state with a mutex.
3886 */
3887 ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
3888 mutex_enter(&nlm_reg_lock);
3889 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, FLK_NLM_UNKNOWN);
3890 mutex_exit(&nlm_reg_lock);
3891 }
3892
3893 /*
3894 * Return non-zero if the given I/O request conflicts with an active NBMAND
3895 * lock.
3896 * If svmand is non-zero, it means look at all active locks, not just NBMAND
3897 * locks.
3898 */
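/*
 * Illustrative sketch of a caller (names are hypothetical; real callers
 * normally enter through nbl_conflict() and differ in detail):
 *
 *	if (nbl_need_check(vp)) {
 *		nbl_start_crit(vp, RW_READER);
 *		if (nbl_lock_conflict(vp, NBL_WRITE, uiop->uio_loffset,
 *		    uiop->uio_resid, svmand, ct))
 *			error = EACCES;
 *		nbl_end_crit(vp);
 *	}
 */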
3899
3900 int
3901 nbl_lock_conflict(vnode_t *vp, nbl_op_t op, u_offset_t offset,
3902 ssize_t length, int svmand, caller_context_t *ct)
3903 {
3904 int conflict = 0;
3905 graph_t *gp;
3906 lock_descriptor_t *lock;
3907 pid_t pid;
3908 int sysid;
3909
3910 if (ct == NULL) {
3911 pid = curproc->p_pid;
3912 sysid = 0;
3913 } else {
3914 pid = ct->cc_pid;
3915 sysid = ct->cc_sysid;
3916 }
3917
3918 mutex_enter(&flock_lock);
3919 gp = lock_graph[HASH_INDEX(vp)];
3920 mutex_exit(&flock_lock);
3921 if (gp == NULL)
3922 return (0);
3923
3924 mutex_enter(&gp->gp_mutex);
3925 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
3926
3927 for (; lock && lock->l_vnode == vp; lock = lock->l_next) {
3928 if ((svmand || (lock->l_state & NBMAND_LOCK)) &&
3929 (lock->l_flock.l_sysid != sysid ||
3930 lock->l_flock.l_pid != pid) &&
3931 lock_blocks_io(op, offset, length,
3932 lock->l_type, lock->l_start, lock->l_end)) {
3933 conflict = 1;
3934 break;
3935 }
3936 }
3937 mutex_exit(&gp->gp_mutex);
3938
3939 return (conflict);
3940 }
3941
3942 /*
3943 * Return non-zero if the given I/O request conflicts with the given lock.
3944 */
3945
3946 static int
3947 lock_blocks_io(nbl_op_t op, u_offset_t offset, ssize_t length,
3948 int lock_type, u_offset_t lock_start, u_offset_t lock_end)
3949 {
3950 ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE);
3951 ASSERT(lock_type == F_RDLCK || lock_type == F_WRLCK);
3952
3953 if (op == NBL_READ && lock_type == F_RDLCK)
3954 return (0);
3955
3956 if (offset <= lock_start && lock_start < offset + length)
3957 return (1);
3958 if (lock_start <= offset && offset <= lock_end)
3959 return (1);
3960
3961 return (0);
3962 }
3963
3964 #ifdef DEBUG
3965 static void
3966 check_active_locks(graph_t *gp)
3967 {
3968 lock_descriptor_t *lock, *lock1;
3969 edge_t *ep;
3970
3971 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
3972 lock = lock->l_next) {
3973 ASSERT(IS_ACTIVE(lock));
3974 ASSERT(NOT_BLOCKED(lock));
3975 ASSERT(!IS_BARRIER(lock));
3976
3977 ep = FIRST_IN(lock);
3978
3979 while (ep != HEAD(lock)) {
3980 ASSERT(IS_SLEEPING(ep->from_vertex));
3981 ASSERT(!NOT_BLOCKED(ep->from_vertex));
3982 ep = NEXT_IN(ep);
3983 }
3984
3985 for (lock1 = lock->l_next; lock1 != ACTIVE_HEAD(gp);
3986 lock1 = lock1->l_next) {
3987 if (lock1->l_vnode == lock->l_vnode) {
3988 if (BLOCKS(lock1, lock)) {
3989 cmn_err(CE_PANIC,
3990 "active lock %p blocks %p",
3991 (void *)lock1, (void *)lock);
3992 } else if (BLOCKS(lock, lock1)) {
3993 cmn_err(CE_PANIC,
3994 "active lock %p blocks %p",
3995 (void *)lock, (void *)lock1);
3996 }
3997 }
3998 }
3999 }
4000 }
4001
4002 /*
4003 * Effect: This function checks whether the transition from 'old_state' to
4004 * 'new_state' is a valid one. It returns 0 if the transition is valid
4005 * and 1 if it is not.
4006 * For a map of valid transitions, see sys/flock_impl.h.
4007 */
static int
check_lock_transition(int old_state, int new_state)
{
	switch (old_state) {
	case FLK_INITIAL_STATE:
		if ((new_state == FLK_START_STATE) ||
		    (new_state == FLK_SLEEPING_STATE) ||
		    (new_state == FLK_ACTIVE_STATE) ||
		    (new_state == FLK_DEAD_STATE)) {
			return (0);
		} else {
			return (1);
		}
	case FLK_START_STATE:
		if ((new_state == FLK_ACTIVE_STATE) ||
		    (new_state == FLK_DEAD_STATE)) {
			return (0);
		} else {
			return (1);
		}
	case FLK_ACTIVE_STATE:
		if (new_state == FLK_DEAD_STATE) {
			return (0);
		} else {
			return (1);
		}
	case FLK_SLEEPING_STATE:
		if ((new_state == FLK_GRANTED_STATE) ||
		    (new_state == FLK_INTERRUPTED_STATE) ||
		    (new_state == FLK_CANCELLED_STATE)) {
			return (0);
		} else {
			return (1);
		}
	case FLK_GRANTED_STATE:
		if ((new_state == FLK_START_STATE) ||
		    (new_state == FLK_INTERRUPTED_STATE) ||
		    (new_state == FLK_CANCELLED_STATE)) {
			return (0);
		} else {
			return (1);
		}
	case FLK_CANCELLED_STATE:
		if ((new_state == FLK_INTERRUPTED_STATE) ||
		    (new_state == FLK_DEAD_STATE)) {
			return (0);
		} else {
			return (1);
		}
	case FLK_INTERRUPTED_STATE:
		if (new_state == FLK_DEAD_STATE) {
			return (0);
		} else {
			return (1);
		}
	case FLK_DEAD_STATE:
		/* May be set more than once */
		if (new_state == FLK_DEAD_STATE) {
			return (0);
		} else {
			return (1);
		}
	default:
		return (1);
	}
}

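/*
 * Examples, read straight off the switch above:
 *
 *	check_lock_transition(FLK_SLEEPING_STATE, FLK_GRANTED_STATE) == 0
 *	check_lock_transition(FLK_ACTIVE_STATE, FLK_DEAD_STATE)      == 0
 *	check_lock_transition(FLK_ACTIVE_STATE, FLK_SLEEPING_STATE)  == 1
 *	check_lock_transition(FLK_DEAD_STATE, FLK_ACTIVE_STATE)      == 1
 */
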
/*
 * Verify the sleeping list: every sleeping lock that is blocked by
 * another sleeping or active lock on the same vnode must have a path
 * to that lock in the graph, and each of its out-edges must point to
 * a lock that actually blocks it.
 */
static void
check_sleeping_locks(graph_t *gp)
{
	lock_descriptor_t *lock1, *lock2;
	edge_t *ep;

	for (lock1 = SLEEPING_HEAD(gp)->l_next; lock1 != SLEEPING_HEAD(gp);
	    lock1 = lock1->l_next) {
		ASSERT(!IS_BARRIER(lock1));
		for (lock2 = lock1->l_next; lock2 != SLEEPING_HEAD(gp);
		    lock2 = lock2->l_next) {
			if (lock1->l_vnode == lock2->l_vnode) {
				if (BLOCKS(lock2, lock1)) {
					ASSERT(!IS_GRANTED(lock1));
					ASSERT(!NOT_BLOCKED(lock1));
					path(lock1, lock2);
				}
			}
		}

		for (lock2 = ACTIVE_HEAD(gp)->l_next; lock2 != ACTIVE_HEAD(gp);
		    lock2 = lock2->l_next) {
			ASSERT(!IS_BARRIER(lock1));
			if (lock1->l_vnode == lock2->l_vnode) {
				if (BLOCKS(lock2, lock1)) {
					ASSERT(!IS_GRANTED(lock1));
					ASSERT(!NOT_BLOCKED(lock1));
					path(lock1, lock2);
				}
			}
		}
		ep = FIRST_ADJ(lock1);
		while (ep != HEAD(lock1)) {
			ASSERT(BLOCKS(ep->to_vertex, lock1));
			ep = NEXT_ADJ(ep);
		}
	}
}

/*
 * Return non-zero if there is a path of length two or more from lock1 to
 * lock2 in the dependency graph.  The direct successors of lock1 are
 * pushed on a stack and a depth-first search is run from them; if
 * 'no_path' is set, also assert that lock2 is not a direct successor
 * of lock1.
 */
static int
level_two_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2, int no_path)
{
	edge_t *ep;
	lock_descriptor_t *vertex;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	flk_graph_uncolor(lock1->l_graph);
	ep = FIRST_ADJ(lock1);
	ASSERT(ep != HEAD(lock1));
	while (ep != HEAD(lock1)) {
		if (no_path)
			ASSERT(ep->to_vertex != lock2);
		STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
		COLOR(ep->to_vertex);
		ep = NEXT_ADJ(ep);
	}

	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
		STACK_POP(vertex_stack, l_dstack);
		for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
		    ep = NEXT_ADJ(ep)) {
			if (COLORED(ep->to_vertex))
				continue;
			COLOR(ep->to_vertex);
			if (ep->to_vertex == lock2)
				return (1);

			STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
		}
	}
	return (0);
}

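/*
 * The search above is the iterative, colored depth-first traversal used
 * throughout this file: a vertex is colored when pushed so that it is
 * visited at most once.  A minimal user-level sketch of the same idiom,
 * using a hypothetical node type rather than lock_descriptor_t and a
 * fixed-size stack with no overflow handling (illustration only):
 *
 *	struct node {
 *		int visited;			(the "color")
 *		int nadj;
 *		struct node *adj[8];		(adjacency list)
 *	};
 *
 *	static int
 *	reachable(struct node *from, struct node *to)
 *	{
 *		struct node *stack[64];
 *		int top = 0, i;
 *
 *		from->visited = 1;
 *		stack[top++] = from;
 *		while (top > 0) {
 *			struct node *v = stack[--top];
 *
 *			for (i = 0; i < v->nadj; i++) {
 *				struct node *w = v->adj[i];
 *
 *				if (w->visited)
 *					continue;
 *				w->visited = 1;
 *				if (w == to)
 *					return (1);
 *				stack[top++] = w;
 *			}
 *		}
 *		return (0);
 *	}
 */
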
/*
 * Verify that the owner identified by (pid, sysid) holds no locks on the
 * given vnode, on either the active or the sleeping list.
 */
static void
check_owner_locks(graph_t *gp, pid_t pid, int sysid, vnode_t *vp)
{
	lock_descriptor_t *lock;

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock) {
		while (lock != ACTIVE_HEAD(gp) && (lock->l_vnode == vp)) {
			if (lock->l_flock.l_pid == pid &&
			    lock->l_flock.l_sysid == sysid)
				cmn_err(CE_PANIC,
				    "owner pid %d's lock %p in active queue",
				    pid, (void *)lock);
			lock = lock->l_next;
		}
	}
	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	if (lock) {
		while (lock != SLEEPING_HEAD(gp) && (lock->l_vnode == vp)) {
			if (lock->l_flock.l_pid == pid &&
			    lock->l_flock.l_sysid == sysid)
				cmn_err(CE_PANIC,
				    "owner pid %d's lock %p in sleep queue",
				    pid, (void *)lock);
			lock = lock->l_next;
		}
	}
}

/*
 * Return non-zero if there is a direct edge from lock1 to lock2.
 */
static int
level_one_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
{
	edge_t *ep = FIRST_ADJ(lock1);

	while (ep != HEAD(lock1)) {
		if (ep->to_vertex == lock2)
			return (1);
		else
			ep = NEXT_ADJ(ep);
	}
	return (0);
}

/*
 * Return non-zero if there is no path of length two or more from lock1 to
 * lock2 (asserting along the way that lock2 is not a direct successor of
 * lock1 either).
 */
static int
no_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
{
	return (!level_two_path(lock1, lock2, 1));
}

/*
 * Consistency check (called from check_sleeping_locks()): lock2 is
 * expected to block lock1.  Warn if there is both a direct edge and a
 * longer path from lock1 to lock2, and panic if there is no path at all.
 */
static void
path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
{
	if (level_one_path(lock1, lock2)) {
		/* Direct edge exists; check for a redundant longer path */
		if (level_two_path(lock1, lock2, 0) != 0) {
			cmn_err(CE_WARN,
			    "one edge one path from lock1 %p lock2 %p",
			    (void *)lock1, (void *)lock2);
		}
	} else if (no_path(lock1, lock2)) {
		cmn_err(CE_PANIC,
		    "No path from lock1 %p to lock2 %p",
		    (void *)lock1, (void *)lock2);
	}
}
#endif /* DEBUG */