1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29
30 /*
31 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
32 */
33
34 #include <sys/flock_impl.h>
35 #include <sys/vfs.h>
36 #include <sys/t_lock.h> /* for <sys/callb.h> */
37 #include <sys/callb.h>
38 #include <sys/clconf.h>
39 #include <sys/cladm.h>
40 #include <sys/nbmlock.h>
41 #include <sys/cred.h>
42 #include <sys/policy.h>
43
44 /*
45 * The following variables are for statistics purposes and are not
46 * protected by locks. They may not be accurate but will at least be
47 * close to the actual values.
48 */
49
50 int flk_lock_allocs;
51 int flk_lock_frees;
52 int edge_allocs;
53 int edge_frees;
54 int flk_proc_vertex_allocs;
55 int flk_proc_edge_allocs;
56 int flk_proc_vertex_frees;
57 int flk_proc_edge_frees;
58
59 static kmutex_t flock_lock;
60
61 #ifdef DEBUG
62 int check_debug = 0;
63 #define CHECK_ACTIVE_LOCKS(gp) if (check_debug) \
64 check_active_locks(gp);
65 #define CHECK_SLEEPING_LOCKS(gp) if (check_debug) \
66 check_sleeping_locks(gp);
67 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp) \
68 if (check_debug) \
69 check_owner_locks(gp, pid, sysid, vp);
70 #define CHECK_LOCK_TRANSITION(old_state, new_state) \
71 { \
72 if (check_lock_transition(old_state, new_state)) { \
73 cmn_err(CE_PANIC, "Illegal lock transition " \
74 "from %d to %d", old_state, new_state); \
75 } \
76 }
77 #else
78
79 #define CHECK_ACTIVE_LOCKS(gp)
80 #define CHECK_SLEEPING_LOCKS(gp)
81 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp)
82 #define CHECK_LOCK_TRANSITION(old_state, new_state)
83
84 #endif /* DEBUG */
85
86 struct kmem_cache *flk_edge_cache;
87
88 graph_t *lock_graph[HASH_SIZE];
89 proc_graph_t pgraph;
90
91 /*
92 * Clustering.
93 *
94 * NLM REGISTRY TYPE IMPLEMENTATION
95 *
96 * Assumptions:
97 * 1. Nodes in a cluster are numbered starting at 1; always non-negative
98 * integers; maximum node id is returned by clconf_maximum_nodeid().
99 * 2. We use this node id to identify the node an NLM server runs on.
100 */
101
102 /*
103 * The NLM registry object keeps track of NLM servers via their
104 * nlmids (the node ids of the cluster nodes they run on) that have
105 * requested locks at the LLM with which this registry is
106 * associated.
107 *
108 * Representation of abstraction:
109 * rep = record[ states: array[nlm_state],
110 * lock: mutex]
111 *
112 * Representation invariants:
113 * 1. index i of rep.states is between 0 and n - 1 where n is the number
114 * of elements in the array, which happens to be the maximum number
115 * of nodes in the cluster configuration + 1.
116 * 2. map nlmid to index i of rep.states
117 * 0 -> 0
118 * 1 -> 1
119 * 2 -> 2
120 * clconf_maximum_nodeid() -> n-1
121 * 3. This 1-1 mapping is quite convenient and it avoids errors resulting
122 * from forgetting to subtract 1 from the index.
123 * 4. The reason we keep the 0th index is the following. A legitimate
124 * cluster configuration includes making a UFS file system NFS
125 * exportable. The code is structured so that if you're in a cluster
126 * you do one thing; otherwise, you do something else. The problem
127 * is what to do if you think you're in a cluster with PXFS loaded,
128 * but you're using UFS, not PXFS? The upper two bytes of the sysid
129 * encode the node id of the node where the NLM server runs; these bytes
130 * are zero for UFS. Since the nodeid is used to index into the
131 * registry, we can record the NLM server state information at index
132 * 0 using the same mechanism used for PXFS file locks!
133 */
134 static flk_nlm_status_t *nlm_reg_status = NULL; /* state array 0..N-1 */
135 static kmutex_t nlm_reg_lock; /* lock to protect array */
136 static uint_t nlm_status_size; /* size of state array */
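
/*
 * Illustrative sketch (comment only, not compiled): how a lock manager
 * request is mapped to a registry slot on a clustered boot. The identity
 * mapping means nlmid i is simply nlm_reg_status[i]; e.g., on a 64-node
 * cluster nlm_status_size is 65, and slot 0 serves non-PXFS (UFS) requests,
 * whose sysid carries a zero node id in its upper bytes.
 *
 *	nlmid = GETNLMID(lock->l_flock.l_sysid);
 *	mutex_enter(&nlm_reg_lock);
 *	if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, nlmid))
 *		FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid);
 *	mutex_exit(&nlm_reg_lock);
 *
 * See reclock() below for the real code path.
 */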
137
138 /*
139 * Although we need a global lock dependency graph (and associated data
140 * structures), we also need a per-zone notion of whether the lock manager is
141 * running, and so whether to allow lock manager requests or not.
142 *
143 * Thus, on a per-zone basis we maintain a ``global'' variable
144 * (flk_lockmgr_status), protected by flock_lock, and set when the lock
145 * manager is determined to be changing state (starting or stopping).
146 *
147 * Each graph/zone pair also has a copy of this variable, which is protected by
148 * the graph's mutex.
149 *
150 * The per-graph copies are used to synchronize lock requests with shutdown
151 * requests. The global copy is used to initialize the per-graph field when a
152 * new graph is created.
153 */
154 struct flock_globals {
155 flk_lockmgr_status_t flk_lockmgr_status;
156 flk_lockmgr_status_t lockmgr_status[HASH_SIZE];
157 };
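
/*
 * Illustrative sketch (comment only, not compiled): how the per-graph copy
 * is consulted when a lock manager request is about to sleep; see
 * flk_wait_execute_request() below for the real code.
 *
 *	struct flock_globals *fg = flk_get_globals();
 *	int index = HASH_INDEX(request->l_vnode);
 *
 *	if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP) {
 *		flk_cancel_sleeping_lock(request, 1);
 *		return (ENOLCK);
 *	}
 */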
158
159 zone_key_t flock_zone_key;
160
161 static void create_flock(lock_descriptor_t *, flock64_t *);
162 static lock_descriptor_t *flk_get_lock(void);
163 static void flk_free_lock(lock_descriptor_t *lock);
164 static void flk_get_first_blocking_lock(lock_descriptor_t *request);
165 static int flk_process_request(lock_descriptor_t *);
166 static int flk_add_edge(lock_descriptor_t *, lock_descriptor_t *, int, int);
167 static edge_t *flk_get_edge(void);
168 static int flk_wait_execute_request(lock_descriptor_t *);
169 static int flk_relation(lock_descriptor_t *, lock_descriptor_t *);
170 static void flk_insert_active_lock(lock_descriptor_t *);
171 static void flk_delete_active_lock(lock_descriptor_t *, int);
172 static void flk_insert_sleeping_lock(lock_descriptor_t *);
173 static void flk_graph_uncolor(graph_t *);
174 static void flk_wakeup(lock_descriptor_t *, int);
175 static void flk_free_edge(edge_t *);
176 static void flk_recompute_dependencies(lock_descriptor_t *,
177 lock_descriptor_t **, int, int);
178 static int flk_find_barriers(lock_descriptor_t *);
179 static void flk_update_barriers(lock_descriptor_t *);
180 static int flk_color_reachables(lock_descriptor_t *);
181 static int flk_canceled(lock_descriptor_t *);
182 static void flk_delete_locks_by_sysid(lock_descriptor_t *);
183 static void report_blocker(lock_descriptor_t *, lock_descriptor_t *);
184 static void wait_for_lock(lock_descriptor_t *);
185 static void unlock_lockmgr_granted(struct flock_globals *);
186 static void wakeup_sleeping_lockmgr_locks(struct flock_globals *);
187
188 /* Clustering hooks */
189 static void cl_flk_change_nlm_state_all_locks(int, flk_nlm_status_t);
190 static void cl_flk_wakeup_sleeping_nlm_locks(int);
191 static void cl_flk_unlock_nlm_granted(int);
192
193 #ifdef DEBUG
194 static int check_lock_transition(int, int);
195 static void check_sleeping_locks(graph_t *);
196 static void check_active_locks(graph_t *);
197 static int no_path(lock_descriptor_t *, lock_descriptor_t *);
198 static void path(lock_descriptor_t *, lock_descriptor_t *);
199 static void check_owner_locks(graph_t *, pid_t, int, vnode_t *);
200 static int level_one_path(lock_descriptor_t *, lock_descriptor_t *);
201 static int level_two_path(lock_descriptor_t *, lock_descriptor_t *, int);
202 #endif
203
204 /* proc_graph function definitions */
205 static int flk_check_deadlock(lock_descriptor_t *);
206 static void flk_proc_graph_uncolor(void);
207 static proc_vertex_t *flk_get_proc_vertex(lock_descriptor_t *);
208 static proc_edge_t *flk_get_proc_edge(void);
209 static void flk_proc_release(proc_vertex_t *);
210 static void flk_free_proc_edge(proc_edge_t *);
211 static void flk_update_proc_graph(edge_t *, int);
212
213 /* Non-blocking mandatory locking */
214 static int lock_blocks_io(nbl_op_t, u_offset_t, ssize_t, int, u_offset_t,
215 u_offset_t);
216
217 static struct flock_globals *
218 flk_get_globals(void)
219 {
220 /*
221 * The KLM module had better be loaded if we're attempting to handle
222 * lockmgr requests.
223 */
224 ASSERT(flock_zone_key != ZONE_KEY_UNINITIALIZED);
225 return (zone_getspecific(flock_zone_key, curproc->p_zone));
226 }
227
228 static flk_lockmgr_status_t
229 flk_get_lockmgr_status(void)
230 {
231 struct flock_globals *fg;
232
233 ASSERT(MUTEX_HELD(&flock_lock));
234
235 if (flock_zone_key == ZONE_KEY_UNINITIALIZED) {
236 /*
237 * KLM module not loaded; lock manager definitely not running.
238 */
239 return (FLK_LOCKMGR_DOWN);
240 }
241 fg = flk_get_globals();
242 return (fg->flk_lockmgr_status);
243 }
244
245 /*
246 * Routine called from fs_frlock in fs/fs_subr.c
247 */
248
249 int
250 reclock(vnode_t *vp,
251 flock64_t *lckdat,
252 int cmd,
253 int flag,
254 u_offset_t offset,
255 flk_callback_t *flk_cbp)
256 {
257 lock_descriptor_t stack_lock_request;
258 lock_descriptor_t *lock_request;
259 int error = 0;
260 graph_t *gp;
261 int nlmid;
262
263 /*
264 * Check access permissions
265 */
266 if ((cmd & SETFLCK) &&
267 ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) ||
268 (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0)))
269 return (EBADF);
270
271 /*
272 * For query and unlock requests we use the stack_lock_request.
273 */
274
275 if ((lckdat->l_type == F_UNLCK) ||
276 !((cmd & INOFLCK) || (cmd & SETFLCK))) {
277 lock_request = &stack_lock_request;
278 (void) bzero((caddr_t)lock_request,
279 sizeof (lock_descriptor_t));
280
281 /*
282 * The following is added so that the assertions in
283 * flk_execute_request() pass.
284 */
285
286 lock_request->l_edge.edge_in_next = &lock_request->l_edge;
287 lock_request->l_edge.edge_in_prev = &lock_request->l_edge;
288 lock_request->l_edge.edge_adj_next = &lock_request->l_edge;
289 lock_request->l_edge.edge_adj_prev = &lock_request->l_edge;
290 lock_request->l_status = FLK_INITIAL_STATE;
291 } else {
292 lock_request = flk_get_lock();
293 }
294 lock_request->l_state = 0;
295 lock_request->l_vnode = vp;
296 lock_request->l_zoneid = getzoneid();
297
298 /*
299 * Convert the request range into the canonical start and end
300 * values. The NLM protocol supports locking over the entire
301 * 32-bit range, so there's no range checking for remote requests,
302 * but we still need to verify that local requests obey the rules.
303 */
304 /* Clustering */
305 if ((cmd & (RCMDLCK | PCMDLCK)) != 0) {
306 ASSERT(lckdat->l_whence == 0);
307 lock_request->l_start = lckdat->l_start;
308 lock_request->l_end = (lckdat->l_len == 0) ? MAX_U_OFFSET_T :
309 lckdat->l_start + (lckdat->l_len - 1);
310 } else {
311 /* check the validity of the lock range */
312 error = flk_convert_lock_data(vp, lckdat,
313 &lock_request->l_start, &lock_request->l_end,
314 offset);
315 if (error) {
316 goto done;
317 }
318 error = flk_check_lock_data(lock_request->l_start,
319 lock_request->l_end, MAXEND);
320 if (error) {
321 goto done;
322 }
323 }
324
325 ASSERT(lock_request->l_end >= lock_request->l_start);
326
327 lock_request->l_type = lckdat->l_type;
328 if (cmd & INOFLCK)
329 lock_request->l_state |= IO_LOCK;
330 if (cmd & SLPFLCK)
331 lock_request->l_state |= WILLING_TO_SLEEP_LOCK;
332 if (cmd & RCMDLCK)
333 lock_request->l_state |= LOCKMGR_LOCK;
334 if (cmd & NBMLCK)
335 lock_request->l_state |= NBMAND_LOCK;
336 /*
337 * Clustering: set flag for PXFS locks
338 * We do not _only_ check for the PCMDLCK flag because PXFS locks could
339 * also be of type 'RCMDLCK'.
340 * We do not _only_ check the GETPXFSID() macro because local PXFS
341 * clients use a pxfsid of zero to permit deadlock detection in the LLM.
342 */
343
344 if ((cmd & PCMDLCK) || (GETPXFSID(lckdat->l_sysid) != 0)) {
345 lock_request->l_state |= PXFS_LOCK;
346 }
347 if (!((cmd & SETFLCK) || (cmd & INOFLCK))) {
348 if (lock_request->l_type == F_RDLCK ||
349 lock_request->l_type == F_WRLCK)
350 lock_request->l_state |= QUERY_LOCK;
351 }
352 lock_request->l_flock = (*lckdat);
353 lock_request->l_callbacks = flk_cbp;
354
355 /*
356 * We are ready for processing the request
357 */
358 if (IS_LOCKMGR(lock_request)) {
359 /*
360 * If the lock request is an NLM server request ....
361 */
362 if (nlm_status_size == 0) { /* not booted as cluster */
363 mutex_enter(&flock_lock);
364 /*
365 * Bail out if this is a lock manager request and the
366 * lock manager is not supposed to be running.
367 */
368 if (flk_get_lockmgr_status() != FLK_LOCKMGR_UP) {
369 mutex_exit(&flock_lock);
370 error = ENOLCK;
371 goto done;
372 }
373 mutex_exit(&flock_lock);
374 } else { /* booted as a cluster */
375 nlmid = GETNLMID(lock_request->l_flock.l_sysid);
376 ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
377
378 mutex_enter(&nlm_reg_lock);
379 /*
380 * If the NLM registry does not know about this
381 * NLM server making the request, add its nlmid
382 * to the registry.
383 */
384 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status,
385 nlmid)) {
386 FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid);
387 } else if (!FLK_REGISTRY_IS_NLM_UP(nlm_reg_status,
388 nlmid)) {
389 /*
390 * If the NLM server is already known (has made
391 * previous lock requests) and its state is
392 * not NLM_UP (meaning the NLM server is
393 * shutting down), then bail out with an
394 * error to deny the lock request.
395 */
396 mutex_exit(&nlm_reg_lock);
397 error = ENOLCK;
398 goto done;
399 }
400 mutex_exit(&nlm_reg_lock);
401 }
402 }
403
404 /* Now get the lock graph for a particular vnode */
405 gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH);
406
407 /*
408 * We drop the rwlock here; otherwise this might end up causing a
409 * deadlock if this IOLOCK sleeps. (bugid # 1183392).
410 */
411
412 if (IS_IO_LOCK(lock_request)) {
413 VOP_RWUNLOCK(vp,
414 (lock_request->l_type == F_RDLCK) ?
415 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
416 }
417 mutex_enter(&gp->gp_mutex);
418
419 lock_request->l_state |= REFERENCED_LOCK;
420 lock_request->l_graph = gp;
421
422 switch (lock_request->l_type) {
423 case F_RDLCK:
424 case F_WRLCK:
425 if (IS_QUERY_LOCK(lock_request)) {
426 flk_get_first_blocking_lock(lock_request);
427 (*lckdat) = lock_request->l_flock;
428 break;
429 }
430
431 /* process the request now */
432
433 error = flk_process_request(lock_request);
434 break;
435
436 case F_UNLCK:
437 /* unlock request will not block so execute it immediately */
438
439 if (IS_LOCKMGR(lock_request) &&
440 flk_canceled(lock_request)) {
441 error = 0;
442 } else {
443 error = flk_execute_request(lock_request);
444 }
445 break;
446
447 case F_UNLKSYS:
448 /*
449 * Recovery mechanism to release lock manager locks when an
450 * NFS client crashes and restarts. The NFS server will clear
451 * the old locks and grant new locks.
452 */
453
454 if (lock_request->l_flock.l_sysid == 0) {
455 mutex_exit(&gp->gp_mutex);
456 return (EINVAL);
457 }
458 if (secpolicy_nfs(CRED()) != 0) {
459 mutex_exit(&gp->gp_mutex);
460 return (EPERM);
461 }
462 flk_delete_locks_by_sysid(lock_request);
463 lock_request->l_state &= ~REFERENCED_LOCK;
464 flk_set_state(lock_request, FLK_DEAD_STATE);
465 flk_free_lock(lock_request);
466 mutex_exit(&gp->gp_mutex);
467 return (0);
468
469 default:
470 error = EINVAL;
471 break;
472 }
473
474 /* Clustering: For blocked PXFS locks, return */
475 if (error == PXFS_LOCK_BLOCKED) {
476 lock_request->l_state &= ~REFERENCED_LOCK;
477 mutex_exit(&gp->gp_mutex);
478 return (error);
479 }
480
481 /*
482 * Now that we have seen the status of locks in the system for
483 * this vnode we acquire the rwlock if it is an IO_LOCK.
484 */
485
486 if (IS_IO_LOCK(lock_request)) {
487 (void) VOP_RWLOCK(vp,
488 (lock_request->l_type == F_RDLCK) ?
489 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
490 if (!error) {
491 lckdat->l_type = F_UNLCK;
492
493 /*
494 * This wakeup is needed; otherwise, if the IO_LOCK has
495 * slept, the dependents on it would not be
496 * woken up at all. (bugid # 1185482).
497 */
498
499 flk_wakeup(lock_request, 1);
500 flk_set_state(lock_request, FLK_DEAD_STATE);
501 flk_free_lock(lock_request);
502 }
503 /*
504 * Otherwise, if an error occurred, either flk_process_request()
505 * returned EDEADLK, in which case there are no
506 * dependents for this lock, or flk_wait_execute_request()
507 * returned EINTR, in which case flk_cancel_sleeping_lock()
508 * has already been done. The same is true for EBADF.
509 */
510 }
511
512 if (lock_request == &stack_lock_request) {
513 flk_set_state(lock_request, FLK_DEAD_STATE);
514 } else {
515 lock_request->l_state &= ~REFERENCED_LOCK;
516 if ((error != 0) || IS_DELETED(lock_request)) {
517 flk_set_state(lock_request, FLK_DEAD_STATE);
518 flk_free_lock(lock_request);
519 }
520 }
521
522 mutex_exit(&gp->gp_mutex);
523 return (error);
524
525 done:
526 flk_set_state(lock_request, FLK_DEAD_STATE);
527 if (lock_request != &stack_lock_request)
528 flk_free_lock(lock_request);
529 return (error);
530 }
531
532 /*
533 * Invoke the callbacks in the given list. If before sleeping, invoke in
534 * list order. If after sleeping, invoke in reverse order.
535 *
536 * CPR (suspend/resume) support: if one of the callbacks returns a
537 * callb_cpr_t, return it. This will be used to make the thread CPR-safe
538 * while it is sleeping. There should be at most one callb_cpr_t for the
539 * thread.
540 * XXX This is unnecessarily complicated. The CPR information should just
541 * get passed in directly through VOP_FRLOCK and reclock, rather than
542 * sneaking it in via a callback.
543 */
544
545 callb_cpr_t *
546 flk_invoke_callbacks(flk_callback_t *cblist, flk_cb_when_t when)
547 {
548 callb_cpr_t *cpr_callbackp = NULL;
549 callb_cpr_t *one_result;
550 flk_callback_t *cb;
551
552 if (cblist == NULL)
553 return (NULL);
554
555 if (when == FLK_BEFORE_SLEEP) {
556 cb = cblist;
557 do {
558 one_result = (*cb->cb_callback)(when, cb->cb_data);
559 if (one_result != NULL) {
560 ASSERT(cpr_callbackp == NULL);
561 cpr_callbackp = one_result;
562 }
563 cb = cb->cb_next;
564 } while (cb != cblist);
565 } else {
566 cb = cblist->cb_prev;
567 do {
568 one_result = (*cb->cb_callback)(when, cb->cb_data);
569 if (one_result != NULL) {
570 cpr_callbackp = one_result;
571 }
572 cb = cb->cb_prev;
573 } while (cb != cblist->cb_prev);
574 }
575
576 return (cpr_callbackp);
577 }
578
579 /*
580 * Initialize a flk_callback_t to hold the given callback.
581 */
582
583 void
584 flk_init_callback(flk_callback_t *flk_cb,
585 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata)
586 {
587 flk_cb->cb_next = flk_cb;
588 flk_cb->cb_prev = flk_cb;
589 flk_cb->cb_callback = cb_fcn;
590 flk_cb->cb_data = cbdata;
591 }
592
593 /*
594 * Initialize an flk_callback_t and then link it into the head of an
595 * existing list (which may be NULL).
596 */
597
598 void
599 flk_add_callback(flk_callback_t *newcb,
600 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *),
601 void *cbdata, flk_callback_t *cblist)
602 {
603 flk_init_callback(newcb, cb_fcn, cbdata);
604
605 if (cblist == NULL)
606 return;
607
608 newcb->cb_prev = cblist->cb_prev;
609 newcb->cb_next = cblist;
610 cblist->cb_prev->cb_next = newcb;
611 cblist->cb_prev = newcb;
612 }
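
/*
 * Usage sketch (comment only, not compiled): a hypothetical caller building
 * a two-element callback list with the routines above and invoking it around
 * a sleep. The callback functions and data (cpr_cb, other_cb, ...) are made
 * up for the example.
 *
 *	flk_callback_t cb1, cb2;
 *	callb_cpr_t *cprp;
 *
 *	flk_init_callback(&cb1, cpr_cb, cpr_data);
 *	flk_add_callback(&cb2, other_cb, other_data, &cb1);
 *
 *	cprp = flk_invoke_callbacks(&cb1, FLK_BEFORE_SLEEP);	list order
 *	... sleep ...
 *	(void) flk_invoke_callbacks(&cb1, FLK_AFTER_SLEEP);	reverse order
 */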
613
614 /*
615 * Initialize the flk_edge_cache data structure and create the
616 * nlm_reg_status array.
617 */
618
619 void
620 flk_init(void)
621 {
622 uint_t i;
623
624 flk_edge_cache = kmem_cache_create("flk_edges",
625 sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0);
626 if (flk_edge_cache == NULL) {
627 cmn_err(CE_PANIC, "Couldn't create flk_edge_cache\n");
628 }
629 /*
630 * Create the NLM registry object.
631 */
632
633 if (cluster_bootflags & CLUSTER_BOOTED) {
634 /*
635 * clconf_maximum_nodeid() tells you the maximum node id that will be
636 * used in the cluster. This number plus 1 will be the size of the nlm
637 * registry status array, because we will be using
638 * all entries indexed from 0 to maxnodeid; e.g., from 0
639 * to 64, for a total of 65 entries.
640 */
641 nlm_status_size = clconf_maximum_nodeid() + 1;
642 } else {
643 nlm_status_size = 0;
644 }
645
646 if (nlm_status_size != 0) { /* booted as a cluster */
647 nlm_reg_status = (flk_nlm_status_t *)
648 kmem_alloc(sizeof (flk_nlm_status_t) * nlm_status_size,
649 KM_SLEEP);
650
651 /* initialize all NLM states in array to NLM_UNKNOWN */
652 for (i = 0; i < nlm_status_size; i++) {
653 nlm_reg_status[i] = FLK_NLM_UNKNOWN;
654 }
655 }
656 }
657
658 /*
659 * Zone constructor/destructor callbacks to be executed when a zone is
660 * created/destroyed.
661 */
662 /* ARGSUSED */
663 void *
664 flk_zone_init(zoneid_t zoneid)
665 {
666 struct flock_globals *fg;
667 uint_t i;
668
669 fg = kmem_alloc(sizeof (*fg), KM_SLEEP);
670 fg->flk_lockmgr_status = FLK_LOCKMGR_UP;
671 for (i = 0; i < HASH_SIZE; i++)
672 fg->lockmgr_status[i] = FLK_LOCKMGR_UP;
673 return (fg);
674 }
675
676 /* ARGSUSED */
677 void
678 flk_zone_fini(zoneid_t zoneid, void *data)
679 {
680 struct flock_globals *fg = data;
681
682 kmem_free(fg, sizeof (*fg));
683 }
684
685 /*
686 * Get a lock_descriptor structure with initialization of edge lists.
687 */
688
689 static lock_descriptor_t *
690 flk_get_lock(void)
691 {
692 lock_descriptor_t *l;
693
694 l = kmem_zalloc(sizeof (lock_descriptor_t), KM_SLEEP);
695
696 cv_init(&l->l_cv, NULL, CV_DRIVER, NULL);
697 l->l_edge.edge_in_next = &l->l_edge;
698 l->l_edge.edge_in_prev = &l->l_edge;
699 l->l_edge.edge_adj_next = &l->l_edge;
700 l->l_edge.edge_adj_prev = &l->l_edge;
701 l->pvertex = -1;
702 l->l_status = FLK_INITIAL_STATE;
703 flk_lock_allocs++;
704 return (l);
705 }
706
707 /*
708 * Free a lock_descriptor structure. If some thread still has a
709 * reference to it, as in reclock(), just set the DELETED_LOCK flag.
710 */
711
712 void
713 flk_free_lock(lock_descriptor_t *lock)
714 {
715 ASSERT(IS_DEAD(lock));
716 if (IS_REFERENCED(lock)) {
717 lock->l_state |= DELETED_LOCK;
718 return;
719 }
720 flk_lock_frees++;
721 kmem_free((void *)lock, sizeof (lock_descriptor_t));
722 }
723
724 void
725 flk_set_state(lock_descriptor_t *lock, int new_state)
726 {
727 /*
728 * Locks in the sleeping list may be woken up in a number of ways,
729 * and more than once. If a sleeping lock is signaled awake more
730 * than once, then it may or may not change state depending on its
731 * current state.
732 * Also note that NLM locks that are sleeping could be moved to an
733 * interrupted state more than once if the unlock request is
734 * retransmitted by the NLM client - the second time around, this is
735 * just a nop.
736 * The ordering of being signaled awake is:
737 * INTERRUPTED_STATE > CANCELLED_STATE > GRANTED_STATE.
738 * The checks below implement this ordering.
739 */
740 if (IS_INTERRUPTED(lock)) {
741 if ((new_state == FLK_CANCELLED_STATE) ||
742 (new_state == FLK_GRANTED_STATE) ||
743 (new_state == FLK_INTERRUPTED_STATE)) {
744 return;
745 }
746 }
747 if (IS_CANCELLED(lock)) {
748 if ((new_state == FLK_GRANTED_STATE) ||
749 (new_state == FLK_CANCELLED_STATE)) {
750 return;
751 }
752 }
753 CHECK_LOCK_TRANSITION(lock->l_status, new_state);
754 if (IS_PXFS(lock)) {
755 cl_flk_state_transition_notify(lock, lock->l_status, new_state);
756 }
757 lock->l_status = new_state;
758 }
759
760 /*
761 * Routine that checks whether there are any blocking locks in the system.
762 *
763 * The policy followed is: if a write lock is sleeping, we don't allow read
764 * locks to get ahead of this write lock, even though there may not be any
765 * active locks covering the read locks' region.
766 *
767 * The flk_add_edge() function adds an edge between l1 and l2 iff there
768 * is no path between l1 and l2 already. This is done to have a "minimum
769 * storage representation" of the dependency graph.
770 *
771 * Another property of the graph: since only the new request throws
772 * edges to the existing locks in the graph, the graph is always topologically
773 * ordered.
774 */
775
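/*
 * Example of the policy above (descriptive only): suppose a write lock on
 * [0, 99] is already sleeping. A later read request for [0, 9] may conflict
 * with no active lock at all, yet it is still made to wait behind the
 * sleeping writer so that the writer is not starved.
 */
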
776 static int
777 flk_process_request(lock_descriptor_t *request)
778 {
779 graph_t *gp = request->l_graph;
780 lock_descriptor_t *lock;
781 int request_blocked_by_active = 0;
782 int request_blocked_by_granted = 0;
783 int request_blocked_by_sleeping = 0;
784 vnode_t *vp = request->l_vnode;
785 int error = 0;
786 int request_will_wait = 0;
787 int found_covering_lock = 0;
788 lock_descriptor_t *covered_by = NULL;
789
790 ASSERT(MUTEX_HELD(&gp->gp_mutex));
791 request_will_wait = IS_WILLING_TO_SLEEP(request);
792
793 /*
794 * check active locks
795 */
796
797 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
798
799
800 if (lock) {
801 do {
802 if (BLOCKS(lock, request)) {
803 if (!request_will_wait)
804 return (EAGAIN);
805 request_blocked_by_active = 1;
806 break;
807 }
808 /*
809 * Grant the lock if the same owner already holds an active
810 * lock that covers the request.
811 */
812
813 if (SAME_OWNER(lock, request) &&
814 COVERS(lock, request) &&
815 (request->l_type == F_RDLCK))
816 return (flk_execute_request(request));
817 lock = lock->l_next;
818 } while (lock->l_vnode == vp);
819 }
820
821 if (!request_blocked_by_active) {
822 lock_descriptor_t *lk[1];
823 lock_descriptor_t *first_glock = NULL;
824 /*
825 * Shall we grant this?! NO!!
826 * What about those locks that were just granted and are still
827 * in the sleep queue? Those threads have been woken up, so their
828 * locks are almost active.
829 */
830 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
831 if (lock) {
832 do {
833 if (BLOCKS(lock, request)) {
834 if (IS_GRANTED(lock)) {
835 request_blocked_by_granted = 1;
836 } else {
837 request_blocked_by_sleeping = 1;
838 }
839 }
840
841 lock = lock->l_next;
842 } while ((lock->l_vnode == vp));
843 first_glock = lock->l_prev;
844 ASSERT(first_glock->l_vnode == vp);
845 }
846
847 if (request_blocked_by_granted)
848 goto block;
849
850 if (!request_blocked_by_sleeping) {
851 /*
852 * If the request isn't going to be blocked by a
853 * sleeping request, we know that it isn't going to
854 * be blocked; we can just execute the request --
855 * without performing costly deadlock detection.
856 */
857 ASSERT(!request_blocked_by_active);
858 return (flk_execute_request(request));
859 } else if (request->l_type == F_RDLCK) {
860 /*
861 * If we have a sleeping writer in the requested
862 * lock's range, block.
863 */
864 goto block;
865 }
866
867 lk[0] = request;
868 request->l_state |= RECOMPUTE_LOCK;
869 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
870 if (lock) {
871 do {
872 flk_recompute_dependencies(lock, lk, 1, 0);
873 lock = lock->l_next;
874 } while (lock->l_vnode == vp);
875 }
876 lock = first_glock;
877 if (lock) {
878 do {
879 if (IS_GRANTED(lock)) {
880 flk_recompute_dependencies(lock, lk, 1, 0);
881 }
882 lock = lock->l_prev;
883 } while ((lock->l_vnode == vp));
884 }
885 request->l_state &= ~RECOMPUTE_LOCK;
886 if (!NO_DEPENDENTS(request) && flk_check_deadlock(request))
887 return (EDEADLK);
888 return (flk_execute_request(request));
889 }
890
891 block:
892 if (request_will_wait)
893 flk_graph_uncolor(gp);
894
895 /* check sleeping locks */
896
897 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
898
899 /*
900 * If we find a sleeping write lock that is a superset of the
901 * region wanted by request, we can be assured that by adding an
902 * edge to this write lock we have paths to all locks in the
903 * graph that block the request, except in one case, which is why
904 * there is another SAME_OWNER check in the loop below. The exception
905 * is when the process that owns the sleeping write lock 'l1'
906 * has other locks l2, l3, l4 in the system that arrived
907 * before l1. l1 has no path to those locks since they are from the
908 * same process. We break when we find a second covering sleeping
909 * lock l5 owned by a process different from the one owning l1, because
910 * none of l2, l3, l4, etc. can have arrived before l5; if one had,
911 * l1 would already have produced a deadlock.
912 */
913
914 if (lock) {
915 do {
916 if (BLOCKS(lock, request)) {
917 if (!request_will_wait)
918 return (EAGAIN);
919 if (COVERS(lock, request) &&
920 lock->l_type == F_WRLCK) {
921 if (found_covering_lock &&
922 !SAME_OWNER(lock, covered_by)) {
923 found_covering_lock++;
924 break;
925 }
926 found_covering_lock = 1;
927 covered_by = lock;
928 }
929 if (found_covering_lock &&
930 !SAME_OWNER(lock, covered_by)) {
931 lock = lock->l_next;
932 continue;
933 }
934 if ((error = flk_add_edge(request, lock,
935 !found_covering_lock, 0)))
936 return (error);
937 }
938 lock = lock->l_next;
939 } while (lock->l_vnode == vp);
940 }
941
942 /*
943 * found_covering_lock == 2 iff at this point 'request' has paths
944 * to all locks that block 'request'. found_covering_lock == 1 iff at this
945 * point 'request' has paths to all blocking locks whose owner is not the
946 * same as the one that covers 'request' (covered_by above); locks owned
947 * by covered_by may still exist in the active list.
948 */
949
950 if (request_blocked_by_active && found_covering_lock != 2) {
951 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
952 ASSERT(lock != NULL);
953 do {
954 if (BLOCKS(lock, request)) {
955 if (found_covering_lock &&
956 !SAME_OWNER(lock, covered_by)) {
957 lock = lock->l_next;
958 continue;
959 }
960 if ((error = flk_add_edge(request, lock,
961 CHECK_CYCLE, 0)))
962 return (error);
963 }
964 lock = lock->l_next;
965 } while (lock->l_vnode == vp);
966 }
967
968 if (NOT_BLOCKED(request)) {
969 /*
970 * request not dependent on any other locks
971 * so execute this request
972 */
973 return (flk_execute_request(request));
974 } else {
975 /*
976 * check for deadlock
977 */
978 if (flk_check_deadlock(request))
979 return (EDEADLK);
980 /*
981 * this thread has to sleep
982 */
983 return (flk_wait_execute_request(request));
984 }
985 }
986
987 /*
988 * The actual execution of the request in the simple case is only to
989 * insert the 'request' into the list of active locks if it is not an
990 * UNLOCK.
991 * We have to consider the existing active locks' relation to
992 * this 'request' if they are owned by the same process. flk_relation()
993 * does this job and sees to it that the dependency graph information
994 * is maintained properly.
995 */
996
997 int
998 flk_execute_request(lock_descriptor_t *request)
999 {
1000 graph_t *gp = request->l_graph;
1001 vnode_t *vp = request->l_vnode;
1002 lock_descriptor_t *lock, *lock1;
1003 int done_searching = 0;
1004
1005 CHECK_SLEEPING_LOCKS(gp);
1006 CHECK_ACTIVE_LOCKS(gp);
1007
1008 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1009
1010 flk_set_state(request, FLK_START_STATE);
1011
1012 ASSERT(NOT_BLOCKED(request));
1013
1014 /* IO_LOCK requests are only to check status */
1015
1016 if (IS_IO_LOCK(request))
1017 return (0);
1018
1019 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1020
1021 if (lock == NULL && request->l_type == F_UNLCK)
1022 return (0);
1023 if (lock == NULL) {
1024 flk_insert_active_lock(request);
1025 return (0);
1026 }
1027
1028 do {
1029 lock1 = lock->l_next;
1030 if (SAME_OWNER(request, lock)) {
1031 done_searching = flk_relation(lock, request);
1032 }
1033 lock = lock1;
1034 } while (lock->l_vnode == vp && !done_searching);
1035
1036 /*
1037 * insert in active queue
1038 */
1039
1040 if (request->l_type != F_UNLCK)
1041 flk_insert_active_lock(request);
1042
1043 return (0);
1044 }
1045
1046 /*
1047 * 'request' is blocked by someone, therefore we put it into the sleep queue.
1048 */
1049 static int
1050 flk_wait_execute_request(lock_descriptor_t *request)
1051 {
1052 graph_t *gp = request->l_graph;
1053 callb_cpr_t *cprp; /* CPR info from callback */
1054 struct flock_globals *fg;
1055 int index;
1056
1057 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1058 ASSERT(IS_WILLING_TO_SLEEP(request));
1059
1060 flk_insert_sleeping_lock(request);
1061
1062 if (IS_LOCKMGR(request)) {
1063 index = HASH_INDEX(request->l_vnode);
1064 fg = flk_get_globals();
1065
1066 if (nlm_status_size == 0) { /* not booted as a cluster */
1067 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP) {
1068 flk_cancel_sleeping_lock(request, 1);
1069 return (ENOLCK);
1070 }
1071 } else { /* booted as a cluster */
1072 /*
1073 * If the request is an NLM server lock request,
1074 * and the NLM state of the lock request is not
1075 * NLM_UP (because the NLM server is shutting
1076 * down), then cancel the sleeping lock and
1077 * return error ENOLCK that will encourage the
1078 * client to retransmit.
1079 */
1080 if (!IS_NLM_UP(request)) {
1081 flk_cancel_sleeping_lock(request, 1);
1082 return (ENOLCK);
1083 }
1084 }
1085 }
1086
1087 /* Clustering: For blocking PXFS locks, return */
1088 if (IS_PXFS(request)) {
1089 /*
1090 * PXFS locks sleep on the client side.
1091 * The callback argument is used to wake up the sleeper
1092 * when the lock is granted.
1093 * We return -1 (PXFS_LOCK_BLOCKED, rather than an errno value) to
1094 * indicate that the client side should sleep.
1095 */
1096 return (PXFS_LOCK_BLOCKED);
1097 }
1098
1099 if (request->l_callbacks != NULL) {
1100 /*
1101 * To make sure the shutdown code works correctly, either
1102 * the callback must happen after putting the lock on the
1103 * sleep list, or we must check the shutdown status after
1104 * returning from the callback (and before sleeping). At
1105 * least for now, we'll use the first option. If a
1106 * shutdown or signal or whatever happened while the graph
1107 * mutex was dropped, that will be detected by
1108 * wait_for_lock().
1109 */
1110 mutex_exit(&gp->gp_mutex);
1111
1112 cprp = flk_invoke_callbacks(request->l_callbacks,
1113 FLK_BEFORE_SLEEP);
1114
1115 mutex_enter(&gp->gp_mutex);
1116
1117 if (cprp == NULL) {
1118 wait_for_lock(request);
1119 } else {
1120 mutex_enter(cprp->cc_lockp);
1121 CALLB_CPR_SAFE_BEGIN(cprp);
1122 mutex_exit(cprp->cc_lockp);
1123 wait_for_lock(request);
1124 mutex_enter(cprp->cc_lockp);
1125 CALLB_CPR_SAFE_END(cprp, cprp->cc_lockp);
1126 mutex_exit(cprp->cc_lockp);
1127 }
1128
1129 mutex_exit(&gp->gp_mutex);
1130 (void) flk_invoke_callbacks(request->l_callbacks,
1131 FLK_AFTER_SLEEP);
1132 mutex_enter(&gp->gp_mutex);
1133 } else {
1134 wait_for_lock(request);
1135 }
1136
1137 if (IS_LOCKMGR(request)) {
1138 /*
1139 * If the lock manager is shutting down, return an
1140 * error that will encourage the client to retransmit.
1141 */
1142 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP &&
1143 !IS_GRANTED(request)) {
1144 flk_cancel_sleeping_lock(request, 1);
1145 return (ENOLCK);
1146 }
1147 }
1148
1149 if (IS_INTERRUPTED(request)) {
1150 /* we got a signal, or act like we did */
1151 flk_cancel_sleeping_lock(request, 1);
1152 return (EINTR);
1153 }
1154
1155 /* Cancelled if some other thread has closed the file */
1156
1157 if (IS_CANCELLED(request)) {
1158 flk_cancel_sleeping_lock(request, 1);
1159 return (EBADF);
1160 }
1161
1162 request->l_state &= ~GRANTED_LOCK;
1163 REMOVE_SLEEP_QUEUE(request);
1164 return (flk_execute_request(request));
1165 }
1166
1167 /*
1168 * This routine adds an edge between from and to because from depends
1169 * on to. If asked to check for deadlock, it checks whether any lock
1170 * reachable from "from_lock" is owned by the same process
1171 * as "from_lock".
1172 * NOTE: It is the caller's responsibility to make sure that the color
1173 * of the graph is consistent between the calls to flk_add_edge as done
1174 * in flk_process_request. This routine does not color and check for
1175 * deadlock explicitly.
1176 */
1177
1178 static int
1179 flk_add_edge(lock_descriptor_t *from_lock, lock_descriptor_t *to_lock,
1180 int check_cycle, int update_graph)
1181 {
1182 edge_t *edge;
1183 edge_t *ep;
1184 lock_descriptor_t *vertex;
1185 lock_descriptor_t *vertex_stack;
1186
1187 STACK_INIT(vertex_stack);
1188
1189 /*
1190 * If the to vertex already has the mark color, just return;
1191 * don't add an edge, as the to vertex is already reachable
1192 * from the from vertex.
1193 */
1194
1195 if (COLORED(to_lock))
1196 return (0);
1197
1198 edge = flk_get_edge();
1199
1200 /*
1201 * set the from and to vertex
1202 */
1203
1204 edge->from_vertex = from_lock;
1205 edge->to_vertex = to_lock;
1206
1207 /*
1208 * put in adjacency list of from vertex
1209 */
1210
1211 from_lock->l_edge.edge_adj_next->edge_adj_prev = edge;
1212 edge->edge_adj_next = from_lock->l_edge.edge_adj_next;
1213 edge->edge_adj_prev = &from_lock->l_edge;
1214 from_lock->l_edge.edge_adj_next = edge;
1215
1216 /*
1217 * put in the in-list of the to vertex
1218 */
1219
1220 to_lock->l_edge.edge_in_next->edge_in_prev = edge;
1221 edge->edge_in_next = to_lock->l_edge.edge_in_next;
1222 to_lock->l_edge.edge_in_next = edge;
1223 edge->edge_in_prev = &to_lock->l_edge;
1224
1225
1226 if (update_graph) {
1227 flk_update_proc_graph(edge, 0);
1228 return (0);
1229 }
1230 if (!check_cycle) {
1231 return (0);
1232 }
1233
1234 STACK_PUSH(vertex_stack, from_lock, l_stack);
1235
1236 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
1237
1238 STACK_POP(vertex_stack, l_stack);
1239
1240 for (ep = FIRST_ADJ(vertex);
1241 ep != HEAD(vertex);
1242 ep = NEXT_ADJ(ep)) {
1243 if (COLORED(ep->to_vertex))
1244 continue;
1245 COLOR(ep->to_vertex);
1246 if (SAME_OWNER(ep->to_vertex, from_lock))
1247 goto dead_lock;
1248 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
1249 }
1250 }
1251 return (0);
1252
1253 dead_lock:
1254
1255 /*
1256 * remove all edges
1257 */
1258
1259 ep = FIRST_ADJ(from_lock);
1260
1261 while (ep != HEAD(from_lock)) {
1262 IN_LIST_REMOVE(ep);
1263 from_lock->l_sedge = NEXT_ADJ(ep);
1264 ADJ_LIST_REMOVE(ep);
1265 flk_free_edge(ep);
1266 ep = from_lock->l_sedge;
1267 }
1268 return (EDEADLK);
1269 }
1270
1271 /*
1272 * Get an edge structure for representing the dependency between two locks.
1273 */
1274
1275 static edge_t *
1276 flk_get_edge()
1277 {
1278 edge_t *ep;
1279
1280 ASSERT(flk_edge_cache != NULL);
1281
1282 ep = kmem_cache_alloc(flk_edge_cache, KM_SLEEP);
1283 edge_allocs++;
1284 return (ep);
1285 }
1286
1287 /*
1288 * Free the edge structure.
1289 */
1290
1291 static void
1292 flk_free_edge(edge_t *ep)
1293 {
1294 edge_frees++;
1295 kmem_cache_free(flk_edge_cache, (void *)ep);
1296 }
1297
1298 /*
1299 * Check the relationship of request with lock and perform the
1300 * recomputation of dependencies, break the lock if required, and return
1301 * 1 if the request cannot have any further relationship with the remaining
1302 * active locks.
1303 * 'lock' and 'request' are compared, and in case of overlap we
1304 * delete 'lock' and form new locks to represent the non-overlapped
1305 * portion of the original 'lock'. This function has side effects: 'lock'
1306 * may be freed and new locks may be added to the active list.
1307 */
1308
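/*
 * Worked example (descriptive only): an active lock covers [0, 99] and an
 * overlapping request of a different type arrives for [30, 50]. The code
 * below deletes the original lock and builds
 * topology[] = { [0, 29], [51, 99], request }, i.e. two new locks for the
 * non-overlapping pieces plus the request itself, before recomputing
 * dependencies. If the request is an unlock, it is dropped from topology[]
 * again at the "recompute" label, since nothing can depend on an unlock.
 */
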
1309 static int
1310 flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request)
1311 {
1312 int lock_effect;
1313 lock_descriptor_t *lock1, *lock2;
1314 lock_descriptor_t *topology[3];
1315 int nvertex = 0;
1316 int i;
1317 edge_t *ep;
1318 graph_t *gp = (lock->l_graph);
1319
1320
1321 CHECK_SLEEPING_LOCKS(gp);
1322 CHECK_ACTIVE_LOCKS(gp);
1323
1324 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1325
1326 topology[0] = topology[1] = topology[2] = NULL;
1327
1328 if (request->l_type == F_UNLCK)
1329 lock_effect = FLK_UNLOCK;
1330 else if (request->l_type == F_RDLCK &&
1331 lock->l_type == F_WRLCK)
1332 lock_effect = FLK_DOWNGRADE;
1333 else if (request->l_type == F_WRLCK &&
1334 lock->l_type == F_RDLCK)
1335 lock_effect = FLK_UPGRADE;
1336 else
1337 lock_effect = FLK_STAY_SAME;
1338
1339 if (lock->l_end < request->l_start) {
1340 if (lock->l_end == request->l_start - 1 &&
1341 lock_effect == FLK_STAY_SAME) {
1342 topology[0] = request;
1343 request->l_start = lock->l_start;
1344 nvertex = 1;
1345 goto recompute;
1346 } else {
1347 return (0);
1348 }
1349 }
1350
1351 if (lock->l_start > request->l_end) {
1352 if (request->l_end == lock->l_start - 1 &&
1353 lock_effect == FLK_STAY_SAME) {
1354 topology[0] = request;
1355 request->l_end = lock->l_end;
1356 nvertex = 1;
1357 goto recompute;
1358 } else {
1359 return (1);
1360 }
1361 }
1362
1363 if (request->l_end < lock->l_end) {
1364 if (request->l_start > lock->l_start) {
1365 if (lock_effect == FLK_STAY_SAME) {
1366 request->l_start = lock->l_start;
1367 request->l_end = lock->l_end;
1368 topology[0] = request;
1369 nvertex = 1;
1370 } else {
1371 lock1 = flk_get_lock();
1372 lock2 = flk_get_lock();
1373 COPY(lock1, lock);
1374 COPY(lock2, lock);
1375 lock1->l_start = lock->l_start;
1376 lock1->l_end = request->l_start - 1;
1377 lock2->l_start = request->l_end + 1;
1378 lock2->l_end = lock->l_end;
1379 topology[0] = lock1;
1380 topology[1] = lock2;
1381 topology[2] = request;
1382 nvertex = 3;
1383 }
1384 } else if (request->l_start < lock->l_start) {
1385 if (lock_effect == FLK_STAY_SAME) {
1386 request->l_end = lock->l_end;
1387 topology[0] = request;
1388 nvertex = 1;
1389 } else {
1390 lock1 = flk_get_lock();
1391 COPY(lock1, lock);
1392 lock1->l_start = request->l_end + 1;
1393 topology[0] = lock1;
1394 topology[1] = request;
1395 nvertex = 2;
1396 }
1397 } else {
1398 if (lock_effect == FLK_STAY_SAME) {
1399 request->l_start = lock->l_start;
1400 request->l_end = lock->l_end;
1401 topology[0] = request;
1402 nvertex = 1;
1403 } else {
1404 lock1 = flk_get_lock();
1405 COPY(lock1, lock);
1406 lock1->l_start = request->l_end + 1;
1407 topology[0] = lock1;
1408 topology[1] = request;
1409 nvertex = 2;
1410 }
1411 }
1412 } else if (request->l_end > lock->l_end) {
1413 if (request->l_start > lock->l_start) {
1414 if (lock_effect == FLK_STAY_SAME) {
1415 request->l_start = lock->l_start;
1416 topology[0] = request;
1417 nvertex = 1;
1418 } else {
1419 lock1 = flk_get_lock();
1420 COPY(lock1, lock);
1421 lock1->l_end = request->l_start - 1;
1422 topology[0] = lock1;
1423 topology[1] = request;
1424 nvertex = 2;
1425 }
1426 } else if (request->l_start < lock->l_start) {
1427 topology[0] = request;
1428 nvertex = 1;
1429 } else {
1430 topology[0] = request;
1431 nvertex = 1;
1432 }
1433 } else {
1434 if (request->l_start > lock->l_start) {
1435 if (lock_effect == FLK_STAY_SAME) {
1436 request->l_start = lock->l_start;
1437 topology[0] = request;
1438 nvertex = 1;
1439 } else {
1440 lock1 = flk_get_lock();
1441 COPY(lock1, lock);
1442 lock1->l_end = request->l_start - 1;
1443 topology[0] = lock1;
1444 topology[1] = request;
1445 nvertex = 2;
1446 }
1447 } else if (request->l_start < lock->l_start) {
1448 topology[0] = request;
1449 nvertex = 1;
1450 } else {
1451 if (lock_effect != FLK_UNLOCK) {
1452 topology[0] = request;
1453 nvertex = 1;
1454 } else {
1455 flk_delete_active_lock(lock, 0);
1456 flk_wakeup(lock, 1);
1457 flk_free_lock(lock);
1458 CHECK_SLEEPING_LOCKS(gp);
1459 CHECK_ACTIVE_LOCKS(gp);
1460 return (1);
1461 }
1462 }
1463 }
1464
1465 recompute:
1466
1467 /*
1468 * For unlock we don't include the 'request' in the recomputation of
1469 * dependencies because no lock will add an edge to it.
1470 */
1471
1472 if (lock_effect == FLK_UNLOCK) {
1473 topology[nvertex-1] = NULL;
1474 nvertex--;
1475 }
1476 for (i = 0; i < nvertex; i++) {
1477 topology[i]->l_state |= RECOMPUTE_LOCK;
1478 topology[i]->l_color = NO_COLOR;
1479 }
1480
1481 ASSERT(FIRST_ADJ(lock) == HEAD(lock));
1482
1483 /*
1484 * Remove, from the adjacency lists of the other vertices, all edges
1485 * that point to this vertex 'lock'.
1486 */
1487
1488 ep = FIRST_IN(lock);
1489 while (ep != HEAD(lock)) {
1490 ADJ_LIST_REMOVE(ep);
1491 ep = NEXT_IN(ep);
1492 }
1493
1494 flk_delete_active_lock(lock, 0);
1495
1496 /* We are ready for recomputing the dependencies now */
1497
1498 flk_recompute_dependencies(lock, topology, nvertex, 1);
1499
1500 for (i = 0; i < nvertex; i++) {
1501 topology[i]->l_state &= ~RECOMPUTE_LOCK;
1502 topology[i]->l_color = NO_COLOR;
1503 }
1504
1505
1506 if (lock_effect == FLK_UNLOCK) {
1507 nvertex++;
1508 }
1509 for (i = 0; i < nvertex - 1; i++) {
1510 flk_insert_active_lock(topology[i]);
1511 }
1512
1513
1514 if (lock_effect == FLK_DOWNGRADE || lock_effect == FLK_UNLOCK) {
1515 flk_wakeup(lock, 0);
1516 } else {
1517 ep = FIRST_IN(lock);
1518 while (ep != HEAD(lock)) {
1519 lock->l_sedge = NEXT_IN(ep);
1520 IN_LIST_REMOVE(ep);
1521 flk_update_proc_graph(ep, 1);
1522 flk_free_edge(ep);
1523 ep = lock->l_sedge;
1524 }
1525 }
1526 flk_free_lock(lock);
1527
1528 CHECK_SLEEPING_LOCKS(gp);
1529 CHECK_ACTIVE_LOCKS(gp);
1530 return (0);
1531 }
1532
1533 /*
1534 * Insert a lock into the active queue.
1535 */
1536
1537 static void
1538 flk_insert_active_lock(lock_descriptor_t *new_lock)
1539 {
1540 graph_t *gp = new_lock->l_graph;
1541 vnode_t *vp = new_lock->l_vnode;
1542 lock_descriptor_t *first_lock, *lock;
1543
1544 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1545
1546 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1547 first_lock = lock;
1548
1549 if (first_lock != NULL) {
1550 for (; (lock->l_vnode == vp &&
1551 lock->l_start < new_lock->l_start); lock = lock->l_next)
1552 ;
1553 } else {
1554 lock = ACTIVE_HEAD(gp);
1555 }
1556
1557 lock->l_prev->l_next = new_lock;
1558 new_lock->l_next = lock;
1559 new_lock->l_prev = lock->l_prev;
1560 lock->l_prev = new_lock;
1561
1562 if (first_lock == NULL || (new_lock->l_start <= first_lock->l_start)) {
1563 vp->v_filocks = (struct filock *)new_lock;
1564 }
1565 flk_set_state(new_lock, FLK_ACTIVE_STATE);
1566 new_lock->l_state |= ACTIVE_LOCK;
1567
1568 CHECK_ACTIVE_LOCKS(gp);
1569 CHECK_SLEEPING_LOCKS(gp);
1570 }
1571
1572 /*
1573 * Delete an active lock: performs one of two functions depending on the
1574 * value of the second parameter. One is to remove the lock from the active
1575 * lists only; the other is to both remove and free the lock.
1576 */
1577
1578 static void
1579 flk_delete_active_lock(lock_descriptor_t *lock, int free_lock)
1580 {
1581 vnode_t *vp = lock->l_vnode;
1582 graph_t *gp = lock->l_graph;
1583
1584 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1585 if (free_lock)
1586 ASSERT(NO_DEPENDENTS(lock));
1587 ASSERT(NOT_BLOCKED(lock));
1588 ASSERT(IS_ACTIVE(lock));
1589
1590 ASSERT((vp->v_filocks != NULL));
1591
1592 if (vp->v_filocks == (struct filock *)lock) {
1593 vp->v_filocks = (struct filock *)
1594 ((lock->l_next->l_vnode == vp) ? lock->l_next :
1595 NULL);
1596 }
1597 lock->l_next->l_prev = lock->l_prev;
1598 lock->l_prev->l_next = lock->l_next;
1599 lock->l_next = lock->l_prev = NULL;
1600 flk_set_state(lock, FLK_DEAD_STATE);
1601 lock->l_state &= ~ACTIVE_LOCK;
1602
1603 if (free_lock)
1604 flk_free_lock(lock);
1605 CHECK_ACTIVE_LOCKS(gp);
1606 CHECK_SLEEPING_LOCKS(gp);
1607 }
1608
1609 /*
1610 * Insert into the sleep queue.
1611 */
1612
1613 static void
1614 flk_insert_sleeping_lock(lock_descriptor_t *request)
1615 {
1616 graph_t *gp = request->l_graph;
1617 vnode_t *vp = request->l_vnode;
1618 lock_descriptor_t *lock;
1619
1620 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1621 ASSERT(IS_INITIAL(request));
1622
1623 for (lock = gp->sleeping_locks.l_next; (lock != &gp->sleeping_locks &&
1624 lock->l_vnode < vp); lock = lock->l_next)
1625 ;
1626
1627 lock->l_prev->l_next = request;
1628 request->l_prev = lock->l_prev;
1629 lock->l_prev = request;
1630 request->l_next = lock;
1631 flk_set_state(request, FLK_SLEEPING_STATE);
1632 request->l_state |= SLEEPING_LOCK;
1633 }
1634
1635 /*
1636 * Cancelling a sleeping lock implies removing a vertex from the
1637 * dependency graph and therefore we should recompute the dependencies
1638 * of all vertices that have a path to this vertex, w.r.t. all
1639 * vertices reachable from this vertex.
1640 */
1641
1642 void
1643 flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue)
1644 {
1645 graph_t *gp = request->l_graph;
1646 vnode_t *vp = request->l_vnode;
1647 lock_descriptor_t **topology = NULL;
1648 edge_t *ep;
1649 lock_descriptor_t *vertex, *lock;
1650 int nvertex = 0;
1651 int i;
1652 lock_descriptor_t *vertex_stack;
1653
1654 STACK_INIT(vertex_stack);
1655
1656 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1657 /*
1658 * Count the number of vertex pointers that have to be allocated:
1659 * all vertices that are reachable from request.
1660 */
1661
1662 STACK_PUSH(vertex_stack, request, l_stack);
1663
1664 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
1665 STACK_POP(vertex_stack, l_stack);
1666 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
1667 ep = NEXT_ADJ(ep)) {
1668 if (IS_RECOMPUTE(ep->to_vertex))
1669 continue;
1670 ep->to_vertex->l_state |= RECOMPUTE_LOCK;
1671 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
1672 nvertex++;
1673 }
1674 }
1675
1676 /*
1677 * allocate memory for holding the vertex pointers
1678 */
1679
1680 if (nvertex) {
1681 topology = kmem_zalloc(nvertex * sizeof (lock_descriptor_t *),
1682 KM_SLEEP);
1683 }
1684
1685 /*
1686 * one more pass to actually store the vertices in the
1687 * allocated array.
1688 * We first check sleeping locks and then active locks
1689 * so that topology array will be in a topological
1690 * order.
1691 */
1692
1693 nvertex = 0;
1694 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
1695
1696 if (lock) {
1697 do {
1698 if (IS_RECOMPUTE(lock)) {
1699 lock->l_index = nvertex;
1700 topology[nvertex++] = lock;
1701 }
1702 lock->l_color = NO_COLOR;
1703 lock = lock->l_next;
1704 } while (lock->l_vnode == vp);
1705 }
1706
1707 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1708
1709 if (lock) {
1710 do {
1711 if (IS_RECOMPUTE(lock)) {
1712 lock->l_index = nvertex;
1713 topology[nvertex++] = lock;
1714 }
1715 lock->l_color = NO_COLOR;
1716 lock = lock->l_next;
1717 } while (lock->l_vnode == vp);
1718 }
1719
1720 /*
1721 * Remove the in and out edges of request.
1722 * They are freed after updating proc_graph below.
1723 */
1724
1725 for (ep = FIRST_IN(request); ep != HEAD(request); ep = NEXT_IN(ep)) {
1726 ADJ_LIST_REMOVE(ep);
1727 }
1728
1729
1730 if (remove_from_queue)
1731 REMOVE_SLEEP_QUEUE(request);
1732
1733 /* we are ready to recompute */
1734
1735 flk_recompute_dependencies(request, topology, nvertex, 1);
1736
1737 ep = FIRST_ADJ(request);
1738 while (ep != HEAD(request)) {
1739 IN_LIST_REMOVE(ep);
1740 request->l_sedge = NEXT_ADJ(ep);
1741 ADJ_LIST_REMOVE(ep);
1742 flk_update_proc_graph(ep, 1);
1743 flk_free_edge(ep);
1744 ep = request->l_sedge;
1745 }
1746
1747
1748 /*
1749 * unset the RECOMPUTE flag in those vertices
1750 */
1751
1752 for (i = 0; i < nvertex; i++) {
1753 topology[i]->l_state &= ~RECOMPUTE_LOCK;
1754 }
1755
1756 /*
1757 * free the topology
1758 */
1759 if (nvertex)
1760 kmem_free((void *)topology,
1761 (nvertex * sizeof (lock_descriptor_t *)));
1762 /*
1763 * Some locks may be unblocked now.
1764 */
1765
1766 flk_wakeup(request, 0);
1767
1768 /*
1769 * we expect to have a correctly recomputed graph now.
1770 */
1771 flk_set_state(request, FLK_DEAD_STATE);
1772 flk_free_lock(request);
1773 CHECK_SLEEPING_LOCKS(gp);
1774 CHECK_ACTIVE_LOCKS(gp);
1775
1776 }
1777
1778 /*
1779 * Uncoloring the graph simply increments the mark value of the graph.
1780 * Only when wraparound takes place do we explicitly reset the color of
1781 * all vertices in the graph.
1782 */
1783
1784 static void
1785 flk_graph_uncolor(graph_t *gp)
1786 {
1787 lock_descriptor_t *lock;
1788
1789 if (gp->mark == UINT_MAX) {
1790 gp->mark = 1;
1791 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
1792 lock = lock->l_next)
1793 lock->l_color = 0;
1794
1795 for (lock = SLEEPING_HEAD(gp)->l_next; lock != SLEEPING_HEAD(gp);
1796 lock = lock->l_next)
1797 lock->l_color = 0;
1798 } else {
1799 gp->mark++;
1800 }
1801 }
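
/*
 * Note (an assumption based on the wraparound handling above): COLOR() and
 * COLORED() presumably record and compare a vertex's l_color against the
 * graph's current mark, which is why bumping gp->mark uncolors every vertex
 * at once and an explicit reset is needed only when the mark wraps around.
 */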
1802
1803 /*
1804 * Wake up locks that are blocked on the given lock.
1805 */
1806
1807 static void
1808 flk_wakeup(lock_descriptor_t *lock, int adj_list_remove)
1809 {
1810 edge_t *ep;
1811 graph_t *gp = lock->l_graph;
1812 lock_descriptor_t *lck;
1813
1814 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1815 if (NO_DEPENDENTS(lock))
1816 return;
1817 ep = FIRST_IN(lock);
1818 do {
1819 /*
1820 * Delete the edge from the adjacency list of the from
1821 * vertex. If there are no more adjacent edges for
1822 * that vertex, wake up the process.
1823 */
1824 lck = ep->from_vertex;
1825 if (adj_list_remove)
1826 ADJ_LIST_REMOVE(ep);
1827 flk_update_proc_graph(ep, 1);
1828 if (NOT_BLOCKED(lck)) {
1829 GRANT_WAKEUP(lck);
1830 }
1831 lock->l_sedge = NEXT_IN(ep);
1832 IN_LIST_REMOVE(ep);
1833 flk_free_edge(ep);
1834 ep = lock->l_sedge;
1835 } while (ep != HEAD(lock));
1836 ASSERT(NO_DEPENDENTS(lock));
1837 }
1838
1839 /*
1840 * Each dependent of request is checked for its dependency against the
1841 * locks in topology (called topology because the array is, and should be, in
1842 * topological order for this algorithm; if it is not in topological order the
1843 * inner loop below might add more edges than necessary. Topological ordering
1844 * of the vertices satisfies the property that all edges go from left to
1845 * right, i.e., topology[i] can have an edge to topology[j] iff i < j).
1846 * If lock l1 in the dependent set of request is dependent on (blocked by)
1847 * lock l2 in topology but does not have a path to it, we add an edge
1848 * in the inner loop below.
1849 *
1850 * We don't want to add an edge between l1 and l2 if there already exists
1851 * a path from l1 to l2, so care has to be taken for those vertices
1852 * that have two paths to 'request'. These vertices are referred to here
1853 * as barrier locks.
1854 *
1855 * The barriers have to be found (those vertices that originally had two paths
1856 * to request) because otherwise we may end up adding edges unnecessarily
1857 * to vertices in topology, leaving a barrier vertex with both an edge
1858 * to a vertex in topology and a path to it.
1859 */
1860
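/*
 * Barrier example (descriptive only): if a dependent l1 reaches 'request'
 * along two distinct paths (say directly and via another dependent l2),
 * flk_find_barriers() marks l1 as a barrier with l_index = 1 (one less than
 * its number of paths). The traversal below then pops l1 without recomputing
 * it until that count drains to zero, so l1 is recomputed exactly once.
 */
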
1861 static void
1862 flk_recompute_dependencies(lock_descriptor_t *request,
1863 lock_descriptor_t **topology,
1864 int nvertex, int update_graph)
1865 {
1866 lock_descriptor_t *vertex, *lock;
1867 graph_t *gp = request->l_graph;
1868 int i, count;
1869 int barrier_found = 0;
1870 edge_t *ep;
1871 lock_descriptor_t *vertex_stack;
1872
1873 STACK_INIT(vertex_stack);
1874
1875 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1876 if (nvertex == 0)
1877 return;
1878 flk_graph_uncolor(request->l_graph);
1879 barrier_found = flk_find_barriers(request);
1880 request->l_state |= RECOMPUTE_DONE;
1881
1882 STACK_PUSH(vertex_stack, request, l_stack);
1883 request->l_sedge = FIRST_IN(request);
1884
1885
1886 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
1887 if (vertex->l_state & RECOMPUTE_DONE) {
1888 count = 0;
1889 goto next_in_edge;
1890 }
1891 if (IS_BARRIER(vertex)) {
1892 /* decrement the barrier count */
1893 if (vertex->l_index) {
1894 vertex->l_index--;
1895 /* this guy will be pushed again anyway ? */
1896 STACK_POP(vertex_stack, l_stack);
1897 if (vertex->l_index == 0) {
1898 /*
1899 * barrier is over we can recompute
1900 * dependencies for this lock in the
1901 * next stack pop
1902 */
1903 vertex->l_state &= ~BARRIER_LOCK;
1904 }
1905 continue;
1906 }
1907 }
1908 vertex->l_state |= RECOMPUTE_DONE;
1909 flk_graph_uncolor(gp);
1910 count = flk_color_reachables(vertex);
1911 for (i = 0; i < nvertex; i++) {
1912 lock = topology[i];
1913 if (COLORED(lock))
1914 continue;
1915 if (BLOCKS(lock, vertex)) {
1916 (void) flk_add_edge(vertex, lock,
1917 NO_CHECK_CYCLE, update_graph);
1918 COLOR(lock);
1919 count++;
1920 count += flk_color_reachables(lock);
1921 }
1922
1923 }
1924
1925 next_in_edge:
1926 if (count == nvertex ||
1927 vertex->l_sedge == HEAD(vertex)) {
1928 /* prune the tree below this */
1929 STACK_POP(vertex_stack, l_stack);
1930 vertex->l_state &= ~RECOMPUTE_DONE;
1931 /* update the barrier locks below this! */
1932 if (vertex->l_sedge != HEAD(vertex) && barrier_found) {
1933 flk_graph_uncolor(gp);
1934 flk_update_barriers(vertex);
1935 }
1936 continue;
1937 }
1938
1939 ep = vertex->l_sedge;
1940 lock = ep->from_vertex;
1941 STACK_PUSH(vertex_stack, lock, l_stack);
1942 lock->l_sedge = FIRST_IN(lock);
1943 vertex->l_sedge = NEXT_IN(ep);
1944 }
1945
1946 }
1947
1948 /*
1949 * Color all vertices reachable from 'vertex' that are not yet colored, and
1950 * count those that belong to topology (i.e., have RECOMPUTE_LOCK set).
1951 *
1952 * Note: we need to use a different stack_link l_stack1 because this is
1953 * called from flk_recompute_dependencies() that already uses a stack with
1954 * l_stack as stack_link.
1955 */
1956
1957 static int
1958 flk_color_reachables(lock_descriptor_t *vertex)
1959 {
1960 lock_descriptor_t *ver, *lock;
1961 int count;
1962 edge_t *ep;
1963 lock_descriptor_t *vertex_stack;
1964
1965 STACK_INIT(vertex_stack);
1966
1967 STACK_PUSH(vertex_stack, vertex, l_stack1);
1968 count = 0;
1969 while ((ver = STACK_TOP(vertex_stack)) != NULL) {
1970
1971 STACK_POP(vertex_stack, l_stack1);
1972 for (ep = FIRST_ADJ(ver); ep != HEAD(ver);
1973 ep = NEXT_ADJ(ep)) {
1974 lock = ep->to_vertex;
1975 if (COLORED(lock))
1976 continue;
1977 COLOR(lock);
1978 if (IS_RECOMPUTE(lock))
1979 count++;
1980 STACK_PUSH(vertex_stack, lock, l_stack1);
1981 }
1982
1983 }
1984 return (count);
1985 }
1986
1987 /*
 * Called from flk_recompute_dependencies(), this routine decrements
 * the barrier count of barrier vertices that are reachable from 'lock'.
1990 */
1991
1992 static void
1993 flk_update_barriers(lock_descriptor_t *lock)
1994 {
1995 lock_descriptor_t *vertex, *lck;
1996 edge_t *ep;
1997 lock_descriptor_t *vertex_stack;
1998
1999 STACK_INIT(vertex_stack);
2000
2001 STACK_PUSH(vertex_stack, lock, l_stack1);
2002
2003 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
2004 STACK_POP(vertex_stack, l_stack1);
2005 for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
2006 ep = NEXT_IN(ep)) {
2007 lck = ep->from_vertex;
2008 if (COLORED(lck)) {
2009 if (IS_BARRIER(lck)) {
2010 ASSERT(lck->l_index > 0);
2011 lck->l_index--;
2012 if (lck->l_index == 0)
2013 lck->l_state &= ~BARRIER_LOCK;
2014 }
2015 continue;
2016 }
2017 COLOR(lck);
2018 if (IS_BARRIER(lck)) {
2019 ASSERT(lck->l_index > 0);
2020 lck->l_index--;
2021 if (lck->l_index == 0)
2022 lck->l_state &= ~BARRIER_LOCK;
2023 }
2024 STACK_PUSH(vertex_stack, lck, l_stack1);
2025 }
2026 }
2027 }
2028
2029 /*
 * Finds all vertices that are reachable from 'lock' by more than one
 * path, marks them as barrier vertices, and increments their barrier
 * count.  The barrier count is one less than the total number of paths
 * from 'lock' to that vertex.
2034 */
2035
2036 static int
2037 flk_find_barriers(lock_descriptor_t *lock)
2038 {
2039 lock_descriptor_t *vertex, *lck;
2040 int found = 0;
2041 edge_t *ep;
2042 lock_descriptor_t *vertex_stack;
2043
2044 STACK_INIT(vertex_stack);
2045
2046 STACK_PUSH(vertex_stack, lock, l_stack1);
2047
2048 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
2049 STACK_POP(vertex_stack, l_stack1);
2050 for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
2051 ep = NEXT_IN(ep)) {
2052 lck = ep->from_vertex;
2053 if (COLORED(lck)) {
2054 /* this is a barrier */
2055 lck->l_state |= BARRIER_LOCK;
2056 /* index will have barrier count */
2057 lck->l_index++;
2058 if (!found)
2059 found = 1;
2060 continue;
2061 }
2062 COLOR(lck);
2063 lck->l_index = 0;
2064 STACK_PUSH(vertex_stack, lck, l_stack1);
2065 }
2066 }
2067 return (found);
2068 }
2069
2070 /*
2071 * Finds the first lock that is mainly responsible for blocking this
2072 * request. If there is no such lock, request->l_flock.l_type is set to
2073 * F_UNLCK. Otherwise, request->l_flock is filled in with the particulars
2074 * of the blocking lock.
2075 *
2076 * Note: It is possible a request is blocked by a sleeping lock because
2077 * of the fairness policy used in flk_process_request() to construct the
2078 * dependencies. (see comments before flk_process_request()).
2079 */
2080
2081 static void
2082 flk_get_first_blocking_lock(lock_descriptor_t *request)
2083 {
2084 graph_t *gp = request->l_graph;
2085 vnode_t *vp = request->l_vnode;
2086 lock_descriptor_t *lock, *blocker;
2087
2088 ASSERT(MUTEX_HELD(&gp->gp_mutex));
2089 blocker = NULL;
2090 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2091
2092 if (lock) {
2093 do {
2094 if (BLOCKS(lock, request)) {
2095 blocker = lock;
2096 break;
2097 }
2098 lock = lock->l_next;
2099 } while (lock->l_vnode == vp);
2100 }
2101
2102 if (blocker == NULL && request->l_flock.l_type == F_RDLCK) {
2103 /*
2104 * No active lock is blocking this request, but if a read
2105 * lock is requested, it may also get blocked by a waiting
2106 * writer. So search all sleeping locks and see if there is
2107 * a writer waiting.
2108 */
2109 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2110 if (lock) {
2111 do {
2112 if (BLOCKS(lock, request)) {
2113 blocker = lock;
2114 break;
2115 }
2116 lock = lock->l_next;
2117 } while (lock->l_vnode == vp);
2118 }
2119 }
2120
2121 if (blocker) {
2122 report_blocker(blocker, request);
2123 } else
2124 request->l_flock.l_type = F_UNLCK;
2125 }
2126
2127 /*
2128 * Get the graph_t structure associated with a vnode.
 * If 'initialize' is FLK_INIT_GRAPH and the graph_t structure for this vnode
 * has not yet been allocated, a new one is allocated, initialized and
 * returned.  With FLK_USE_GRAPH the existing graph (possibly NULL) is
 * returned as-is.
2131 */
2132 graph_t *
2133 flk_get_lock_graph(vnode_t *vp, int initialize)
2134 {
2135 graph_t *gp;
2136 graph_t *gp_alloc = NULL;
2137 int index = HASH_INDEX(vp);
2138
2139 if (initialize == FLK_USE_GRAPH) {
2140 mutex_enter(&flock_lock);
2141 gp = lock_graph[index];
2142 mutex_exit(&flock_lock);
2143 return (gp);
2144 }
2145
2146 ASSERT(initialize == FLK_INIT_GRAPH);
2147
2148 if (lock_graph[index] == NULL) {
2149
2150 gp_alloc = kmem_zalloc(sizeof (graph_t), KM_SLEEP);
2151
2152 /* Initialize the graph */
2153
2154 gp_alloc->active_locks.l_next =
2155 gp_alloc->active_locks.l_prev =
2156 (lock_descriptor_t *)ACTIVE_HEAD(gp_alloc);
2157 gp_alloc->sleeping_locks.l_next =
2158 gp_alloc->sleeping_locks.l_prev =
2159 (lock_descriptor_t *)SLEEPING_HEAD(gp_alloc);
2160 gp_alloc->index = index;
2161 mutex_init(&gp_alloc->gp_mutex, NULL, MUTEX_DEFAULT, NULL);
2162 }
2163
2164 mutex_enter(&flock_lock);
2165
2166 gp = lock_graph[index];
2167
2168 /* Recheck the value within flock_lock */
2169 if (gp == NULL) {
2170 struct flock_globals *fg;
2171
2172 /* We must have previously allocated the graph_t structure */
2173 ASSERT(gp_alloc != NULL);
2174 lock_graph[index] = gp = gp_alloc;
2175 /*
2176 * The lockmgr status is only needed if KLM is loaded.
2177 */
2178 if (flock_zone_key != ZONE_KEY_UNINITIALIZED) {
2179 fg = flk_get_globals();
2180 fg->lockmgr_status[index] = fg->flk_lockmgr_status;
2181 }
2182 }
2183
2184 mutex_exit(&flock_lock);
2185
2186 if ((gp_alloc != NULL) && (gp != gp_alloc)) {
2187 /* There was a race to allocate the graph_t and we lost */
2188 mutex_destroy(&gp_alloc->gp_mutex);
2189 kmem_free(gp_alloc, sizeof (graph_t));
2190 }
2191
2192 return (gp);
2193 }
2194
2195 /*
2196 * PSARC case 1997/292
2197 */
2198 int
2199 cl_flk_has_remote_locks_for_nlmid(vnode_t *vp, int nlmid)
2200 {
2201 lock_descriptor_t *lock;
2202 int result = 0;
2203 graph_t *gp;
2204 int lock_nlmid;
2205
2206 /*
2207 * Check to see if node is booted as a cluster. If not, return.
2208 */
2209 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
2210 return (0);
2211 }
2212
2213 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2214 if (gp == NULL) {
2215 return (0);
2216 }
2217
2218 mutex_enter(&gp->gp_mutex);
2219
2220 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2221
2222 if (lock) {
2223 while (lock->l_vnode == vp) {
2224 /* get NLM id from sysid */
2225 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
2226
2227 /*
2228 * If NLM server request _and_ nlmid of lock matches
2229 * nlmid of argument, then we've found a remote lock.
2230 */
2231 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
2232 result = 1;
2233 goto done;
2234 }
2235 lock = lock->l_next;
2236 }
2237 }
2238
2239 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2240
2241 if (lock) {
2242 while (lock->l_vnode == vp) {
2243 /* get NLM id from sysid */
2244 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
2245
2246 /*
2247 * If NLM server request _and_ nlmid of lock matches
2248 * nlmid of argument, then we've found a remote lock.
2249 */
2250 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
2251 result = 1;
2252 goto done;
2253 }
2254 lock = lock->l_next;
2255 }
2256 }
2257
2258 done:
2259 mutex_exit(&gp->gp_mutex);
2260 return (result);
2261 }
2262
2263 /*
2264 * Determine whether there are any locks for the given vnode with a remote
2265 * sysid. Returns zero if not, non-zero if there are.
2266 *
2267 * Note that the return value from this function is potentially invalid
2268 * once it has been returned. The caller is responsible for providing its
2269 * own synchronization mechanism to ensure that the return value is useful
2270 * (e.g., see nfs_lockcompletion()).
2271 */
2272 int
2273 flk_has_remote_locks(vnode_t *vp)
2274 {
2275 lock_descriptor_t *lock;
2276 int result = 0;
2277 graph_t *gp;
2278
2279 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2280 if (gp == NULL) {
2281 return (0);
2282 }
2283
2284 mutex_enter(&gp->gp_mutex);
2285
2286 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2287
2288 if (lock) {
2289 while (lock->l_vnode == vp) {
2290 if (IS_REMOTE(lock)) {
2291 result = 1;
2292 goto done;
2293 }
2294 lock = lock->l_next;
2295 }
2296 }
2297
2298 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2299
2300 if (lock) {
2301 while (lock->l_vnode == vp) {
2302 if (IS_REMOTE(lock)) {
2303 result = 1;
2304 goto done;
2305 }
2306 lock = lock->l_next;
2307 }
2308 }
2309
2310 done:
2311 mutex_exit(&gp->gp_mutex);
2312 return (result);
2313 }
2314
2315 /*
 * Determine whether there are any locks for the given vnode with a remote
 * sysid matching the given sysid.
 * Used by the new (open source) NFS Lock Manager (NLM).
2319 */
2320 int
2321 flk_has_remote_locks_for_sysid(vnode_t *vp, int sysid)
2322 {
2323 lock_descriptor_t *lock;
2324 int result = 0;
2325 graph_t *gp;
2326
2327 if (sysid == 0)
2328 return (0);
2329
2330 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2331 if (gp == NULL) {
2332 return (0);
2333 }
2334
2335 mutex_enter(&gp->gp_mutex);
2336
2337 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2338
2339 if (lock) {
2340 while (lock->l_vnode == vp) {
2341 if (lock->l_flock.l_sysid == sysid) {
2342 result = 1;
2343 goto done;
2344 }
2345 lock = lock->l_next;
2346 }
2347 }
2348
2349 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2350
2351 if (lock) {
2352 while (lock->l_vnode == vp) {
2353 if (lock->l_flock.l_sysid == sysid) {
2354 result = 1;
2355 goto done;
2356 }
2357 lock = lock->l_next;
2358 }
2359 }
2360
2361 done:
2362 mutex_exit(&gp->gp_mutex);
2363 return (result);
2364 }
2365
2366 /*
2367 * Determine if there are any locks owned by the given sysid.
2368 * Returns zero if not, non-zero if there are. Note that this return code
2369 * could be derived from flk_get_{sleeping,active}_locks, but this routine
2370 * avoids all the memory allocations of those routines.
2371 *
2372 * This routine has the same synchronization issues as
2373 * flk_has_remote_locks.
2374 */
2375
2376 int
2377 flk_sysid_has_locks(int sysid, int lck_type)
2378 {
2379 int has_locks = 0;
2380 lock_descriptor_t *lock;
2381 graph_t *gp;
2382 int i;
2383
2384 for (i = 0; i < HASH_SIZE && !has_locks; i++) {
2385 mutex_enter(&flock_lock);
2386 gp = lock_graph[i];
2387 mutex_exit(&flock_lock);
2388 if (gp == NULL) {
2389 continue;
2390 }
2391
2392 mutex_enter(&gp->gp_mutex);
2393
2394 if (lck_type & FLK_QUERY_ACTIVE) {
2395 for (lock = ACTIVE_HEAD(gp)->l_next;
2396 lock != ACTIVE_HEAD(gp) && !has_locks;
2397 lock = lock->l_next) {
2398 if (lock->l_flock.l_sysid == sysid)
2399 has_locks = 1;
2400 }
2401 }
2402
2403 if (lck_type & FLK_QUERY_SLEEPING) {
2404 for (lock = SLEEPING_HEAD(gp)->l_next;
2405 lock != SLEEPING_HEAD(gp) && !has_locks;
2406 lock = lock->l_next) {
2407 if (lock->l_flock.l_sysid == sysid)
2408 has_locks = 1;
2409 }
2410 }
2411 mutex_exit(&gp->gp_mutex);
2412 }
2413
2414 return (has_locks);
2415 }
2416
2417
2418 /*
2419 * PSARC case 1997/292
2420 *
2421 * Requires: "sysid" is a pair [nlmid, sysid]. The lower half is 16-bit
2422 * quantity, the real sysid generated by the NLM server; the upper half
2423 * identifies the node of the cluster where the NLM server ran.
2424 * This routine is only called by an NLM server running in a cluster.
2425 * Effects: Remove all locks held on behalf of the client identified
2426 * by "sysid."
2427 */
2428 void
2429 cl_flk_remove_locks_by_sysid(int sysid)
2430 {
2431 graph_t *gp;
2432 int i;
2433 lock_descriptor_t *lock, *nlock;
2434
2435 /*
2436 * Check to see if node is booted as a cluster. If not, return.
2437 */
2438 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
2439 return;
2440 }
2441
2442 ASSERT(sysid != 0);
2443 for (i = 0; i < HASH_SIZE; i++) {
2444 mutex_enter(&flock_lock);
2445 gp = lock_graph[i];
2446 mutex_exit(&flock_lock);
2447
2448 if (gp == NULL)
2449 continue;
2450
2451 mutex_enter(&gp->gp_mutex); /* get mutex on lock graph */
2452
2453 /* signal sleeping requests so that they bail out */
2454 lock = SLEEPING_HEAD(gp)->l_next;
2455 while (lock != SLEEPING_HEAD(gp)) {
2456 nlock = lock->l_next;
2457 if (lock->l_flock.l_sysid == sysid) {
2458 INTERRUPT_WAKEUP(lock);
2459 }
2460 lock = nlock;
2461 }
2462
2463 /* delete active locks */
2464 lock = ACTIVE_HEAD(gp)->l_next;
2465 while (lock != ACTIVE_HEAD(gp)) {
2466 nlock = lock->l_next;
2467 if (lock->l_flock.l_sysid == sysid) {
2468 flk_delete_active_lock(lock, 0);
2469 flk_wakeup(lock, 1);
2470 flk_free_lock(lock);
2471 }
2472 lock = nlock;
2473 }
2474 mutex_exit(&gp->gp_mutex); /* release mutex on lock graph */
2475 }
2476 }
2477
2478 /*
 * Delete all locks in the system that belong to the sysid of the request.
2480 */
2481
2482 static void
2483 flk_delete_locks_by_sysid(lock_descriptor_t *request)
2484 {
2485 int sysid = request->l_flock.l_sysid;
2486 lock_descriptor_t *lock, *nlock;
2487 graph_t *gp;
2488 int i;
2489
2490 ASSERT(MUTEX_HELD(&request->l_graph->gp_mutex));
2491 ASSERT(sysid != 0);
2492
2493 mutex_exit(&request->l_graph->gp_mutex);
2494
2495 for (i = 0; i < HASH_SIZE; i++) {
2496 mutex_enter(&flock_lock);
2497 gp = lock_graph[i];
2498 mutex_exit(&flock_lock);
2499
2500 if (gp == NULL)
2501 continue;
2502
2503 mutex_enter(&gp->gp_mutex);
2504
2505 /* signal sleeping requests so that they bail out */
2506 lock = SLEEPING_HEAD(gp)->l_next;
2507 while (lock != SLEEPING_HEAD(gp)) {
2508 nlock = lock->l_next;
2509 if (lock->l_flock.l_sysid == sysid) {
2510 INTERRUPT_WAKEUP(lock);
2511 }
2512 lock = nlock;
2513 }
2514
2515 /* delete active locks */
2516 lock = ACTIVE_HEAD(gp)->l_next;
2517 while (lock != ACTIVE_HEAD(gp)) {
2518 nlock = lock->l_next;
2519 if (lock->l_flock.l_sysid == sysid) {
2520 flk_delete_active_lock(lock, 0);
2521 flk_wakeup(lock, 1);
2522 flk_free_lock(lock);
2523 }
2524 lock = nlock;
2525 }
2526 mutex_exit(&gp->gp_mutex);
2527 }
2528
2529 mutex_enter(&request->l_graph->gp_mutex);
2530 }
2531
2532 /*
2533 * Clustering: Deletes PXFS locks
2534 * Effects: Delete all locks on files in the given file system and with the
2535 * given PXFS id.
2536 */
2537 void
2538 cl_flk_delete_pxfs_locks(struct vfs *vfsp, int pxfsid)
2539 {
2540 lock_descriptor_t *lock, *nlock;
2541 graph_t *gp;
2542 int i;
2543
2544 for (i = 0; i < HASH_SIZE; i++) {
2545 mutex_enter(&flock_lock);
2546 gp = lock_graph[i];
2547 mutex_exit(&flock_lock);
2548
2549 if (gp == NULL)
2550 continue;
2551
2552 mutex_enter(&gp->gp_mutex);
2553
2554 /* signal sleeping requests so that they bail out */
2555 lock = SLEEPING_HEAD(gp)->l_next;
2556 while (lock != SLEEPING_HEAD(gp)) {
2557 nlock = lock->l_next;
2558 if (lock->l_vnode->v_vfsp == vfsp) {
2559 ASSERT(IS_PXFS(lock));
2560 if (GETPXFSID(lock->l_flock.l_sysid) ==
2561 pxfsid) {
2562 flk_set_state(lock,
2563 FLK_CANCELLED_STATE);
2564 flk_cancel_sleeping_lock(lock, 1);
2565 }
2566 }
2567 lock = nlock;
2568 }
2569
2570 /* delete active locks */
2571 lock = ACTIVE_HEAD(gp)->l_next;
2572 while (lock != ACTIVE_HEAD(gp)) {
2573 nlock = lock->l_next;
2574 if (lock->l_vnode->v_vfsp == vfsp) {
2575 ASSERT(IS_PXFS(lock));
2576 if (GETPXFSID(lock->l_flock.l_sysid) ==
2577 pxfsid) {
2578 flk_delete_active_lock(lock, 0);
2579 flk_wakeup(lock, 1);
2580 flk_free_lock(lock);
2581 }
2582 }
2583 lock = nlock;
2584 }
2585 mutex_exit(&gp->gp_mutex);
2586 }
2587 }
2588
2589 /*
2590 * Search for a sleeping lock manager lock which matches exactly this lock
2591 * request; if one is found, fake a signal to cancel it.
2592 *
2593 * Return 1 if a matching lock was found, 0 otherwise.
2594 */
2595
2596 static int
2597 flk_canceled(lock_descriptor_t *request)
2598 {
2599 lock_descriptor_t *lock, *nlock;
2600 graph_t *gp = request->l_graph;
2601 vnode_t *vp = request->l_vnode;
2602
2603 ASSERT(MUTEX_HELD(&gp->gp_mutex));
2604 ASSERT(IS_LOCKMGR(request));
2605 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2606
2607 if (lock) {
2608 while (lock->l_vnode == vp) {
2609 nlock = lock->l_next;
2610 if (SAME_OWNER(lock, request) &&
2611 lock->l_start == request->l_start &&
2612 lock->l_end == request->l_end) {
2613 INTERRUPT_WAKEUP(lock);
2614 return (1);
2615 }
2616 lock = nlock;
2617 }
2618 }
2619 return (0);
2620 }
2621
2622 /*
2623 * Remove all the locks for the vnode belonging to the given pid and sysid.
2624 */
2625
2626 void
2627 cleanlocks(vnode_t *vp, pid_t pid, int sysid)
2628 {
2629 graph_t *gp;
2630 lock_descriptor_t *lock, *nlock;
2631 lock_descriptor_t *link_stack;
2632
2633 STACK_INIT(link_stack);
2634
2635 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2636
2637 if (gp == NULL)
2638 return;
2639 mutex_enter(&gp->gp_mutex);
2640
2641 CHECK_SLEEPING_LOCKS(gp);
2642 CHECK_ACTIVE_LOCKS(gp);
2643
2644 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2645
2646 if (lock) {
2647 do {
2648 nlock = lock->l_next;
2649 if ((lock->l_flock.l_pid == pid ||
2650 pid == IGN_PID) &&
2651 lock->l_flock.l_sysid == sysid) {
2652 CANCEL_WAKEUP(lock);
2653 }
2654 lock = nlock;
2655 } while (lock->l_vnode == vp);
2656 }
2657
2658 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2659
2660 if (lock) {
2661 do {
2662 nlock = lock->l_next;
2663 if ((lock->l_flock.l_pid == pid ||
2664 pid == IGN_PID) &&
2665 lock->l_flock.l_sysid == sysid) {
2666 flk_delete_active_lock(lock, 0);
2667 STACK_PUSH(link_stack, lock, l_stack);
2668 }
2669 lock = nlock;
2670 } while (lock->l_vnode == vp);
2671 }
2672
2673 while ((lock = STACK_TOP(link_stack)) != NULL) {
2674 STACK_POP(link_stack, l_stack);
2675 flk_wakeup(lock, 1);
2676 flk_free_lock(lock);
2677 }
2678
2679 CHECK_SLEEPING_LOCKS(gp);
2680 CHECK_ACTIVE_LOCKS(gp);
2681 CHECK_OWNER_LOCKS(gp, pid, sysid, vp);
2682 mutex_exit(&gp->gp_mutex);
2683 }
2684
2685
2686 /*
2687 * Called from 'fs' read and write routines for files that have mandatory
2688 * locking enabled.
2689 */
2690
2691 int
2692 chklock(
2693 struct vnode *vp,
2694 int iomode,
2695 u_offset_t offset,
2696 ssize_t len,
2697 int fmode,
2698 caller_context_t *ct)
2699 {
2700 register int i;
2701 struct flock64 bf;
2702 int error = 0;
2703
2704 bf.l_type = (iomode & FWRITE) ? F_WRLCK : F_RDLCK;
2705 bf.l_whence = 0;
2706 bf.l_start = offset;
2707 bf.l_len = len;
2708 if (ct == NULL) {
2709 bf.l_pid = curproc->p_pid;
2710 bf.l_sysid = 0;
2711 } else {
2712 bf.l_pid = ct->cc_pid;
2713 bf.l_sysid = ct->cc_sysid;
2714 }
2715 i = (fmode & (FNDELAY|FNONBLOCK)) ? INOFLCK : INOFLCK|SLPFLCK;
2716 if ((i = reclock(vp, &bf, i, 0, offset, NULL)) != 0 ||
2717 bf.l_type != F_UNLCK)
2718 error = i ? i : EAGAIN;
2719 return (error);
2720 }
2721
2722 /*
2723 * convoff - converts the given data (start, whence) to the
2724 * given whence.
2725 */
2726 int
convoff(struct vnode *vp, struct flock64 *lckdat, int whence, offset_t offset)
{
2733 int error;
2734 struct vattr vattr;
2735
2736 if ((lckdat->l_whence == 2) || (whence == 2)) {
2737 vattr.va_mask = AT_SIZE;
2738 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
2739 return (error);
2740 }
2741
2742 switch (lckdat->l_whence) {
2743 case 1:
2744 lckdat->l_start += offset;
2745 break;
2746 case 2:
2747 lckdat->l_start += vattr.va_size;
2748 /* FALLTHRU */
2749 case 0:
2750 break;
2751 default:
2752 return (EINVAL);
2753 }
2754
2755 if (lckdat->l_start < 0)
2756 return (EINVAL);
2757
2758 switch (whence) {
2759 case 1:
2760 lckdat->l_start -= offset;
2761 break;
2762 case 2:
2763 lckdat->l_start -= vattr.va_size;
2764 /* FALLTHRU */
2765 case 0:
2766 break;
2767 default:
2768 return (EINVAL);
2769 }
2770
2771 lckdat->l_whence = (short)whence;
2772 return (0);
2773 }
2774
2775
2776 /* proc_graph function definitions */
2777
2778 /*
 * Check for deadlock due to the new 'lock'.  If a deadlock is found, the
 * edges of this lock are freed and 1 is returned; otherwise 0 is returned.
2781 */
2782
2783 static int
2784 flk_check_deadlock(lock_descriptor_t *lock)
2785 {
2786 proc_vertex_t *start_vertex, *pvertex;
2787 proc_vertex_t *dvertex;
2788 proc_edge_t *pep, *ppep;
2789 edge_t *ep, *nep;
2790 proc_vertex_t *process_stack;
2791
2792 STACK_INIT(process_stack);
2793
2794 mutex_enter(&flock_lock);
2795 start_vertex = flk_get_proc_vertex(lock);
2796 ASSERT(start_vertex != NULL);
2797
2798 /* construct the edges from this process to other processes */
2799
2800 ep = FIRST_ADJ(lock);
2801 while (ep != HEAD(lock)) {
2802 proc_vertex_t *adj_proc;
2803
2804 adj_proc = flk_get_proc_vertex(ep->to_vertex);
2805 for (pep = start_vertex->edge; pep != NULL; pep = pep->next) {
2806 if (pep->to_proc == adj_proc) {
2807 ASSERT(pep->refcount);
2808 pep->refcount++;
2809 break;
2810 }
2811 }
2812 if (pep == NULL) {
2813 pep = flk_get_proc_edge();
2814 pep->to_proc = adj_proc;
2815 pep->refcount = 1;
2816 adj_proc->incount++;
2817 pep->next = start_vertex->edge;
2818 start_vertex->edge = pep;
2819 }
2820 ep = NEXT_ADJ(ep);
2821 }
2822
2823 ep = FIRST_IN(lock);
2824
2825 while (ep != HEAD(lock)) {
2826 proc_vertex_t *in_proc;
2827
2828 in_proc = flk_get_proc_vertex(ep->from_vertex);
2829
2830 for (pep = in_proc->edge; pep != NULL; pep = pep->next) {
2831 if (pep->to_proc == start_vertex) {
2832 ASSERT(pep->refcount);
2833 pep->refcount++;
2834 break;
2835 }
2836 }
2837 if (pep == NULL) {
2838 pep = flk_get_proc_edge();
2839 pep->to_proc = start_vertex;
2840 pep->refcount = 1;
2841 start_vertex->incount++;
2842 pep->next = in_proc->edge;
2843 in_proc->edge = pep;
2844 }
2845 ep = NEXT_IN(ep);
2846 }
2847
2848 if (start_vertex->incount == 0) {
2849 mutex_exit(&flock_lock);
2850 return (0);
2851 }
2852
2853 flk_proc_graph_uncolor();
2854
2855 start_vertex->p_sedge = start_vertex->edge;
2856
2857 STACK_PUSH(process_stack, start_vertex, p_stack);
2858
2859 while ((pvertex = STACK_TOP(process_stack)) != NULL) {
2860 for (pep = pvertex->p_sedge; pep != NULL; pep = pep->next) {
2861 dvertex = pep->to_proc;
2862 if (!PROC_ARRIVED(dvertex)) {
2863 STACK_PUSH(process_stack, dvertex, p_stack);
2864 dvertex->p_sedge = dvertex->edge;
2865 PROC_ARRIVE(pvertex);
2866 pvertex->p_sedge = pep->next;
2867 break;
2868 }
2869 if (!PROC_DEPARTED(dvertex))
2870 goto deadlock;
2871 }
2872 if (pep == NULL) {
2873 PROC_DEPART(pvertex);
2874 STACK_POP(process_stack, p_stack);
2875 }
2876 }
2877 mutex_exit(&flock_lock);
2878 return (0);
2879
2880 deadlock:
2881
2882 /* we remove all lock edges and proc edges */
2883
2884 ep = FIRST_ADJ(lock);
2885 while (ep != HEAD(lock)) {
2886 proc_vertex_t *adj_proc;
2887 adj_proc = flk_get_proc_vertex(ep->to_vertex);
2888 nep = NEXT_ADJ(ep);
2889 IN_LIST_REMOVE(ep);
2890 ADJ_LIST_REMOVE(ep);
2891 flk_free_edge(ep);
2892 ppep = start_vertex->edge;
2893 for (pep = start_vertex->edge; pep != NULL; ppep = pep,
2894 pep = ppep->next) {
2895 if (pep->to_proc == adj_proc) {
2896 pep->refcount--;
2897 if (pep->refcount == 0) {
2898 if (pep == ppep) {
2899 start_vertex->edge = pep->next;
2900 } else {
2901 ppep->next = pep->next;
2902 }
2903 adj_proc->incount--;
2904 flk_proc_release(adj_proc);
2905 flk_free_proc_edge(pep);
2906 }
2907 break;
2908 }
2909 }
2910 ep = nep;
2911 }
2912 ep = FIRST_IN(lock);
2913 while (ep != HEAD(lock)) {
2914 proc_vertex_t *in_proc;
2915 in_proc = flk_get_proc_vertex(ep->from_vertex);
2916 nep = NEXT_IN(ep);
2917 IN_LIST_REMOVE(ep);
2918 ADJ_LIST_REMOVE(ep);
2919 flk_free_edge(ep);
2920 ppep = in_proc->edge;
2921 for (pep = in_proc->edge; pep != NULL; ppep = pep,
2922 pep = ppep->next) {
2923 if (pep->to_proc == start_vertex) {
2924 pep->refcount--;
2925 if (pep->refcount == 0) {
2926 if (pep == ppep) {
2927 in_proc->edge = pep->next;
2928 } else {
2929 ppep->next = pep->next;
2930 }
2931 start_vertex->incount--;
2932 flk_proc_release(in_proc);
2933 flk_free_proc_edge(pep);
2934 }
2935 break;
2936 }
2937 }
2938 ep = nep;
2939 }
2940 flk_proc_release(start_vertex);
2941 mutex_exit(&flock_lock);
2942 return (1);
2943 }
2944
2945 /*
 * Get a proc vertex.  If the lock's pvertex value indexes a matching proc
 * vertex in the list, we return it; otherwise we search the list and,
 * failing that, allocate a new one, growing the list of vertices if
 * necessary.
2949 */
2950
2951 static proc_vertex_t *
2952 flk_get_proc_vertex(lock_descriptor_t *lock)
2953 {
2954 int i;
2955 proc_vertex_t *pv;
2956 proc_vertex_t **palloc;
2957
2958 ASSERT(MUTEX_HELD(&flock_lock));
2959 if (lock->pvertex != -1) {
2960 ASSERT(lock->pvertex >= 0);
2961 pv = pgraph.proc[lock->pvertex];
2962 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
2963 return (pv);
2964 }
2965 }
2966 for (i = 0; i < pgraph.gcount; i++) {
2967 pv = pgraph.proc[i];
2968 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
2969 lock->pvertex = pv->index = i;
2970 return (pv);
2971 }
2972 }
2973 pv = kmem_zalloc(sizeof (struct proc_vertex), KM_SLEEP);
2974 pv->pid = lock->l_flock.l_pid;
2975 pv->sysid = lock->l_flock.l_sysid;
2976 flk_proc_vertex_allocs++;
2977 if (pgraph.free != 0) {
2978 for (i = 0; i < pgraph.gcount; i++) {
2979 if (pgraph.proc[i] == NULL) {
2980 pgraph.proc[i] = pv;
2981 lock->pvertex = pv->index = i;
2982 pgraph.free--;
2983 return (pv);
2984 }
2985 }
2986 }
2987 palloc = kmem_zalloc((pgraph.gcount + PROC_CHUNK) *
2988 sizeof (proc_vertex_t *), KM_SLEEP);
2989
2990 if (pgraph.proc) {
2991 bcopy(pgraph.proc, palloc,
2992 pgraph.gcount * sizeof (proc_vertex_t *));
2993
2994 kmem_free(pgraph.proc,
2995 pgraph.gcount * sizeof (proc_vertex_t *));
2996 }
2997 pgraph.proc = palloc;
2998 pgraph.free += (PROC_CHUNK - 1);
2999 pv->index = lock->pvertex = pgraph.gcount;
3000 pgraph.gcount += PROC_CHUNK;
3001 pgraph.proc[pv->index] = pv;
3002 return (pv);
3003 }
3004
3005 /*
3006 * Allocate a proc edge.
3007 */
3008
3009 static proc_edge_t *
3010 flk_get_proc_edge()
3011 {
3012 proc_edge_t *pep;
3013
3014 pep = kmem_zalloc(sizeof (proc_edge_t), KM_SLEEP);
3015 flk_proc_edge_allocs++;
3016 return (pep);
3017 }
3018
3019 /*
3020 * Free the proc edge. Called whenever its reference count goes to zero.
3021 */
3022
3023 static void
3024 flk_free_proc_edge(proc_edge_t *pep)
3025 {
3026 ASSERT(pep->refcount == 0);
3027 kmem_free((void *)pep, sizeof (proc_edge_t));
3028 flk_proc_edge_frees++;
3029 }
3030
3031 /*
 * Uncolor the process graph.  The explicit reset of every vertex is done
 * only when the mark value hits its maximum; otherwise the mark is simply
 * incremented.
3033 */
3034
3035 static void
3036 flk_proc_graph_uncolor()
3037 {
3038 int i;
3039
3040 if (pgraph.mark == UINT_MAX) {
3041 for (i = 0; i < pgraph.gcount; i++)
3042 if (pgraph.proc[i] != NULL) {
3043 pgraph.proc[i]->atime = 0;
3044 pgraph.proc[i]->dtime = 0;
3045 }
3046 pgraph.mark = 1;
3047 } else {
3048 pgraph.mark++;
3049 }
3050 }
3051
3052 /*
 * Release the proc vertex iff it has neither in-edges nor out-edges.
3054 */
3055
3056 static void
3057 flk_proc_release(proc_vertex_t *proc)
3058 {
3059 ASSERT(MUTEX_HELD(&flock_lock));
3060 if (proc->edge == NULL && proc->incount == 0) {
3061 pgraph.proc[proc->index] = NULL;
3062 pgraph.free++;
3063 kmem_free(proc, sizeof (proc_vertex_t));
3064 flk_proc_vertex_frees++;
3065 }
3066 }
3067
3068 /*
 * Update the process graph to reflect a change in a lock graph.
 * Note: this function should be called only after the lock graph has been
 * correctly recomputed; otherwise we might miss a deadlock.  For example,
 * flk_relation() calls this function after flk_recompute_dependencies();
 * otherwise a process trying to lock a vnode hashed into another graph
 * might sleep forever.
3075 */
3076
3077 static void
3078 flk_update_proc_graph(edge_t *ep, int delete)
3079 {
3080 proc_vertex_t *toproc, *fromproc;
3081 proc_edge_t *pep, *prevpep;
3082
3083 mutex_enter(&flock_lock);
3084 toproc = flk_get_proc_vertex(ep->to_vertex);
3085 fromproc = flk_get_proc_vertex(ep->from_vertex);
3086
3087 if (!delete)
3088 goto add;
3089 pep = prevpep = fromproc->edge;
3090
3091 ASSERT(pep != NULL);
3092 while (pep != NULL) {
3093 if (pep->to_proc == toproc) {
3094 ASSERT(pep->refcount > 0);
3095 pep->refcount--;
3096 if (pep->refcount == 0) {
3097 if (pep == prevpep) {
3098 fromproc->edge = pep->next;
3099 } else {
3100 prevpep->next = pep->next;
3101 }
3102 toproc->incount--;
3103 flk_proc_release(toproc);
3104 flk_free_proc_edge(pep);
3105 }
3106 break;
3107 }
3108 prevpep = pep;
3109 pep = pep->next;
3110 }
3111 flk_proc_release(fromproc);
3112 mutex_exit(&flock_lock);
3113 return;
3114 add:
3115
3116 pep = fromproc->edge;
3117
3118 while (pep != NULL) {
3119 if (pep->to_proc == toproc) {
3120 ASSERT(pep->refcount > 0);
3121 pep->refcount++;
3122 break;
3123 }
3124 pep = pep->next;
3125 }
3126 if (pep == NULL) {
3127 pep = flk_get_proc_edge();
3128 pep->to_proc = toproc;
3129 pep->refcount = 1;
3130 toproc->incount++;
3131 pep->next = fromproc->edge;
3132 fromproc->edge = pep;
3133 }
3134 mutex_exit(&flock_lock);
3135 }
3136
3137 /*
3138 * Set the control status for lock manager requests.
3139 *
3140 */
3141
3142 /*
3143 * PSARC case 1997/292
3144 *
3145 * Requires: "nlmid" must be >= 1 and <= clconf_maximum_nodeid().
3146 * Effects: Set the state of the NLM server identified by "nlmid"
3147 * in the NLM registry to state "nlm_state."
3148 * Raises exception no_such_nlm if "nlmid" doesn't identify a known
3149 * NLM server to this LLM.
3150 * Note that when this routine is called with NLM_SHUTTING_DOWN there
 * may be lock requests that have gotten started but not finished.  In
3152 * particular, there may be blocking requests that are in the callback code
3153 * before sleeping (so they're not holding the lock for the graph). If
3154 * such a thread reacquires the graph's lock (to go to sleep) after
3155 * NLM state in the NLM registry is set to a non-up value,
3156 * it will notice the status and bail out. If the request gets
3157 * granted before the thread can check the NLM registry, let it
3158 * continue normally. It will get flushed when we are called with NLM_DOWN.
3159 *
3160 * Modifies: nlm_reg_obj (global)
3161 * Arguments:
3162 * nlmid (IN): id uniquely identifying an NLM server
3163 * nlm_state (IN): NLM server state to change "nlmid" to
3164 */
3165 void
3166 cl_flk_set_nlm_status(int nlmid, flk_nlm_status_t nlm_state)
3167 {
3168 /*
3169 * Check to see if node is booted as a cluster. If not, return.
3170 */
3171 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
3172 return;
3173 }
3174
3175 /*
3176 * Check for development/debugging. It is possible to boot a node
3177 * in non-cluster mode, and then run a special script, currently
3178 * available only to developers, to bring up the node as part of a
3179 * cluster. The problem is that running such a script does not
3180 * result in the routine flk_init() being called and hence global array
3181 * nlm_reg_status is NULL. The NLM thinks it's in cluster mode,
3182 * but the LLM needs to do an additional check to see if the global
3183 * array has been created or not. If nlm_reg_status is NULL, then
3184 * return, else continue.
3185 */
3186 if (nlm_reg_status == NULL) {
3187 return;
3188 }
3189
3190 ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
3191 mutex_enter(&nlm_reg_lock);
3192
3193 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, nlmid)) {
3194 /*
3195 * If the NLM server "nlmid" is unknown in the NLM registry,
3196 * add it to the registry in the nlm shutting down state.
3197 */
3198 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
3199 FLK_NLM_SHUTTING_DOWN);
3200 } else {
3201 /*
3202 * Change the state of the NLM server identified by "nlmid"
3203 * in the NLM registry to the argument "nlm_state."
3204 */
3205 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
3206 nlm_state);
3207 }
3208
3209 /*
3210 * The reason we must register the NLM server that is shutting down
3211 * with an LLM that doesn't already know about it (never sent a lock
3212 * request) is to handle correctly a race between shutdown and a new
3213 * lock request. Suppose that a shutdown request from the NLM server
3214 * invokes this routine at the LLM, and a thread is spawned to
3215 * service the request. Now suppose a new lock request is in
3216 * progress and has already passed the first line of defense in
 * reclock(), which denies new lock requests from NLM servers
3218 * that are not in the NLM_UP state. After the current routine
3219 * is invoked for both phases of shutdown, the routine will return,
3220 * having done nothing, and the lock request will proceed and
3221 * probably be granted. The problem is that the shutdown was ignored
3222 * by the lock request because there was no record of that NLM server
3223 * shutting down. We will be in the peculiar position of thinking
 * that we've shut down the NLM server and all locks at all LLMs have
3225 * been discarded, but in fact there's still one lock held.
3226 * The solution is to record the existence of NLM server and change
3227 * its state immediately to NLM_SHUTTING_DOWN. The lock request in
3228 * progress may proceed because the next phase NLM_DOWN will catch
3229 * this lock and discard it.
3230 */
3231 mutex_exit(&nlm_reg_lock);
3232
3233 switch (nlm_state) {
3234 case FLK_NLM_UP:
3235 /*
3236 * Change the NLM state of all locks still held on behalf of
3237 * the NLM server identified by "nlmid" to NLM_UP.
3238 */
3239 cl_flk_change_nlm_state_all_locks(nlmid, FLK_NLM_UP);
3240 break;
3241
3242 case FLK_NLM_SHUTTING_DOWN:
3243 /*
3244 * Wake up all sleeping locks for the NLM server identified
3245 * by "nlmid." Note that eventually all woken threads will
3246 * have their lock requests cancelled and descriptors
3247 * removed from the sleeping lock list. Note that the NLM
3248 * server state associated with each lock descriptor is
3249 * changed to FLK_NLM_SHUTTING_DOWN.
3250 */
3251 cl_flk_wakeup_sleeping_nlm_locks(nlmid);
3252 break;
3253
3254 case FLK_NLM_DOWN:
3255 /*
3256 * Discard all active, granted locks for this NLM server
3257 * identified by "nlmid."
3258 */
3259 cl_flk_unlock_nlm_granted(nlmid);
3260 break;
3261
3262 default:
3263 panic("cl_set_nlm_status: bad status (%d)", nlm_state);
3264 }
3265 }
3266
3267 /*
3268 * Set the control status for lock manager requests.
3269 *
3270 * Note that when this routine is called with FLK_WAKEUP_SLEEPERS, there
 * may be lock requests that have gotten started but not finished.  In
3272 * particular, there may be blocking requests that are in the callback code
3273 * before sleeping (so they're not holding the lock for the graph). If
3274 * such a thread reacquires the graph's lock (to go to sleep) after
3275 * flk_lockmgr_status is set to a non-up value, it will notice the status
3276 * and bail out. If the request gets granted before the thread can check
3277 * flk_lockmgr_status, let it continue normally. It will get flushed when
3278 * we are called with FLK_LOCKMGR_DOWN.
3279 */
3280
3281 void
3282 flk_set_lockmgr_status(flk_lockmgr_status_t status)
3283 {
3284 int i;
3285 graph_t *gp;
3286 struct flock_globals *fg;
3287
3288 fg = flk_get_globals();
3289 ASSERT(fg != NULL);
3290
3291 mutex_enter(&flock_lock);
3292 fg->flk_lockmgr_status = status;
3293 mutex_exit(&flock_lock);
3294
3295 /*
3296 * If the lock manager is coming back up, all that's needed is to
3297 * propagate this information to the graphs. If the lock manager
3298 * is going down, additional action is required, and each graph's
3299 * copy of the state is updated atomically with this other action.
3300 */
3301 switch (status) {
3302 case FLK_LOCKMGR_UP:
3303 for (i = 0; i < HASH_SIZE; i++) {
3304 mutex_enter(&flock_lock);
3305 gp = lock_graph[i];
3306 mutex_exit(&flock_lock);
3307 if (gp == NULL)
3308 continue;
3309 mutex_enter(&gp->gp_mutex);
3310 fg->lockmgr_status[i] = status;
3311 mutex_exit(&gp->gp_mutex);
3312 }
3313 break;
3314 case FLK_WAKEUP_SLEEPERS:
3315 wakeup_sleeping_lockmgr_locks(fg);
3316 break;
3317 case FLK_LOCKMGR_DOWN:
3318 unlock_lockmgr_granted(fg);
3319 break;
3320 default:
3321 panic("flk_set_lockmgr_status: bad status (%d)", status);
3322 break;
3323 }
3324 }
3325
3326 /*
3327 * This routine returns all the locks that are active or sleeping and are
3328 * associated with a particular set of identifiers. If lock_state != 0, then
3329 * only locks that match the lock_state are returned. If lock_state == 0, then
3330 * all locks are returned. If pid == NOPID, the pid is ignored. If
3331 * use_sysid is FALSE, then the sysid is ignored. If vp is NULL, then the
3332 * vnode pointer is ignored.
3333 *
3334 * A list containing the vnode pointer and an flock structure
3335 * describing the lock is returned. Each element in the list is
3336 * dynamically allocated and must be freed by the caller. The
3337 * last item in the list is denoted by a NULL value in the ll_next
3338 * field.
3339 *
3340 * The vnode pointers returned are held. The caller is responsible
3341 * for releasing these. Note that the returned list is only a snapshot of
3342 * the current lock information, and that it is a snapshot of a moving
3343 * target (only one graph is locked at a time).
3344 */
3345
3346 locklist_t *
3347 get_lock_list(int list_type, int lock_state, int sysid, boolean_t use_sysid,
3348 pid_t pid, const vnode_t *vp, zoneid_t zoneid)
3349 {
3350 lock_descriptor_t *lock;
3351 lock_descriptor_t *graph_head;
3352 locklist_t listhead;
3353 locklist_t *llheadp;
3354 locklist_t *llp;
3355 locklist_t *lltp;
3356 graph_t *gp;
3357 int i;
3358 int first_index; /* graph index */
3359 int num_indexes; /* graph index */
3360
3361 ASSERT((list_type == FLK_ACTIVE_STATE) ||
3362 (list_type == FLK_SLEEPING_STATE));
3363
3364 /*
3365 * Get a pointer to something to use as a list head while building
3366 * the rest of the list.
3367 */
3368 llheadp = &listhead;
3369 lltp = llheadp;
3370 llheadp->ll_next = (locklist_t *)NULL;
3371
3372 /* Figure out which graphs we want to look at. */
3373 if (vp == NULL) {
3374 first_index = 0;
3375 num_indexes = HASH_SIZE;
3376 } else {
3377 first_index = HASH_INDEX(vp);
3378 num_indexes = 1;
3379 }
3380
3381 for (i = first_index; i < first_index + num_indexes; i++) {
3382 mutex_enter(&flock_lock);
3383 gp = lock_graph[i];
3384 mutex_exit(&flock_lock);
3385 if (gp == NULL) {
3386 continue;
3387 }
3388
3389 mutex_enter(&gp->gp_mutex);
3390 graph_head = (list_type == FLK_ACTIVE_STATE) ?
3391 ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp);
3392 for (lock = graph_head->l_next;
3393 lock != graph_head;
3394 lock = lock->l_next) {
3395 if (use_sysid && lock->l_flock.l_sysid != sysid)
3396 continue;
3397 if (pid != NOPID && lock->l_flock.l_pid != pid)
3398 continue;
3399 if (vp != NULL && lock->l_vnode != vp)
3400 continue;
3401 if (lock_state && !(lock_state & lock->l_state))
3402 continue;
3403 if (zoneid != lock->l_zoneid && zoneid != ALL_ZONES)
3404 continue;
3405 /*
3406 * A matching lock was found. Allocate
3407 * space for a new locklist entry and fill
3408 * it in.
3409 */
3410 llp = kmem_alloc(sizeof (locklist_t), KM_SLEEP);
3411 lltp->ll_next = llp;
3412 VN_HOLD(lock->l_vnode);
3413 llp->ll_vp = lock->l_vnode;
3414 create_flock(lock, &(llp->ll_flock));
3415 llp->ll_next = (locklist_t *)NULL;
3416 lltp = llp;
3417 }
3418 mutex_exit(&gp->gp_mutex);
3419 }
3420
3421 llp = llheadp->ll_next;
3422 return (llp);
3423 }
3424
3425 /*
3426 * These two functions are simply interfaces to get_lock_list. They return
3427 * a list of sleeping or active locks for the given sysid and pid. See
3428 * get_lock_list for details.
3429 *
3430 * In either case we don't particularly care to specify the zone of interest;
3431 * the sysid-space is global across zones, so the sysid will map to exactly one
3432 * zone, and we'll return information for that zone.
3433 */
3434
3435 locklist_t *
3436 flk_get_sleeping_locks(int sysid, pid_t pid)
3437 {
3438 return (get_lock_list(FLK_SLEEPING_STATE, 0, sysid, B_TRUE, pid, NULL,
3439 ALL_ZONES));
3440 }
3441
3442 locklist_t *
3443 flk_get_active_locks(int sysid, pid_t pid)
3444 {
3445 return (get_lock_list(FLK_ACTIVE_STATE, 0, sysid, B_TRUE, pid, NULL,
3446 ALL_ZONES));
3447 }
3448
3449 /*
3450 * Another interface to get_lock_list. This one returns all the active
3451 * locks for a given vnode. Again, see get_lock_list for details.
3452 *
3453 * We don't need to specify which zone's locks we're interested in. The matter
3454 * would only be interesting if the vnode belonged to NFS, and NFS vnodes can't
3455 * be used by multiple zones, so the list of locks will all be from the right
3456 * zone.
3457 */
3458
3459 locklist_t *
3460 flk_active_locks_for_vp(const vnode_t *vp)
3461 {
3462 return (get_lock_list(FLK_ACTIVE_STATE, 0, 0, B_FALSE, NOPID, vp,
3463 ALL_ZONES));
3464 }
3465
3466 /*
3467 * Another interface to get_lock_list. This one returns all the active
3468 * nbmand locks for a given vnode. Again, see get_lock_list for details.
3469 *
3470 * See the comment for flk_active_locks_for_vp() for why we don't care to
3471 * specify the particular zone of interest.
3472 */
3473 locklist_t *
3474 flk_active_nbmand_locks_for_vp(const vnode_t *vp)
3475 {
3476 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
3477 NOPID, vp, ALL_ZONES));
3478 }
3479
3480 /*
3481 * Another interface to get_lock_list. This one returns all the active
3482 * nbmand locks for a given pid. Again, see get_lock_list for details.
3483 *
3484 * The zone doesn't need to be specified here; the locks held by a
3485 * particular process will either be local (ie, non-NFS) or from the zone
3486 * the process is executing in. This is because other parts of the system
3487 * ensure that an NFS vnode can't be used in a zone other than that in
3488 * which it was opened.
3489 */
3490 locklist_t *
3491 flk_active_nbmand_locks(pid_t pid)
3492 {
3493 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
3494 pid, NULL, ALL_ZONES));
3495 }
3496
3497 /*
3498 * Free up all entries in the locklist.
3499 */
3500 void
3501 flk_free_locklist(locklist_t *llp)
3502 {
3503 locklist_t *next_llp;
3504
3505 while (llp) {
3506 next_llp = llp->ll_next;
3507 VN_RELE(llp->ll_vp);
3508 kmem_free(llp, sizeof (*llp));
3509 llp = next_llp;
3510 }
3511 }
3512
3513 static void
3514 cl_flk_change_nlm_state_all_locks(int nlmid, flk_nlm_status_t nlm_state)
3515 {
3516 /*
3517 * For each graph "lg" in the hash table lock_graph do
3518 * a. Get the list of sleeping locks
3519 * b. For each lock descriptor in the list do
3520 * i. If the requested lock is an NLM server request AND
3521 * the nlmid is the same as the routine argument then
3522 * change the lock descriptor's state field to
3523 * "nlm_state."
3524 * c. Get the list of active locks
3525 * d. For each lock descriptor in the list do
3526 * i. If the requested lock is an NLM server request AND
3527 * the nlmid is the same as the routine argument then
3528 * change the lock descriptor's state field to
3529 * "nlm_state."
3530 */
3531
3532 int i;
3533 graph_t *gp; /* lock graph */
3534 lock_descriptor_t *lock; /* lock */
3535 lock_descriptor_t *nlock = NULL; /* next lock */
3536 int lock_nlmid;
3537
3538 for (i = 0; i < HASH_SIZE; i++) {
3539 mutex_enter(&flock_lock);
3540 gp = lock_graph[i];
3541 mutex_exit(&flock_lock);
3542 if (gp == NULL) {
3543 continue;
3544 }
3545
3546 /* Get list of sleeping locks in current lock graph. */
3547 mutex_enter(&gp->gp_mutex);
3548 for (lock = SLEEPING_HEAD(gp)->l_next;
3549 lock != SLEEPING_HEAD(gp);
3550 lock = nlock) {
3551 nlock = lock->l_next;
3552 /* get NLM id */
3553 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
3554
3555 /*
3556 * If NLM server request AND nlmid of lock matches
3557 * nlmid of argument, then set the NLM state of the
3558 * lock to "nlm_state."
3559 */
3560 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
3561 SET_NLM_STATE(lock, nlm_state);
3562 }
3563 }
3564
3565 /* Get list of active locks in current lock graph. */
3566 for (lock = ACTIVE_HEAD(gp)->l_next;
3567 lock != ACTIVE_HEAD(gp);
3568 lock = nlock) {
3569 nlock = lock->l_next;
3570 /* get NLM id */
3571 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
3572
3573 /*
3574 * If NLM server request AND nlmid of lock matches
3575 * nlmid of argument, then set the NLM state of the
3576 * lock to "nlm_state."
3577 */
3578 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
3579 ASSERT(IS_ACTIVE(lock));
3580 SET_NLM_STATE(lock, nlm_state);
3581 }
3582 }
3583 mutex_exit(&gp->gp_mutex);
3584 }
3585 }
3586
3587 /*
3588 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid().
3589 * Effects: Find all sleeping lock manager requests _only_ for the NLM server
3590 * identified by "nlmid." Poke those lock requests.
3591 */
3592 static void
3593 cl_flk_wakeup_sleeping_nlm_locks(int nlmid)
3594 {
3595 lock_descriptor_t *lock;
3596 lock_descriptor_t *nlock = NULL; /* next lock */
3597 int i;
3598 graph_t *gp;
3599 int lock_nlmid;
3600
3601 for (i = 0; i < HASH_SIZE; i++) {
3602 mutex_enter(&flock_lock);
3603 gp = lock_graph[i];
3604 mutex_exit(&flock_lock);
3605 if (gp == NULL) {
3606 continue;
3607 }
3608
3609 mutex_enter(&gp->gp_mutex);
3610 for (lock = SLEEPING_HEAD(gp)->l_next;
3611 lock != SLEEPING_HEAD(gp);
3612 lock = nlock) {
3613 nlock = lock->l_next;
3614 /*
3615 * If NLM server request _and_ nlmid of lock matches
3616 * nlmid of argument, then set the NLM state of the
3617 * lock to NLM_SHUTTING_DOWN, and wake up sleeping
3618 * request.
3619 */
3620 if (IS_LOCKMGR(lock)) {
3621 /* get NLM id */
3622 lock_nlmid =
3623 GETNLMID(lock->l_flock.l_sysid);
3624 if (nlmid == lock_nlmid) {
3625 SET_NLM_STATE(lock,
3626 FLK_NLM_SHUTTING_DOWN);
3627 INTERRUPT_WAKEUP(lock);
3628 }
3629 }
3630 }
3631 mutex_exit(&gp->gp_mutex);
3632 }
3633 }
3634
3635 /*
3636 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid()
3637 * Effects: Find all active (granted) lock manager locks _only_ for the
3638 * NLM server identified by "nlmid" and release them.
3639 */
3640 static void
3641 cl_flk_unlock_nlm_granted(int nlmid)
3642 {
3643 lock_descriptor_t *lock;
3644 lock_descriptor_t *nlock = NULL; /* next lock */
3645 int i;
3646 graph_t *gp;
3647 int lock_nlmid;
3648
3649 for (i = 0; i < HASH_SIZE; i++) {
3650 mutex_enter(&flock_lock);
3651 gp = lock_graph[i];
3652 mutex_exit(&flock_lock);
3653 if (gp == NULL) {
3654 continue;
3655 }
3656
3657 mutex_enter(&gp->gp_mutex);
3658 for (lock = ACTIVE_HEAD(gp)->l_next;
3659 lock != ACTIVE_HEAD(gp);
3660 lock = nlock) {
3661 nlock = lock->l_next;
3662 ASSERT(IS_ACTIVE(lock));
3663
3664 /*
3665 * If it's an NLM server request _and_ nlmid of
3666 * the lock matches nlmid of argument, then
			 * remove the active lock from the list, wake up
3668 * threads, and free the storage for the lock.
3669 * Note that there's no need to mark the NLM state
3670 * of this lock to NLM_DOWN because the lock will
3671 * be deleted anyway and its storage freed.
3672 */
3673 if (IS_LOCKMGR(lock)) {
3674 /* get NLM id */
3675 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
3676 if (nlmid == lock_nlmid) {
3677 flk_delete_active_lock(lock, 0);
3678 flk_wakeup(lock, 1);
3679 flk_free_lock(lock);
3680 }
3681 }
3682 }
3683 mutex_exit(&gp->gp_mutex);
3684 }
3685 }
3686
3687 /*
3688 * Find all sleeping lock manager requests and poke them.
3689 */
3690 static void
3691 wakeup_sleeping_lockmgr_locks(struct flock_globals *fg)
3692 {
3693 lock_descriptor_t *lock;
3694 lock_descriptor_t *nlock = NULL; /* next lock */
3695 int i;
3696 graph_t *gp;
3697 zoneid_t zoneid = getzoneid();
3698
3699 for (i = 0; i < HASH_SIZE; i++) {
3700 mutex_enter(&flock_lock);
3701 gp = lock_graph[i];
3702 mutex_exit(&flock_lock);
3703 if (gp == NULL) {
3704 continue;
3705 }
3706
3707 mutex_enter(&gp->gp_mutex);
3708 fg->lockmgr_status[i] = FLK_WAKEUP_SLEEPERS;
3709 for (lock = SLEEPING_HEAD(gp)->l_next;
3710 lock != SLEEPING_HEAD(gp);
3711 lock = nlock) {
3712 nlock = lock->l_next;
3713 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
3714 INTERRUPT_WAKEUP(lock);
3715 }
3716 }
3717 mutex_exit(&gp->gp_mutex);
3718 }
3719 }
3720
3721
3722 /*
3723 * Find all active (granted) lock manager locks and release them.
3724 */
3725 static void
3726 unlock_lockmgr_granted(struct flock_globals *fg)
3727 {
3728 lock_descriptor_t *lock;
3729 lock_descriptor_t *nlock = NULL; /* next lock */
3730 int i;
3731 graph_t *gp;
3732 zoneid_t zoneid = getzoneid();
3733
3734 for (i = 0; i < HASH_SIZE; i++) {
3735 mutex_enter(&flock_lock);
3736 gp = lock_graph[i];
3737 mutex_exit(&flock_lock);
3738 if (gp == NULL) {
3739 continue;
3740 }
3741
3742 mutex_enter(&gp->gp_mutex);
3743 fg->lockmgr_status[i] = FLK_LOCKMGR_DOWN;
3744 for (lock = ACTIVE_HEAD(gp)->l_next;
3745 lock != ACTIVE_HEAD(gp);
3746 lock = nlock) {
3747 nlock = lock->l_next;
3748 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
3749 ASSERT(IS_ACTIVE(lock));
3750 flk_delete_active_lock(lock, 0);
3751 flk_wakeup(lock, 1);
3752 flk_free_lock(lock);
3753 }
3754 }
3755 mutex_exit(&gp->gp_mutex);
3756 }
3757 }
3758
3759
3760 /*
3761 * Wait until a lock is granted, cancelled, or interrupted.
3762 */
3763
3764 static void
3765 wait_for_lock(lock_descriptor_t *request)
3766 {
3767 graph_t *gp = request->l_graph;
3768
3769 ASSERT(MUTEX_HELD(&gp->gp_mutex));
3770
3771 while (!(IS_GRANTED(request)) && !(IS_CANCELLED(request)) &&
3772 !(IS_INTERRUPTED(request))) {
3773 if (!cv_wait_sig(&request->l_cv, &gp->gp_mutex)) {
3774 flk_set_state(request, FLK_INTERRUPTED_STATE);
3775 request->l_state |= INTERRUPTED_LOCK;
3776 }
3777 }
3778 }
3779
3780 /*
3781 * Create an flock structure from the existing lock information
3782 *
3783 * This routine is used to create flock structures for the lock manager
 * to use in a reclaim request.  Since the lock originated on this host,
 * it must conform to UNIX semantics, so no checking is done to make sure
 * it falls within the lower half of the 32-bit range.
3787 */
3788
3789 static void
3790 create_flock(lock_descriptor_t *lp, flock64_t *flp)
3791 {
3792 ASSERT(lp->l_end == MAX_U_OFFSET_T || lp->l_end <= MAXEND);
3793 ASSERT(lp->l_end >= lp->l_start);
3794
3795 flp->l_type = lp->l_type;
3796 flp->l_whence = 0;
3797 flp->l_start = lp->l_start;
3798 flp->l_len = (lp->l_end == MAX_U_OFFSET_T) ? 0 :
3799 (lp->l_end - lp->l_start + 1);
3800 flp->l_sysid = lp->l_flock.l_sysid;
3801 flp->l_pid = lp->l_flock.l_pid;
3802 }
3803
3804 /*
 * Convert flock64_t data describing a lock range into u_offset_t starting
 * and ending points, which are put into lock_request. Returns 0 or an
3807 * errno value.
3808 * Large Files: max is passed by the caller and we return EOVERFLOW
3809 * as defined by LFS API.
3810 */
3811
3812 int
3813 flk_convert_lock_data(vnode_t *vp, flock64_t *flp,
3814 u_offset_t *start, u_offset_t *end, offset_t offset)
3815 {
3816 struct vattr vattr;
3817 int error;
3818
3819 /*
3820 * Determine the starting point of the request
3821 */
3822 switch (flp->l_whence) {
3823 case 0: /* SEEK_SET */
3824 *start = (u_offset_t)flp->l_start;
3825 break;
3826 case 1: /* SEEK_CUR */
3827 *start = (u_offset_t)(flp->l_start + offset);
3828 break;
3829 case 2: /* SEEK_END */
3830 vattr.va_mask = AT_SIZE;
3831 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
3832 return (error);
3833 *start = (u_offset_t)(flp->l_start + vattr.va_size);
3834 break;
3835 default:
3836 return (EINVAL);
3837 }
3838
3839 /*
3840 * Determine the range covered by the request.
3841 */
3842 if (flp->l_len == 0)
3843 *end = MAX_U_OFFSET_T;
3844 else if ((offset_t)flp->l_len > 0) {
3845 *end = (u_offset_t)(*start + (flp->l_len - 1));
3846 } else {
3847 /*
		 * Negative length; why do we even allow this?
3849 * Because this allows easy specification of
3850 * the last n bytes of the file.
3851 */
3852 *end = *start;
3853 *start += (u_offset_t)flp->l_len;
3854 (*start)++;
3855 }
3856 return (0);
3857 }
3858
3859 /*
 * Check the validity of lock data. This can be used by the NFS
3861 * frlock routines to check data before contacting the server. The
3862 * server must support semantics that aren't as restrictive as
3863 * the UNIX API, so the NFS client is required to check.
3864 * The maximum is now passed in by the caller.
3865 */
3866
3867 int
3868 flk_check_lock_data(u_offset_t start, u_offset_t end, offset_t max)
3869 {
3870 /*
3871 * The end (length) for local locking should never be greater
3872 * than MAXEND. However, the representation for
3873 * the entire file is MAX_U_OFFSET_T.
3874 */
3875 if ((start > max) ||
3876 ((end > max) && (end != MAX_U_OFFSET_T))) {
3877 return (EINVAL);
3878 }
3879 if (start > end) {
3880 return (EINVAL);
3881 }
3882 return (0);
3883 }
3884
3885 /*
3886 * Fill in request->l_flock with information about the lock blocking the
3887 * request. The complexity here is that lock manager requests are allowed
3888 * to see into the upper part of the 32-bit address range, whereas local
3889 * requests are only allowed to see signed values.
3890 *
3891 * What should be done when "blocker" is a lock manager lock that uses the
3892 * upper portion of the 32-bit range, but "request" is local? Since the
3893 * request has already been determined to have been blocked by the blocker,
3894 * at least some portion of "blocker" must be in the range of the request,
3895 * or the request extends to the end of file. For the first case, the
3896 * portion in the lower range is returned with the indication that it goes
3897 * "to EOF." For the second case, the last byte of the lower range is
3898 * returned with the indication that it goes "to EOF."
3899 */
3900
3901 static void
3902 report_blocker(lock_descriptor_t *blocker, lock_descriptor_t *request)
3903 {
3904 flock64_t *flrp; /* l_flock portion of request */
3905
3906 ASSERT(blocker != NULL);
3907
3908 flrp = &request->l_flock;
3909 flrp->l_whence = 0;
3910 flrp->l_type = blocker->l_type;
3911 flrp->l_pid = blocker->l_flock.l_pid;
3912 flrp->l_sysid = blocker->l_flock.l_sysid;
3913
3914 if (IS_LOCKMGR(request)) {
3915 flrp->l_start = blocker->l_start;
3916 if (blocker->l_end == MAX_U_OFFSET_T)
3917 flrp->l_len = 0;
3918 else
3919 flrp->l_len = blocker->l_end - blocker->l_start + 1;
3920 } else {
3921 if (blocker->l_start > MAXEND) {
3922 flrp->l_start = MAXEND;
3923 flrp->l_len = 0;
3924 } else {
3925 flrp->l_start = blocker->l_start;
3926 if (blocker->l_end == MAX_U_OFFSET_T)
3927 flrp->l_len = 0;
3928 else
3929 flrp->l_len = blocker->l_end -
3930 blocker->l_start + 1;
3931 }
3932 }
3933 }
3934
3935 /*
3936 * PSARC case 1997/292
3937 */
3938 /*
3939 * This is the public routine exported by flock.h.
3940 */
3941 void
3942 cl_flk_change_nlm_state_to_unknown(int nlmid)
3943 {
3944 /*
3945 * Check to see if node is booted as a cluster. If not, return.
3946 */
3947 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
3948 return;
3949 }
3950
3951 /*
3952 * See comment in cl_flk_set_nlm_status().
3953 */
3954 if (nlm_reg_status == NULL) {
3955 return;
3956 }
3957
3958 /*
3959 * protect NLM registry state with a mutex.
3960 */
3961 ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
3962 mutex_enter(&nlm_reg_lock);
3963 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, FLK_NLM_UNKNOWN);
3964 mutex_exit(&nlm_reg_lock);
3965 }
3966
3967 /*
3968 * Return non-zero if the given I/O request conflicts with an active NBMAND
3969 * lock.
3970 * If svmand is non-zero, it means look at all active locks, not just NBMAND
3971 * locks.
3972 */
3973
3974 int
3975 nbl_lock_conflict(vnode_t *vp, nbl_op_t op, u_offset_t offset,
3976 ssize_t length, int svmand, caller_context_t *ct)
3977 {
3978 int conflict = 0;
3979 graph_t *gp;
3980 lock_descriptor_t *lock;
3981 pid_t pid;
3982 int sysid;
3983
3984 if (ct == NULL) {
3985 pid = curproc->p_pid;
3986 sysid = 0;
3987 } else {
3988 pid = ct->cc_pid;
3989 sysid = ct->cc_sysid;
3990 }
3991
3992 mutex_enter(&flock_lock);
3993 gp = lock_graph[HASH_INDEX(vp)];
3994 mutex_exit(&flock_lock);
3995 if (gp == NULL)
3996 return (0);
3997
3998 mutex_enter(&gp->gp_mutex);
3999 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
4000
4001 for (; lock && lock->l_vnode == vp; lock = lock->l_next) {
4002 if ((svmand || (lock->l_state & NBMAND_LOCK)) &&
4003 (lock->l_flock.l_sysid != sysid ||
4004 lock->l_flock.l_pid != pid) &&
4005 lock_blocks_io(op, offset, length,
4006 lock->l_type, lock->l_start, lock->l_end)) {
4007 conflict = 1;
4008 break;
4009 }
4010 }
4011 mutex_exit(&gp->gp_mutex);
4012
4013 return (conflict);
4014 }
4015
4016 /*
4017 * Return non-zero if the given I/O request conflicts with the given lock.
4018 */
4019
4020 static int
4021 lock_blocks_io(nbl_op_t op, u_offset_t offset, ssize_t length,
4022 int lock_type, u_offset_t lock_start, u_offset_t lock_end)
4023 {
4024 ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE);
4025 ASSERT(lock_type == F_RDLCK || lock_type == F_WRLCK);
4026
4027 if (op == NBL_READ && lock_type == F_RDLCK)
4028 return (0);
4029
4030 if (offset <= lock_start && lock_start < offset + length)
4031 return (1);
4032 if (lock_start <= offset && offset <= lock_end)
4033 return (1);
4034
4035 return (0);
4036 }
4037
4038 #ifdef DEBUG
4039 static void
4040 check_active_locks(graph_t *gp)
4041 {
4042 lock_descriptor_t *lock, *lock1;
4043 edge_t *ep;
4044
4045 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
4046 lock = lock->l_next) {
4047 ASSERT(IS_ACTIVE(lock));
4048 ASSERT(NOT_BLOCKED(lock));
4049 ASSERT(!IS_BARRIER(lock));
4050
4051 ep = FIRST_IN(lock);
4052
4053 while (ep != HEAD(lock)) {
4054 ASSERT(IS_SLEEPING(ep->from_vertex));
4055 ASSERT(!NOT_BLOCKED(ep->from_vertex));
4056 ep = NEXT_IN(ep);
4057 }
4058
4059 for (lock1 = lock->l_next; lock1 != ACTIVE_HEAD(gp);
4060 lock1 = lock1->l_next) {
4061 if (lock1->l_vnode == lock->l_vnode) {
4062 if (BLOCKS(lock1, lock)) {
4063 cmn_err(CE_PANIC,
4064 "active lock %p blocks %p",
4065 (void *)lock1, (void *)lock);
4066 } else if (BLOCKS(lock, lock1)) {
4067 cmn_err(CE_PANIC,
4068 "active lock %p blocks %p",
4069 (void *)lock, (void *)lock1);
4070 }
4071 }
4072 }
4073 }
4074 }
4075
4076 /*
 * Effect: This function checks whether the transition from 'old_state' to
 * 'new_state' is a valid one.  It returns 0 if the transition is valid
 * and 1 if it is not.
 * For a map of valid transitions, see sys/flock_impl.h.
4081 */
4082 static int
4083 check_lock_transition(int old_state, int new_state)
4084 {
4085 switch (old_state) {
4086 case FLK_INITIAL_STATE:
4087 if ((new_state == FLK_START_STATE) ||
4088 (new_state == FLK_SLEEPING_STATE) ||
4089 (new_state == FLK_ACTIVE_STATE) ||
4090 (new_state == FLK_DEAD_STATE)) {
4091 return (0);
4092 } else {
4093 return (1);
4094 }
4095 case FLK_START_STATE:
4096 if ((new_state == FLK_ACTIVE_STATE) ||
4097 (new_state == FLK_DEAD_STATE)) {
4098 return (0);
4099 } else {
4100 return (1);
4101 }
4102 case FLK_ACTIVE_STATE:
4103 if (new_state == FLK_DEAD_STATE) {
4104 return (0);
4105 } else {
4106 return (1);
4107 }
4108 case FLK_SLEEPING_STATE:
4109 if ((new_state == FLK_GRANTED_STATE) ||
4110 (new_state == FLK_INTERRUPTED_STATE) ||
4111 (new_state == FLK_CANCELLED_STATE)) {
4112 return (0);
4113 } else {
4114 return (1);
4115 }
4116 case FLK_GRANTED_STATE:
4117 if ((new_state == FLK_START_STATE) ||
4118 (new_state == FLK_INTERRUPTED_STATE) ||
4119 (new_state == FLK_CANCELLED_STATE)) {
4120 return (0);
4121 } else {
4122 return (1);
4123 }
4124 case FLK_CANCELLED_STATE:
4125 if ((new_state == FLK_INTERRUPTED_STATE) ||
4126 (new_state == FLK_DEAD_STATE)) {
4127 return (0);
4128 } else {
4129 return (1);
4130 }
4131 case FLK_INTERRUPTED_STATE:
4132 if (new_state == FLK_DEAD_STATE) {
4133 return (0);
4134 } else {
4135 return (1);
4136 }
4137 case FLK_DEAD_STATE:
4138 /* May be set more than once */
4139 if (new_state == FLK_DEAD_STATE) {
4140 return (0);
4141 } else {
4142 return (1);
4143 }
4144 default:
4145 return (1);
4146 }
4147 }
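
/*
 * Summary of the transitions accepted above (derived from the switch
 * statement; the authoritative map is in sys/flock_impl.h):
 *
 *	INITIAL		-> START, SLEEPING, ACTIVE, DEAD
 *	START		-> ACTIVE, DEAD
 *	ACTIVE		-> DEAD
 *	SLEEPING	-> GRANTED, INTERRUPTED, CANCELLED
 *	GRANTED		-> START, INTERRUPTED, CANCELLED
 *	CANCELLED	-> INTERRUPTED, DEAD
 *	INTERRUPTED	-> DEAD
 *	DEAD		-> DEAD (may be set more than once)
 */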
4148
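/*
 * Verify the consistency of the sleeping queue: every sleeping lock that
 * is blocked by another sleeping or active lock on the same vnode must be
 * neither granted nor marked unblocked, and a path to its blocker must
 * exist in the graph; in addition, every outgoing edge of a sleeping lock
 * must point at a vertex that blocks it.
 */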
4149 static void
4150 check_sleeping_locks(graph_t *gp)
4151 {
4152 lock_descriptor_t *lock1, *lock2;
4153 edge_t *ep;
4154 for (lock1 = SLEEPING_HEAD(gp)->l_next; lock1 != SLEEPING_HEAD(gp);
4155 lock1 = lock1->l_next) {
4156 ASSERT(!IS_BARRIER(lock1));
4157 for (lock2 = lock1->l_next; lock2 != SLEEPING_HEAD(gp);
4158 lock2 = lock2->l_next) {
4159 if (lock1->l_vnode == lock2->l_vnode) {
4160 if (BLOCKS(lock2, lock1)) {
4161 ASSERT(!IS_GRANTED(lock1));
4162 ASSERT(!NOT_BLOCKED(lock1));
4163 path(lock1, lock2);
4164 }
4165 }
4166 }
4167
4168 for (lock2 = ACTIVE_HEAD(gp)->l_next; lock2 != ACTIVE_HEAD(gp);
4169 lock2 = lock2->l_next) {
4170 ASSERT(!IS_BARRIER(lock1));
4171 if (lock1->l_vnode == lock2->l_vnode) {
4172 if (BLOCKS(lock2, lock1)) {
4173 ASSERT(!IS_GRANTED(lock1));
4174 ASSERT(!NOT_BLOCKED(lock1));
4175 path(lock1, lock2);
4176 }
4177 }
4178 }
4179 ep = FIRST_ADJ(lock1);
4180 while (ep != HEAD(lock1)) {
4181 ASSERT(BLOCKS(ep->to_vertex, lock1));
4182 ep = NEXT_ADJ(ep);
4183 }
4184 }
4185 }
4186
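/*
 * Depth-first search from the vertices adjacent to lock1, returning
 * non-zero if lock2 is first reached at a distance of two or more edges
 * from lock1.  When 'no_path' is set, additionally assert that lock2 is
 * not directly adjacent to lock1.
 */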
4187 static int
4188 level_two_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2, int no_path)
4189 {
4190 edge_t *ep;
4191 lock_descriptor_t *vertex;
4192 lock_descriptor_t *vertex_stack;
4193
4194 STACK_INIT(vertex_stack);
4195
4196 flk_graph_uncolor(lock1->l_graph);
4197 ep = FIRST_ADJ(lock1);
4198 ASSERT(ep != HEAD(lock1));
4199 while (ep != HEAD(lock1)) {
4200 if (no_path)
4201 ASSERT(ep->to_vertex != lock2);
4202 STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
4203 COLOR(ep->to_vertex);
4204 ep = NEXT_ADJ(ep);
4205 }
4206
4207 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
4208 STACK_POP(vertex_stack, l_dstack);
4209 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
4210 ep = NEXT_ADJ(ep)) {
4211 if (COLORED(ep->to_vertex))
4212 continue;
4213 COLOR(ep->to_vertex);
4214 if (ep->to_vertex == lock2)
4215 return (1);
4216
4217 STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
4218 }
4219 }
4220 return (0);
4221 }
4222
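/*
 * Panic if the owner identified by (pid, sysid) still holds a lock on the
 * given vnode in either the active or the sleeping queue.
 */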
4223 static void
4224 check_owner_locks(graph_t *gp, pid_t pid, int sysid, vnode_t *vp)
4225 {
4226 lock_descriptor_t *lock;
4227
4228 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
4229
4230 if (lock) {
4231 while (lock != ACTIVE_HEAD(gp) && (lock->l_vnode == vp)) {
4232 if (lock->l_flock.l_pid == pid &&
4233 lock->l_flock.l_sysid == sysid)
4234 cmn_err(CE_PANIC,
4235 "owner pid %d's lock %p in active queue",
4236 pid, (void *)lock);
4237 lock = lock->l_next;
4238 }
4239 }
4240 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
4241
4242 if (lock) {
4243 while (lock != SLEEPING_HEAD(gp) && (lock->l_vnode == vp)) {
4244 if (lock->l_flock.l_pid == pid &&
4245 lock->l_flock.l_sysid == sysid)
4246 cmn_err(CE_PANIC,
4247 "owner pid %d's lock %p in sleep queue",
4248 pid, (void *)lock);
4249 lock = lock->l_next;
4250 }
4251 }
4252 }
4253
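/*
 * Return non-zero if there is a direct edge from lock1 to lock2.
 */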
4254 static int
4255 level_one_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4256 {
4257 edge_t *ep = FIRST_ADJ(lock1);
4258
4259 while (ep != HEAD(lock1)) {
4260 if (ep->to_vertex == lock2)
4261 return (1);
4262 else
4263 ep = NEXT_ADJ(ep);
4264 }
4265 return (0);
4266 }
4267
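/*
 * Return non-zero if there is no path of two or more edges from lock1 to
 * lock2; asserts (via level_two_path()) that there is no direct edge
 * either.
 */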
4268 static int
4269 no_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4270 {
4271 return (!level_two_path(lock1, lock2, 1));
4272 }
4273
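/*
 * Consistency check on the relationship between lock1 and lock2: if the
 * two are directly connected, warn when a longer path also exists; if
 * they are not directly connected, panic when no path exists at all.
 */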
4274 static void
4275 path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4276 {
4277 if (level_one_path(lock1, lock2)) {
4278 if (level_two_path(lock1, lock2, 0) != 0) {
4279 cmn_err(CE_WARN,
4280 "one edge one path from lock1 %p lock2 %p",
4281 (void *)lock1, (void *)lock2);
4282 }
4283 } else if (no_path(lock1, lock2)) {
4284 cmn_err(CE_PANIC,
4285 "No path from lock1 %p to lock2 %p",
4286 (void *)lock1, (void *)lock2);
4287 }
4288 }
4289 #endif /* DEBUG */