195 Need replacement for nfs/lockd+klm
--- old/usr/src/uts/common/os/flock.c
+++ new/usr/src/uts/common/os/flock.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 28 /* All Rights Reserved */
29 29
30 +/*
31 + * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
32 + */
33 +
30 34 #include <sys/flock_impl.h>
31 35 #include <sys/vfs.h>
32 36 #include <sys/t_lock.h> /* for <sys/callb.h> */
33 37 #include <sys/callb.h>
34 38 #include <sys/clconf.h>
35 39 #include <sys/cladm.h>
36 40 #include <sys/nbmlock.h>
37 41 #include <sys/cred.h>
38 42 #include <sys/policy.h>
39 43
40 44 /*
41 45 * The following four variables are for statistics purposes and they are
42 46 * not protected by locks. They may not be accurate but will at least be
43 47 * close to the actual value.
44 48 */
45 49
46 50 int flk_lock_allocs;
47 51 int flk_lock_frees;
48 52 int edge_allocs;
49 53 int edge_frees;
50 54 int flk_proc_vertex_allocs;
51 55 int flk_proc_edge_allocs;
52 56 int flk_proc_vertex_frees;
53 57 int flk_proc_edge_frees;
54 58
55 59 static kmutex_t flock_lock;
56 60
57 61 #ifdef DEBUG
58 62 int check_debug = 0;
59 63 #define CHECK_ACTIVE_LOCKS(gp) if (check_debug) \
60 64 check_active_locks(gp);
61 65 #define CHECK_SLEEPING_LOCKS(gp) if (check_debug) \
62 66 check_sleeping_locks(gp);
63 67 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp) \
64 68 if (check_debug) \
65 69 check_owner_locks(gp, pid, sysid, vp);
66 70 #define CHECK_LOCK_TRANSITION(old_state, new_state) \
67 71 { \
68 72 if (check_lock_transition(old_state, new_state)) { \
69 73 cmn_err(CE_PANIC, "Illegal lock transition \
70 74 from %d to %d", old_state, new_state); \
71 75 } \
72 76 }
73 77 #else
74 78
75 79 #define CHECK_ACTIVE_LOCKS(gp)
76 80 #define CHECK_SLEEPING_LOCKS(gp)
77 81 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp)
78 82 #define CHECK_LOCK_TRANSITION(old_state, new_state)
79 83
80 84 #endif /* DEBUG */
81 85
82 86 struct kmem_cache *flk_edge_cache;
83 87
84 88 graph_t *lock_graph[HASH_SIZE];
85 89 proc_graph_t pgraph;
86 90
87 91 /*
88 92 * Clustering.
89 93 *
90 94 * NLM REGISTRY TYPE IMPLEMENTATION
91 95 *
92 96 * Assumptions:
93 97 * 1. Nodes in a cluster are numbered starting at 1; always non-negative
94 98 * integers; maximum node id is returned by clconf_maximum_nodeid().
95 99 * 2. We use this node id to identify the node an NLM server runs on.
96 100 */
97 101
98 102 /*
99 103 * NLM registry object keeps track of NLM servers via their
100 104 * nlmids (which are the node ids of the node in the cluster they run on)
101 105 * that have requested locks at this LLM with which this registry is
102 106 * associated.
103 107 *
104 108 * Representation of abstraction:
105 109 * rep = record[ states: array[nlm_state],
106 110 * lock: mutex]
107 111 *
108 112 * Representation invariants:
109 113 * 1. index i of rep.states is between 0 and n - 1 where n is number
110 114 * of elements in the array, which happens to be the maximum number
111 115 * of nodes in the cluster configuration + 1.
112 116 * 2. map nlmid to index i of rep.states
113 117 * 0 -> 0
114 118 * 1 -> 1
115 119 * 2 -> 2
116 120 * n-1 -> clconf_maximum_nodeid()+1
117 121 * 3. This 1-1 mapping is quite convenient and it avoids errors resulting
118 122 * from forgetting to subtract 1 from the index.
119 123 * 4. The reason we keep the 0th index is the following. A legitimate
120 124 * cluster configuration includes making a UFS file system NFS
121 125 * exportable. The code is structured so that if you're in a cluster
122 126 * you do one thing; otherwise, you do something else. The problem
123 127 * is what to do if you think you're in a cluster with PXFS loaded,
124 128 * but you're using UFS not PXFS? The upper two bytes of the sysid
125 129 * encode the node id of the node where NLM server runs; these bytes
126 130 * are zero for UFS. Since the nodeid is used to index into the
127 131 * registry, we can record the NLM server state information at index
128 132 * 0 using the same mechanism used for PXFS file locks!
129 133 */
130 134 static flk_nlm_status_t *nlm_reg_status = NULL; /* state array 0..N-1 */
131 135 static kmutex_t nlm_reg_lock; /* lock to protect array */
132 136 static uint_t nlm_status_size; /* size of state array */
133 137
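As a reading aid, here is a minimal sketch of how a lock's sysid maps onto this registry, assuming (per the block comment above) that the node id lives in the upper two bytes of the sysid and that GETNLMID() extracts it; nlm_reg_slot() is a hypothetical helper, not part of this file:

static flk_nlm_status_t *
nlm_reg_slot(int sysid)
{
	/* 0 for local/UFS locks, the NLM server's node id for PXFS */
	int nlmid = GETNLMID(sysid);

	ASSERT(nlmid >= 0 && nlmid < nlm_status_size);
	return (&nlm_reg_status[nlmid]);
}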
134 138 /*
135 139 * Although we need a global lock dependency graph (and associated data
136 140 * structures), we also need a per-zone notion of whether the lock manager is
137 141 * running, and so whether to allow lock manager requests or not.
138 142 *
139 143 * Thus, on a per-zone basis we maintain a ``global'' variable
140 144 * (flk_lockmgr_status), protected by flock_lock, and set when the lock
141 145 * manager is determined to be changing state (starting or stopping).
142 146 *
143 147 * Each graph/zone pair also has a copy of this variable, which is protected by
144 148 * the graph's mutex.
145 149 *
146 150 * The per-graph copies are used to synchronize lock requests with shutdown
147 151 * requests. The global copy is used to initialize the per-graph field when a
148 152 * new graph is created.
149 153 */
150 154 struct flock_globals {
151 155 flk_lockmgr_status_t flk_lockmgr_status;
152 156 flk_lockmgr_status_t lockmgr_status[HASH_SIZE];
153 157 };
154 158
155 159 zone_key_t flock_zone_key;
156 160
157 161 static void create_flock(lock_descriptor_t *, flock64_t *);
158 162 static lock_descriptor_t *flk_get_lock(void);
159 163 static void flk_free_lock(lock_descriptor_t *lock);
160 164 static void flk_get_first_blocking_lock(lock_descriptor_t *request);
161 165 static int flk_process_request(lock_descriptor_t *);
162 166 static int flk_add_edge(lock_descriptor_t *, lock_descriptor_t *, int, int);
163 167 static edge_t *flk_get_edge(void);
164 168 static int flk_wait_execute_request(lock_descriptor_t *);
165 169 static int flk_relation(lock_descriptor_t *, lock_descriptor_t *);
166 170 static void flk_insert_active_lock(lock_descriptor_t *);
167 171 static void flk_delete_active_lock(lock_descriptor_t *, int);
168 172 static void flk_insert_sleeping_lock(lock_descriptor_t *);
169 173 static void flk_graph_uncolor(graph_t *);
170 174 static void flk_wakeup(lock_descriptor_t *, int);
171 175 static void flk_free_edge(edge_t *);
172 176 static void flk_recompute_dependencies(lock_descriptor_t *,
173 177 lock_descriptor_t **, int, int);
174 178 static int flk_find_barriers(lock_descriptor_t *);
175 179 static void flk_update_barriers(lock_descriptor_t *);
176 180 static int flk_color_reachables(lock_descriptor_t *);
177 181 static int flk_canceled(lock_descriptor_t *);
178 182 static void flk_delete_locks_by_sysid(lock_descriptor_t *);
179 183 static void report_blocker(lock_descriptor_t *, lock_descriptor_t *);
180 184 static void wait_for_lock(lock_descriptor_t *);
181 185 static void unlock_lockmgr_granted(struct flock_globals *);
182 186 static void wakeup_sleeping_lockmgr_locks(struct flock_globals *);
183 187
184 188 /* Clustering hooks */
185 189 static void cl_flk_change_nlm_state_all_locks(int, flk_nlm_status_t);
186 190 static void cl_flk_wakeup_sleeping_nlm_locks(int);
187 191 static void cl_flk_unlock_nlm_granted(int);
188 192
189 193 #ifdef DEBUG
190 194 static int check_lock_transition(int, int);
191 195 static void check_sleeping_locks(graph_t *);
192 196 static void check_active_locks(graph_t *);
193 197 static int no_path(lock_descriptor_t *, lock_descriptor_t *);
194 198 static void path(lock_descriptor_t *, lock_descriptor_t *);
195 199 static void check_owner_locks(graph_t *, pid_t, int, vnode_t *);
196 200 static int level_one_path(lock_descriptor_t *, lock_descriptor_t *);
197 201 static int level_two_path(lock_descriptor_t *, lock_descriptor_t *, int);
198 202 #endif
199 203
200 204 /* proc_graph function definitions */
201 205 static int flk_check_deadlock(lock_descriptor_t *);
202 206 static void flk_proc_graph_uncolor(void);
203 207 static proc_vertex_t *flk_get_proc_vertex(lock_descriptor_t *);
204 208 static proc_edge_t *flk_get_proc_edge(void);
205 209 static void flk_proc_release(proc_vertex_t *);
206 210 static void flk_free_proc_edge(proc_edge_t *);
207 211 static void flk_update_proc_graph(edge_t *, int);
208 212
209 213 /* Non-blocking mandatory locking */
210 214 static int lock_blocks_io(nbl_op_t, u_offset_t, ssize_t, int, u_offset_t,
211 215 u_offset_t);
212 216
213 217 static struct flock_globals *
214 218 flk_get_globals(void)
215 219 {
216 220 /*
217 221 * The KLM module had better be loaded if we're attempting to handle
218 222 * lockmgr requests.
219 223 */
220 224 ASSERT(flock_zone_key != ZONE_KEY_UNINITIALIZED);
221 225 return (zone_getspecific(flock_zone_key, curproc->p_zone));
222 226 }
223 227
224 228 static flk_lockmgr_status_t
225 229 flk_get_lockmgr_status(void)
226 230 {
227 231 struct flock_globals *fg;
228 232
229 233 ASSERT(MUTEX_HELD(&flock_lock));
230 234
231 235 if (flock_zone_key == ZONE_KEY_UNINITIALIZED) {
232 236 /*
233 237 * KLM module not loaded; lock manager definitely not running.
234 238 */
235 239 return (FLK_LOCKMGR_DOWN);
236 240 }
237 241 fg = flk_get_globals();
238 242 return (fg->flk_lockmgr_status);
239 243 }
240 244
241 245 /*
242 246 * Routine called from fs_frlock in fs/fs_subr.c
243 247 */
244 248
245 249 int
246 250 reclock(vnode_t *vp,
247 251 flock64_t *lckdat,
248 252 int cmd,
249 253 int flag,
250 254 u_offset_t offset,
251 255 flk_callback_t *flk_cbp)
252 256 {
253 257 lock_descriptor_t stack_lock_request;
254 258 lock_descriptor_t *lock_request;
255 259 int error = 0;
256 260 graph_t *gp;
257 261 int nlmid;
258 262
259 263 /*
260 264 * Check access permissions
261 265 */
262 266 if ((cmd & SETFLCK) &&
263 267 ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) ||
264 268 (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0)))
265 269 return (EBADF);
266 270
267 271 /*
268 272 * for query and unlock we use the stack_lock_request
269 273 */
270 274
271 275 if ((lckdat->l_type == F_UNLCK) ||
272 276 !((cmd & INOFLCK) || (cmd & SETFLCK))) {
273 277 lock_request = &stack_lock_request;
274 278 (void) bzero((caddr_t)lock_request,
275 279 sizeof (lock_descriptor_t));
276 280
277 281 /*
278 282 * the following is added so that the assertions in
279 283 * flk_execute_request() pass
280 284 */
281 285
282 286 lock_request->l_edge.edge_in_next = &lock_request->l_edge;
283 287 lock_request->l_edge.edge_in_prev = &lock_request->l_edge;
284 288 lock_request->l_edge.edge_adj_next = &lock_request->l_edge;
285 289 lock_request->l_edge.edge_adj_prev = &lock_request->l_edge;
286 290 lock_request->l_status = FLK_INITIAL_STATE;
287 291 } else {
288 292 lock_request = flk_get_lock();
289 293 }
290 294 lock_request->l_state = 0;
291 295 lock_request->l_vnode = vp;
292 296 lock_request->l_zoneid = getzoneid();
293 297
294 298 /*
295 299 * Convert the request range into the canonical start and end
296 300 * values. The NLM protocol supports locking over the entire
297 301 * 32-bit range, so there's no range checking for remote requests,
298 302 * but we still need to verify that local requests obey the rules.
299 303 */
300 304 /* Clustering */
301 305 if ((cmd & (RCMDLCK | PCMDLCK)) != 0) {
302 306 ASSERT(lckdat->l_whence == 0);
303 307 lock_request->l_start = lckdat->l_start;
304 308 lock_request->l_end = (lckdat->l_len == 0) ? MAX_U_OFFSET_T :
305 309 lckdat->l_start + (lckdat->l_len - 1);
306 310 } else {
307 311 /* check the validity of the lock range */
308 312 error = flk_convert_lock_data(vp, lckdat,
309 313 &lock_request->l_start, &lock_request->l_end,
310 314 offset);
311 315 if (error) {
312 316 goto done;
313 317 }
314 318 error = flk_check_lock_data(lock_request->l_start,
315 319 lock_request->l_end, MAXEND);
316 320 if (error) {
317 321 goto done;
318 322 }
319 323 }
320 324
321 325 ASSERT(lock_request->l_end >= lock_request->l_start);
322 326
323 327 lock_request->l_type = lckdat->l_type;
324 328 if (cmd & INOFLCK)
325 329 lock_request->l_state |= IO_LOCK;
326 330 if (cmd & SLPFLCK)
327 331 lock_request->l_state |= WILLING_TO_SLEEP_LOCK;
328 332 if (cmd & RCMDLCK)
329 333 lock_request->l_state |= LOCKMGR_LOCK;
330 334 if (cmd & NBMLCK)
331 335 lock_request->l_state |= NBMAND_LOCK;
332 336 /*
333 337 * Clustering: set flag for PXFS locks
334 338 * We do not _only_ check for the PCMDLCK flag because PXFS locks could
335 339 * also be of type 'RCMDLCK'.
336 340 * We do not _only_ check the GETPXFSID() macro because local PXFS
337 341 * clients use a pxfsid of zero to permit deadlock detection in the LLM.
338 342 */
339 343
340 344 if ((cmd & PCMDLCK) || (GETPXFSID(lckdat->l_sysid) != 0)) {
341 345 lock_request->l_state |= PXFS_LOCK;
342 346 }
343 347 if (!((cmd & SETFLCK) || (cmd & INOFLCK))) {
344 348 if (lock_request->l_type == F_RDLCK ||
345 349 lock_request->l_type == F_WRLCK)
346 350 lock_request->l_state |= QUERY_LOCK;
347 351 }
348 352 lock_request->l_flock = (*lckdat);
349 353 lock_request->l_callbacks = flk_cbp;
350 354
351 355 /*
352 356 * We are ready for processing the request
353 357 */
354 358 if (IS_LOCKMGR(lock_request)) {
355 359 /*
356 360 * If the lock request is an NLM server request ....
357 361 */
358 362 if (nlm_status_size == 0) { /* not booted as cluster */
359 363 mutex_enter(&flock_lock);
360 364 /*
361 365 * Bail out if this is a lock manager request and the
362 366 * lock manager is not supposed to be running.
363 367 */
364 368 if (flk_get_lockmgr_status() != FLK_LOCKMGR_UP) {
365 369 mutex_exit(&flock_lock);
366 370 error = ENOLCK;
367 371 goto done;
368 372 }
369 373 mutex_exit(&flock_lock);
370 374 } else { /* booted as a cluster */
371 375 nlmid = GETNLMID(lock_request->l_flock.l_sysid);
372 376 ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
373 377
374 378 mutex_enter(&nlm_reg_lock);
375 379 /*
376 380 * If the NLM registry does not know about this
377 381 * NLM server making the request, add its nlmid
378 382 * to the registry.
379 383 */
380 384 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status,
381 385 nlmid)) {
382 386 FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid);
383 387 } else if (!FLK_REGISTRY_IS_NLM_UP(nlm_reg_status,
384 388 nlmid)) {
385 389 /*
386 390 * If the NLM server is already known (has made
387 391 * previous lock requests) and its state is
388 392 * not NLM_UP (means that NLM server is
389 393 * shutting down), then bail out with an
390 394 * error to deny the lock request.
391 395 */
392 396 mutex_exit(&nlm_reg_lock);
393 397 error = ENOLCK;
394 398 goto done;
395 399 }
396 400 mutex_exit(&nlm_reg_lock);
397 401 }
398 402 }
399 403
400 404 /* Now get the lock graph for a particular vnode */
401 405 gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH);
402 406
403 407 /*
404 408 * We drop rwlock here otherwise this might end up causing a
405 409 * deadlock if this IOLOCK sleeps. (bugid # 1183392).
406 410 */
407 411
408 412 if (IS_IO_LOCK(lock_request)) {
409 413 VOP_RWUNLOCK(vp,
410 414 (lock_request->l_type == F_RDLCK) ?
411 415 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
412 416 }
413 417 mutex_enter(&gp->gp_mutex);
414 418
415 419 lock_request->l_state |= REFERENCED_LOCK;
416 420 lock_request->l_graph = gp;
417 421
418 422 switch (lock_request->l_type) {
419 423 case F_RDLCK:
420 424 case F_WRLCK:
421 425 if (IS_QUERY_LOCK(lock_request)) {
422 426 flk_get_first_blocking_lock(lock_request);
423 427 (*lckdat) = lock_request->l_flock;
424 428 break;
425 429 }
426 430
427 431 /* process the request now */
428 432
429 433 error = flk_process_request(lock_request);
430 434 break;
431 435
432 436 case F_UNLCK:
433 437 /* unlock request will not block so execute it immediately */
434 438
435 439 if (IS_LOCKMGR(lock_request) &&
436 440 flk_canceled(lock_request)) {
437 441 error = 0;
438 442 } else {
439 443 error = flk_execute_request(lock_request);
440 444 }
441 445 break;
442 446
443 447 case F_UNLKSYS:
444 448 /*
445 449 * Recovery mechanism to release lock manager locks when
446 450 * an NFS client crashes and restarts. The NFS server will clear
447 451 * old locks and grant new locks.
448 452 */
449 453
450 454 if (lock_request->l_flock.l_sysid == 0) {
451 455 mutex_exit(&gp->gp_mutex);
452 456 return (EINVAL);
453 457 }
454 458 if (secpolicy_nfs(CRED()) != 0) {
455 459 mutex_exit(&gp->gp_mutex);
456 460 return (EPERM);
457 461 }
458 462 flk_delete_locks_by_sysid(lock_request);
459 463 lock_request->l_state &= ~REFERENCED_LOCK;
460 464 flk_set_state(lock_request, FLK_DEAD_STATE);
461 465 flk_free_lock(lock_request);
462 466 mutex_exit(&gp->gp_mutex);
463 467 return (0);
464 468
465 469 default:
466 470 error = EINVAL;
467 471 break;
468 472 }
469 473
470 474 /* Clustering: For blocked PXFS locks, return */
471 475 if (error == PXFS_LOCK_BLOCKED) {
472 476 lock_request->l_state &= ~REFERENCED_LOCK;
473 477 mutex_exit(&gp->gp_mutex);
474 478 return (error);
475 479 }
476 480
477 481 /*
478 482 * Now that we have seen the status of locks in the system for
479 483 * this vnode we acquire the rwlock if it is an IO_LOCK.
480 484 */
481 485
482 486 if (IS_IO_LOCK(lock_request)) {
483 487 (void) VOP_RWLOCK(vp,
484 488 (lock_request->l_type == F_RDLCK) ?
485 489 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
486 490 if (!error) {
487 491 lckdat->l_type = F_UNLCK;
488 492
489 493 /*
490 494 * This wake up is needed otherwise
491 495 * if IO_LOCK has slept the dependents on this
492 496 * will not be woken up at all. (bugid # 1185482).
493 497 */
494 498
495 499 flk_wakeup(lock_request, 1);
496 500 flk_set_state(lock_request, FLK_DEAD_STATE);
497 501 flk_free_lock(lock_request);
498 502 }
499 503 /*
500 504 * else if an error occurred, either flk_process_request()
501 505 * returned EDEADLK, in which case there are no dependents
502 506 * for this lock, or flk_wait_execute_request() returned
503 507 * EINTR, in which case flk_cancel_sleeping_lock() has
504 508 * already been done. The same is true for EBADF.
505 509 */
506 510 }
507 511
508 512 if (lock_request == &stack_lock_request) {
509 513 flk_set_state(lock_request, FLK_DEAD_STATE);
510 514 } else {
511 515 lock_request->l_state &= ~REFERENCED_LOCK;
512 516 if ((error != 0) || IS_DELETED(lock_request)) {
513 517 flk_set_state(lock_request, FLK_DEAD_STATE);
514 518 flk_free_lock(lock_request);
515 519 }
516 520 }
517 521
518 522 mutex_exit(&gp->gp_mutex);
519 523 return (error);
520 524
521 525 done:
522 526 flk_set_state(lock_request, FLK_DEAD_STATE);
523 527 if (lock_request != &stack_lock_request)
524 528 flk_free_lock(lock_request);
525 529 return (error);
526 530 }
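For orientation, a hedged sketch of a caller using reclock() purely as a query (cmd carries neither SETFLCK nor INOFLCK, so the stack_lock_request/QUERY_LOCK path above is taken); query_whole_file() and its arguments are illustrative only, not part of this file:

static int
query_whole_file(vnode_t *vp, u_offset_t offset)
{
	flock64_t bf;
	int error;

	bzero(&bf, sizeof (bf));
	bf.l_type = F_RDLCK;		/* would a read lock be blocked? */
	bf.l_whence = 0;
	bf.l_start = 0;
	bf.l_len = 0;			/* zero length means "to EOF" */
	bf.l_pid = curproc->p_pid;

	error = reclock(vp, &bf, 0, FREAD, offset, NULL);
	/*
	 * On success, bf either has l_type == F_UNLCK (nothing blocks)
	 * or describes the first blocking lock, as with fcntl F_GETLK.
	 */
	return (error);
}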
527 531
528 532 /*
529 533 * Invoke the callbacks in the given list. If before sleeping, invoke in
530 534 * list order. If after sleeping, invoke in reverse order.
531 535 *
532 536 * CPR (suspend/resume) support: if one of the callbacks returns a
533 537 * callb_cpr_t, return it. This will be used to make the thread CPR-safe
534 538 * while it is sleeping. There should be at most one callb_cpr_t for the
535 539 * thread.
536 540 * XXX This is unnecessarily complicated. The CPR information should just
537 541 * get passed in directly through VOP_FRLOCK and reclock, rather than
538 542 * sneaking it in via a callback.
539 543 */
540 544
541 545 callb_cpr_t *
542 546 flk_invoke_callbacks(flk_callback_t *cblist, flk_cb_when_t when)
543 547 {
544 548 callb_cpr_t *cpr_callbackp = NULL;
545 549 callb_cpr_t *one_result;
546 550 flk_callback_t *cb;
547 551
548 552 if (cblist == NULL)
549 553 return (NULL);
550 554
551 555 if (when == FLK_BEFORE_SLEEP) {
552 556 cb = cblist;
553 557 do {
554 558 one_result = (*cb->cb_callback)(when, cb->cb_data);
555 559 if (one_result != NULL) {
556 560 ASSERT(cpr_callbackp == NULL);
557 561 cpr_callbackp = one_result;
558 562 }
559 563 cb = cb->cb_next;
560 564 } while (cb != cblist);
561 565 } else {
562 566 cb = cblist->cb_prev;
563 567 do {
564 568 one_result = (*cb->cb_callback)(when, cb->cb_data);
565 569 if (one_result != NULL) {
566 570 cpr_callbackp = one_result;
567 571 }
568 572 cb = cb->cb_prev;
569 573 } while (cb != cblist->cb_prev);
570 574 }
571 575
572 576 return (cpr_callbackp);
573 577 }
574 578
575 579 /*
576 580 * Initialize a flk_callback_t to hold the given callback.
577 581 */
578 582
579 583 void
580 584 flk_init_callback(flk_callback_t *flk_cb,
581 585 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata)
582 586 {
583 587 flk_cb->cb_next = flk_cb;
584 588 flk_cb->cb_prev = flk_cb;
585 589 flk_cb->cb_callback = cb_fcn;
586 590 flk_cb->cb_data = cbdata;
587 591 }
588 592
589 593 /*
590 594 * Initialize an flk_callback_t and then link it into the head of an
591 595 * existing list (which may be NULL).
592 596 */
593 597
594 598 void
595 599 flk_add_callback(flk_callback_t *newcb,
596 600 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *),
597 601 void *cbdata, flk_callback_t *cblist)
598 602 {
599 603 flk_init_callback(newcb, cb_fcn, cbdata);
600 604
601 605 if (cblist == NULL)
602 606 return;
603 607
604 608 newcb->cb_prev = cblist->cb_prev;
605 609 newcb->cb_next = cblist;
606 610 cblist->cb_prev->cb_next = newcb;
607 611 cblist->cb_prev = newcb;
608 612 }
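To illustrate the CPR hand-off described above flk_invoke_callbacks(), a minimal sketch of a callback a blocking caller might register; the function name and the per-caller callb_cpr_t are assumptions, not part of this file:

static callb_cpr_t *
my_frlock_callback(flk_cb_when_t when, void *arg)
{
	callb_cpr_t *cprp = arg;	/* caller's CPR info */

	/*
	 * Hand the callb_cpr_t back only before sleeping, so that
	 * flk_wait_execute_request() can mark the thread CPR-safe
	 * while it waits; nothing needs to be undone afterwards.
	 */
	return (when == FLK_BEFORE_SLEEP ? cprp : NULL);
}

The caller would then do flk_init_callback(&cb, my_frlock_callback, &cpr_info) and pass &cb as the flk_cbp argument to reclock().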
609 613
610 614 /*
611 615 * Initialize the flk_edge_cache data structure and create the
612 616 * nlm_reg_status array.
613 617 */
614 618
615 619 void
616 620 flk_init(void)
617 621 {
618 622 uint_t i;
619 623
620 624 flk_edge_cache = kmem_cache_create("flk_edges",
621 625 sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0);
622 626 if (flk_edge_cache == NULL) {
623 627 cmn_err(CE_PANIC, "Couldn't create flk_edge_cache\n");
624 628 }
625 629 /*
626 630 * Create the NLM registry object.
627 631 */
628 632
629 633 if (cluster_bootflags & CLUSTER_BOOTED) {
630 634 /*
631 635 * This routine tells you the maximum node id that will be used
632 636 * in the cluster. This number will be the size of the nlm
633 637 * registry status array. We add 1 because we will be using
634 638 * all entries indexed from 0 to maxnodeid; e.g., from 0
635 639 * to 64, for a total of 65 entries.
636 640 */
637 641 nlm_status_size = clconf_maximum_nodeid() + 1;
638 642 } else {
639 643 nlm_status_size = 0;
640 644 }
641 645
642 646 if (nlm_status_size != 0) { /* booted as a cluster */
643 647 nlm_reg_status = (flk_nlm_status_t *)
644 648 kmem_alloc(sizeof (flk_nlm_status_t) * nlm_status_size,
645 649 KM_SLEEP);
646 650
647 651 /* initialize all NLM states in array to NLM_UNKNOWN */
648 652 for (i = 0; i < nlm_status_size; i++) {
649 653 nlm_reg_status[i] = FLK_NLM_UNKNOWN;
650 654 }
651 655 }
652 656 }
653 657
654 658 /*
655 659 * Zone constructor/destructor callbacks to be executed when a zone is
656 660 * created/destroyed.
657 661 */
658 662 /* ARGSUSED */
659 663 void *
660 664 flk_zone_init(zoneid_t zoneid)
661 665 {
662 666 struct flock_globals *fg;
663 667 uint_t i;
664 668
665 669 fg = kmem_alloc(sizeof (*fg), KM_SLEEP);
666 670 fg->flk_lockmgr_status = FLK_LOCKMGR_UP;
667 671 for (i = 0; i < HASH_SIZE; i++)
668 672 fg->lockmgr_status[i] = FLK_LOCKMGR_UP;
669 673 return (fg);
670 674 }
671 675
672 676 /* ARGSUSED */
673 677 void
674 678 flk_zone_fini(zoneid_t zoneid, void *data)
675 679 {
676 680 struct flock_globals *fg = data;
677 681
678 682 kmem_free(fg, sizeof (*fg));
679 683 }
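For context only: these constructor/destructor callbacks are registered elsewhere (by the lock-manager module, which is what leaves flock_zone_key initialized). A sketch of what that registration plausibly looks like, assuming the standard zone_key_create() interface:

/* Sketch: done in the KLM module's init path, not in flock.c itself. */
zone_key_create(&flock_zone_key, flk_zone_init, NULL, flk_zone_fini);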
680 684
681 685 /*
682 686 * Get a lock_descriptor structure with initialization of edge lists.
683 687 */
684 688
685 689 static lock_descriptor_t *
686 690 flk_get_lock(void)
687 691 {
688 692 lock_descriptor_t *l;
689 693
690 694 l = kmem_zalloc(sizeof (lock_descriptor_t), KM_SLEEP);
691 695
692 696 cv_init(&l->l_cv, NULL, CV_DRIVER, NULL);
693 697 l->l_edge.edge_in_next = &l->l_edge;
694 698 l->l_edge.edge_in_prev = &l->l_edge;
695 699 l->l_edge.edge_adj_next = &l->l_edge;
696 700 l->l_edge.edge_adj_prev = &l->l_edge;
697 701 l->pvertex = -1;
698 702 l->l_status = FLK_INITIAL_STATE;
699 703 flk_lock_allocs++;
700 704 return (l);
701 705 }
702 706
703 707 /*
704 708 * Free a lock_descriptor structure. Just sets the DELETED_LOCK flag
705 709 * when some thread has a reference to it as in reclock().
706 710 */
707 711
708 712 void
709 713 flk_free_lock(lock_descriptor_t *lock)
710 714 {
711 715 ASSERT(IS_DEAD(lock));
712 716 if (IS_REFERENCED(lock)) {
713 717 lock->l_state |= DELETED_LOCK;
714 718 return;
715 719 }
716 720 flk_lock_frees++;
717 721 kmem_free((void *)lock, sizeof (lock_descriptor_t));
718 722 }
719 723
720 724 void
721 725 flk_set_state(lock_descriptor_t *lock, int new_state)
722 726 {
723 727 /*
724 728 * Locks in the sleeping list may be woken up in a number of ways,
725 729 * and more than once. If a sleeping lock is signaled awake more
726 730 * than once, then it may or may not change state depending on its
727 731 * current state.
728 732 * Also note that NLM locks that are sleeping could be moved to an
729 733 * interrupted state more than once if the unlock request is
730 734 * retransmitted by the NLM client - the second time around, this is
731 735 * just a nop.
732 736 * The ordering of being signaled awake is:
733 737 * INTERRUPTED_STATE > CANCELLED_STATE > GRANTED_STATE.
734 738 * The checks below implement this ordering.
735 739 */
736 740 if (IS_INTERRUPTED(lock)) {
737 741 if ((new_state == FLK_CANCELLED_STATE) ||
738 742 (new_state == FLK_GRANTED_STATE) ||
739 743 (new_state == FLK_INTERRUPTED_STATE)) {
740 744 return;
741 745 }
742 746 }
743 747 if (IS_CANCELLED(lock)) {
744 748 if ((new_state == FLK_GRANTED_STATE) ||
745 749 (new_state == FLK_CANCELLED_STATE)) {
746 750 return;
747 751 }
748 752 }
749 753 CHECK_LOCK_TRANSITION(lock->l_status, new_state);
750 754 if (IS_PXFS(lock)) {
751 755 cl_flk_state_transition_notify(lock, lock->l_status, new_state);
752 756 }
753 757 lock->l_status = new_state;
754 758 }
755 759
756 760 /*
757 761 * Routine that checks whether there are any blocking locks in the system.
758 762 *
759 763 * The policy followed is if a write lock is sleeping we don't allow read
760 764 * locks before this write lock even though there may not be any active
761 765 * locks corresponding to the read locks' region.
762 766 *
763 767 * flk_add_edge() function adds an edge between l1 and l2 iff there
764 768 * is no path between l1 and l2. This is done to have a "minimum
765 769 * storage representation" of the dependency graph.
766 770 *
767 771 * Another property of the graph is since only the new request throws
768 772 * edges to the existing locks in the graph, the graph is always topologically
769 773 * ordered.
770 774 */
771 775
772 776 static int
773 777 flk_process_request(lock_descriptor_t *request)
774 778 {
775 779 graph_t *gp = request->l_graph;
776 780 lock_descriptor_t *lock;
777 781 int request_blocked_by_active = 0;
778 782 int request_blocked_by_granted = 0;
779 783 int request_blocked_by_sleeping = 0;
780 784 vnode_t *vp = request->l_vnode;
781 785 int error = 0;
782 786 int request_will_wait = 0;
783 787 int found_covering_lock = 0;
784 788 lock_descriptor_t *covered_by = NULL;
785 789
786 790 ASSERT(MUTEX_HELD(&gp->gp_mutex));
787 791 request_will_wait = IS_WILLING_TO_SLEEP(request);
788 792
789 793 /*
790 794 * check active locks
791 795 */
792 796
793 797 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
794 798
795 799
796 800 if (lock) {
797 801 do {
798 802 if (BLOCKS(lock, request)) {
799 803 if (!request_will_wait)
800 804 return (EAGAIN);
801 805 request_blocked_by_active = 1;
802 806 break;
803 807 }
804 808 /*
805 809 * Grant lock if it is for the same owner holding active
806 810 * lock that covers the request.
807 811 */
808 812
809 813 if (SAME_OWNER(lock, request) &&
810 814 COVERS(lock, request) &&
811 815 (request->l_type == F_RDLCK))
812 816 return (flk_execute_request(request));
813 817 lock = lock->l_next;
814 818 } while (lock->l_vnode == vp);
815 819 }
816 820
817 821 if (!request_blocked_by_active) {
818 822 lock_descriptor_t *lk[1];
819 823 lock_descriptor_t *first_glock = NULL;
820 824 /*
821 825 * Shall we grant this?! NO!!
822 826 * What about those locks that were just granted and still
823 827 * in sleep queue. Those threads are woken up and so locks
824 828 * are almost active.
825 829 */
826 830 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
827 831 if (lock) {
828 832 do {
829 833 if (BLOCKS(lock, request)) {
830 834 if (IS_GRANTED(lock)) {
831 835 request_blocked_by_granted = 1;
832 836 } else {
833 837 request_blocked_by_sleeping = 1;
834 838 }
835 839 }
836 840
837 841 lock = lock->l_next;
838 842 } while ((lock->l_vnode == vp));
839 843 first_glock = lock->l_prev;
840 844 ASSERT(first_glock->l_vnode == vp);
841 845 }
842 846
843 847 if (request_blocked_by_granted)
844 848 goto block;
845 849
846 850 if (!request_blocked_by_sleeping) {
847 851 /*
848 852 * If the request isn't going to be blocked by a
849 853 * sleeping request, we know that it isn't going to
850 854 * be blocked; we can just execute the request --
851 855 * without performing costly deadlock detection.
852 856 */
853 857 ASSERT(!request_blocked_by_active);
854 858 return (flk_execute_request(request));
855 859 } else if (request->l_type == F_RDLCK) {
856 860 /*
857 861 * If we have a sleeping writer in the requested
858 862 * lock's range, block.
859 863 */
860 864 goto block;
861 865 }
862 866
863 867 lk[0] = request;
864 868 request->l_state |= RECOMPUTE_LOCK;
865 869 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
866 870 if (lock) {
867 871 do {
868 872 flk_recompute_dependencies(lock, lk, 1, 0);
869 873 lock = lock->l_next;
870 874 } while (lock->l_vnode == vp);
871 875 }
872 876 lock = first_glock;
873 877 if (lock) {
874 878 do {
875 879 if (IS_GRANTED(lock)) {
876 880 flk_recompute_dependencies(lock, lk, 1, 0);
877 881 }
878 882 lock = lock->l_prev;
879 883 } while ((lock->l_vnode == vp));
880 884 }
881 885 request->l_state &= ~RECOMPUTE_LOCK;
882 886 if (!NO_DEPENDENTS(request) && flk_check_deadlock(request))
883 887 return (EDEADLK);
884 888 return (flk_execute_request(request));
885 889 }
886 890
887 891 block:
888 892 if (request_will_wait)
889 893 flk_graph_uncolor(gp);
890 894
891 895 /* check sleeping locks */
892 896
893 897 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
894 898
895 899 /*
896 900 * If we find a sleeping write lock that is a superset of the
897 901 * region wanted by request we can be assured that by adding an
898 902 * edge to this write lock we have paths to all locks in the
899 903 * graph that blocks the request except in one case, which is why
900 904 * there is another check for SAME_OWNER in the loop below. The exception
901 905 * case is when this process that owns the sleeping write lock 'l1'
902 906 * has other locks l2, l3, l4 that are in the system and arrived
903 907 * before l1. l1 does not have path to these locks as they are from
904 908 * same process. We break when we find a second covering sleeping
905 909 * lock l5 owned by a process different from that owning l1, because
906 910 * there cannot be any of l2, l3, l4, etc., arrived before l5, and if
907 911 * it has l1 would have produced a deadlock already.
908 912 */
909 913
910 914 if (lock) {
911 915 do {
912 916 if (BLOCKS(lock, request)) {
913 917 if (!request_will_wait)
914 918 return (EAGAIN);
915 919 if (COVERS(lock, request) &&
916 920 lock->l_type == F_WRLCK) {
917 921 if (found_covering_lock &&
918 922 !SAME_OWNER(lock, covered_by)) {
919 923 found_covering_lock++;
920 924 break;
921 925 }
922 926 found_covering_lock = 1;
923 927 covered_by = lock;
924 928 }
925 929 if (found_covering_lock &&
926 930 !SAME_OWNER(lock, covered_by)) {
927 931 lock = lock->l_next;
928 932 continue;
929 933 }
930 934 if ((error = flk_add_edge(request, lock,
931 935 !found_covering_lock, 0)))
932 936 return (error);
933 937 }
934 938 lock = lock->l_next;
935 939 } while (lock->l_vnode == vp);
936 940 }
937 941
938 942 /*
939 943 * found_covering_lock == 2 iff at this point 'request' has paths
940 944 * to all locks that blocks 'request'. found_covering_lock == 1 iff at this
941 945 * point 'request' has paths to all locks that blocks 'request' whose owners
942 946 * are not same as the one that covers 'request' (covered_by above) and
943 947 * we can have locks whose owner is same as covered_by in the active list.
944 948 */
945 949
946 950 if (request_blocked_by_active && found_covering_lock != 2) {
947 951 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
948 952 ASSERT(lock != NULL);
949 953 do {
950 954 if (BLOCKS(lock, request)) {
951 955 if (found_covering_lock &&
952 956 !SAME_OWNER(lock, covered_by)) {
953 957 lock = lock->l_next;
954 958 continue;
955 959 }
956 960 if ((error = flk_add_edge(request, lock,
957 961 CHECK_CYCLE, 0)))
958 962 return (error);
959 963 }
960 964 lock = lock->l_next;
961 965 } while (lock->l_vnode == vp);
962 966 }
963 967
964 968 if (NOT_BLOCKED(request)) {
965 969 /*
966 970 * request not dependent on any other locks
967 971 * so execute this request
968 972 */
969 973 return (flk_execute_request(request));
970 974 } else {
971 975 /*
972 976 * check for deadlock
973 977 */
974 978 if (flk_check_deadlock(request))
975 979 return (EDEADLK);
976 980 /*
977 981 * this thread has to sleep
978 982 */
979 983 return (flk_wait_execute_request(request));
980 984 }
981 985 }
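A small worked example of the ordering policy stated in the comment above flk_process_request(); the byte ranges are hypothetical:

/*
 * Suppose a writer W from process A is already sleeping on bytes
 * 0-99 (blocked by some active lock), and a read request R for
 * bytes 0-9 arrives from process B.  Even if R conflicts with no
 * active lock, the sleeping-lock scan finds BLOCKS(W, R), so R
 * either fails with EAGAIN (if not willing to sleep) or gets an
 * edge to W and sleeps; readers are not allowed to jump ahead of
 * a sleeping writer on an overlapping range.
 */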
982 986
983 987 /*
984 988 * The actual execution of the request in the simple case is only to
985 989 * insert the 'request' in the list of active locks if it is not an
986 990 * UNLOCK.
987 991 * We have to consider the existing active locks' relation to
988 992 * this 'request' if they are owned by same process. flk_relation() does
989 993 * this job and sees to that the dependency graph information is maintained
990 994 * properly.
991 995 */
992 996
993 997 int
994 998 flk_execute_request(lock_descriptor_t *request)
995 999 {
996 1000 graph_t *gp = request->l_graph;
997 1001 vnode_t *vp = request->l_vnode;
998 1002 lock_descriptor_t *lock, *lock1;
999 1003 int done_searching = 0;
1000 1004
1001 1005 CHECK_SLEEPING_LOCKS(gp);
1002 1006 CHECK_ACTIVE_LOCKS(gp);
1003 1007
1004 1008 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1005 1009
1006 1010 flk_set_state(request, FLK_START_STATE);
1007 1011
1008 1012 ASSERT(NOT_BLOCKED(request));
1009 1013
1010 1014 /* IO_LOCK requests are only to check status */
1011 1015
1012 1016 if (IS_IO_LOCK(request))
1013 1017 return (0);
1014 1018
1015 1019 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1016 1020
1017 1021 if (lock == NULL && request->l_type == F_UNLCK)
1018 1022 return (0);
1019 1023 if (lock == NULL) {
1020 1024 flk_insert_active_lock(request);
1021 1025 return (0);
1022 1026 }
1023 1027
1024 1028 do {
1025 1029 lock1 = lock->l_next;
1026 1030 if (SAME_OWNER(request, lock)) {
1027 1031 done_searching = flk_relation(lock, request);
1028 1032 }
1029 1033 lock = lock1;
1030 1034 } while (lock->l_vnode == vp && !done_searching);
1031 1035
1032 1036 /*
1033 1037 * insert in active queue
1034 1038 */
1035 1039
1036 1040 if (request->l_type != F_UNLCK)
1037 1041 flk_insert_active_lock(request);
1038 1042
1039 1043 return (0);
1040 1044 }
1041 1045
1042 1046 /*
1043 1047 * 'request' is blocked by someone, therefore we put it into the sleep queue.
1044 1048 */
1045 1049 static int
1046 1050 flk_wait_execute_request(lock_descriptor_t *request)
1047 1051 {
1048 1052 graph_t *gp = request->l_graph;
1049 1053 callb_cpr_t *cprp; /* CPR info from callback */
1050 1054 struct flock_globals *fg;
1051 1055 int index;
1052 1056
1053 1057 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1054 1058 ASSERT(IS_WILLING_TO_SLEEP(request));
1055 1059
1056 1060 flk_insert_sleeping_lock(request);
1057 1061
1058 1062 if (IS_LOCKMGR(request)) {
1059 1063 index = HASH_INDEX(request->l_vnode);
1060 1064 fg = flk_get_globals();
1061 1065
1062 1066 if (nlm_status_size == 0) { /* not booted as a cluster */
1063 1067 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP) {
1064 1068 flk_cancel_sleeping_lock(request, 1);
1065 1069 return (ENOLCK);
1066 1070 }
1067 1071 } else { /* booted as a cluster */
1068 1072 /*
1069 1073 * If the request is an NLM server lock request,
1070 1074 * and the NLM state of the lock request is not
1071 1075 * NLM_UP (because the NLM server is shutting
1072 1076 * down), then cancel the sleeping lock and
1073 1077 * return error ENOLCK that will encourage the
1074 1078 * client to retransmit.
1075 1079 */
1076 1080 if (!IS_NLM_UP(request)) {
1077 1081 flk_cancel_sleeping_lock(request, 1);
1078 1082 return (ENOLCK);
1079 1083 }
1080 1084 }
1081 1085 }
1082 1086
1083 1087 /* Clustering: For blocking PXFS locks, return */
1084 1088 if (IS_PXFS(request)) {
1085 1089 /*
1086 1090 * PXFS locks sleep on the client side.
1087 1091 * The callback argument is used to wake up the sleeper
1088 1092 * when the lock is granted.
1089 1093 * We return -1 (rather than an errno value) to indicate
1090 1094 * the client side should sleep
1091 1095 */
1092 1096 return (PXFS_LOCK_BLOCKED);
1093 1097 }
1094 1098
1095 1099 if (request->l_callbacks != NULL) {
1096 1100 /*
1097 1101 * To make sure the shutdown code works correctly, either
1098 1102 * the callback must happen after putting the lock on the
1099 1103 * sleep list, or we must check the shutdown status after
1100 1104 * returning from the callback (and before sleeping). At
1101 1105 * least for now, we'll use the first option. If a
1102 1106 * shutdown or signal or whatever happened while the graph
1103 1107 * mutex was dropped, that will be detected by
1104 1108 * wait_for_lock().
1105 1109 */
1106 1110 mutex_exit(&gp->gp_mutex);
1107 1111
1108 1112 cprp = flk_invoke_callbacks(request->l_callbacks,
1109 1113 FLK_BEFORE_SLEEP);
1110 1114
1111 1115 mutex_enter(&gp->gp_mutex);
1112 1116
1113 1117 if (cprp == NULL) {
1114 1118 wait_for_lock(request);
1115 1119 } else {
1116 1120 mutex_enter(cprp->cc_lockp);
1117 1121 CALLB_CPR_SAFE_BEGIN(cprp);
1118 1122 mutex_exit(cprp->cc_lockp);
1119 1123 wait_for_lock(request);
1120 1124 mutex_enter(cprp->cc_lockp);
1121 1125 CALLB_CPR_SAFE_END(cprp, cprp->cc_lockp);
1122 1126 mutex_exit(cprp->cc_lockp);
1123 1127 }
1124 1128
1125 1129 mutex_exit(&gp->gp_mutex);
1126 1130 (void) flk_invoke_callbacks(request->l_callbacks,
1127 1131 FLK_AFTER_SLEEP);
1128 1132 mutex_enter(&gp->gp_mutex);
1129 1133 } else {
1130 1134 wait_for_lock(request);
1131 1135 }
1132 1136
1133 1137 if (IS_LOCKMGR(request)) {
1134 1138 /*
1135 1139 * If the lock manager is shutting down, return an
1136 1140 * error that will encourage the client to retransmit.
1137 1141 */
1138 1142 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP &&
1139 1143 !IS_GRANTED(request)) {
1140 1144 flk_cancel_sleeping_lock(request, 1);
1141 1145 return (ENOLCK);
1142 1146 }
1143 1147 }
1144 1148
1145 1149 if (IS_INTERRUPTED(request)) {
1146 1150 /* we got a signal, or act like we did */
1147 1151 flk_cancel_sleeping_lock(request, 1);
1148 1152 return (EINTR);
1149 1153 }
1150 1154
1151 1155 /* Cancelled if some other thread has closed the file */
1152 1156
1153 1157 if (IS_CANCELLED(request)) {
1154 1158 flk_cancel_sleeping_lock(request, 1);
1155 1159 return (EBADF);
1156 1160 }
1157 1161
1158 1162 request->l_state &= ~GRANTED_LOCK;
1159 1163 REMOVE_SLEEP_QUEUE(request);
1160 1164 return (flk_execute_request(request));
1161 1165 }
1162 1166
1163 1167 /*
1164 1168 * This routine adds an edge between from and to because from depends
1165 1169 * on to. If asked to check for deadlock it checks whether there are any
1166 1170 * locks reachable from "from_lock" that are owned by the same process
1167 1171 * as "from_lock".
1168 1172 * NOTE: It is the caller's responsibility to make sure that the color
1169 1173 * of the graph is consistent between the calls to flk_add_edge as done
1170 1174 * in flk_process_request. This routine does not color and check for
1171 1175 * deadlock explicitly.
1172 1176 */
1173 1177
1174 1178 static int
1175 1179 flk_add_edge(lock_descriptor_t *from_lock, lock_descriptor_t *to_lock,
1176 1180 int check_cycle, int update_graph)
1177 1181 {
1178 1182 edge_t *edge;
1179 1183 edge_t *ep;
1180 1184 lock_descriptor_t *vertex;
1181 1185 lock_descriptor_t *vertex_stack;
1182 1186
1183 1187 STACK_INIT(vertex_stack);
1184 1188
1185 1189 /*
1186 1190 * If the to vertex is already colored, just return without
1187 1191 * adding an edge, as it is already reachable from the from
1188 1192 * vertex.
1189 1193 */
1190 1194
1191 1195 if (COLORED(to_lock))
1192 1196 return (0);
1193 1197
1194 1198 edge = flk_get_edge();
1195 1199
1196 1200 /*
1197 1201 * set the from and to vertex
1198 1202 */
1199 1203
1200 1204 edge->from_vertex = from_lock;
1201 1205 edge->to_vertex = to_lock;
1202 1206
1203 1207 /*
1204 1208 * put in adjacency list of from vertex
1205 1209 */
1206 1210
1207 1211 from_lock->l_edge.edge_adj_next->edge_adj_prev = edge;
1208 1212 edge->edge_adj_next = from_lock->l_edge.edge_adj_next;
1209 1213 edge->edge_adj_prev = &from_lock->l_edge;
1210 1214 from_lock->l_edge.edge_adj_next = edge;
1211 1215
1212 1216 /*
1213 1217 * put in the in-list (incoming edges) of the to vertex
1214 1218 */
1215 1219
1216 1220 to_lock->l_edge.edge_in_next->edge_in_prev = edge;
1217 1221 edge->edge_in_next = to_lock->l_edge.edge_in_next;
1218 1222 to_lock->l_edge.edge_in_next = edge;
1219 1223 edge->edge_in_prev = &to_lock->l_edge;
1220 1224
1221 1225
1222 1226 if (update_graph) {
1223 1227 flk_update_proc_graph(edge, 0);
1224 1228 return (0);
1225 1229 }
1226 1230 if (!check_cycle) {
1227 1231 return (0);
1228 1232 }
1229 1233
1230 1234 STACK_PUSH(vertex_stack, from_lock, l_stack);
1231 1235
1232 1236 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
1233 1237
1234 1238 STACK_POP(vertex_stack, l_stack);
1235 1239
1236 1240 for (ep = FIRST_ADJ(vertex);
1237 1241 ep != HEAD(vertex);
1238 1242 ep = NEXT_ADJ(ep)) {
1239 1243 if (COLORED(ep->to_vertex))
1240 1244 continue;
1241 1245 COLOR(ep->to_vertex);
1242 1246 if (SAME_OWNER(ep->to_vertex, from_lock))
1243 1247 goto dead_lock;
1244 1248 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
1245 1249 }
1246 1250 }
1247 1251 return (0);
1248 1252
1249 1253 dead_lock:
1250 1254
1251 1255 /*
1252 1256 * remove all edges
1253 1257 */
1254 1258
1255 1259 ep = FIRST_ADJ(from_lock);
1256 1260
1257 1261 while (ep != HEAD(from_lock)) {
1258 1262 IN_LIST_REMOVE(ep);
1259 1263 from_lock->l_sedge = NEXT_ADJ(ep);
1260 1264 ADJ_LIST_REMOVE(ep);
1261 1265 flk_free_edge(ep);
1262 1266 ep = from_lock->l_sedge;
1263 1267 }
1264 1268 return (EDEADLK);
1265 1269 }
1266 1270
1267 1271 /*
1268 1272 * Get an edge structure for representing the dependency between two locks.
1269 1273 */
1270 1274
1271 1275 static edge_t *
1272 1276 flk_get_edge()
1273 1277 {
1274 1278 edge_t *ep;
1275 1279
1276 1280 ASSERT(flk_edge_cache != NULL);
1277 1281
1278 1282 ep = kmem_cache_alloc(flk_edge_cache, KM_SLEEP);
1279 1283 edge_allocs++;
1280 1284 return (ep);
1281 1285 }
1282 1286
1283 1287 /*
1284 1288 * Free the edge structure.
1285 1289 */
1286 1290
1287 1291 static void
1288 1292 flk_free_edge(edge_t *ep)
1289 1293 {
1290 1294 edge_frees++;
1291 1295 kmem_cache_free(flk_edge_cache, (void *)ep);
1292 1296 }
1293 1297
1294 1298 /*
1295 1299 * Check the relationship of request with lock and perform the
1296 1300 * recomputation of dependencies, break lock if required, and return
1297 1301 * 1 if request cannot have any more relationship with the next
1298 1302 * active locks.
1299 1303 * The 'lock' and 'request' are compared and in case of overlap we
1300 1304 * delete the 'lock' and form new locks to represent the non-overlapped
1301 1305 * portion of original 'lock'. This function has side effects such as
1302 1306 * 'lock' will be freed, new locks will be added to the active list.
1303 1307 */
1304 1308
1305 1309 static int
1306 1310 flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request)
1307 1311 {
1308 1312 int lock_effect;
1309 1313 lock_descriptor_t *lock1, *lock2;
1310 1314 lock_descriptor_t *topology[3];
1311 1315 int nvertex = 0;
1312 1316 int i;
1313 1317 edge_t *ep;
1314 1318 graph_t *gp = (lock->l_graph);
1315 1319
1316 1320
1317 1321 CHECK_SLEEPING_LOCKS(gp);
1318 1322 CHECK_ACTIVE_LOCKS(gp);
1319 1323
1320 1324 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1321 1325
1322 1326 topology[0] = topology[1] = topology[2] = NULL;
1323 1327
1324 1328 if (request->l_type == F_UNLCK)
1325 1329 lock_effect = FLK_UNLOCK;
1326 1330 else if (request->l_type == F_RDLCK &&
1327 1331 lock->l_type == F_WRLCK)
1328 1332 lock_effect = FLK_DOWNGRADE;
1329 1333 else if (request->l_type == F_WRLCK &&
1330 1334 lock->l_type == F_RDLCK)
1331 1335 lock_effect = FLK_UPGRADE;
1332 1336 else
1333 1337 lock_effect = FLK_STAY_SAME;
1334 1338
1335 1339 if (lock->l_end < request->l_start) {
1336 1340 if (lock->l_end == request->l_start - 1 &&
1337 1341 lock_effect == FLK_STAY_SAME) {
1338 1342 topology[0] = request;
1339 1343 request->l_start = lock->l_start;
1340 1344 nvertex = 1;
1341 1345 goto recompute;
1342 1346 } else {
1343 1347 return (0);
1344 1348 }
1345 1349 }
1346 1350
1347 1351 if (lock->l_start > request->l_end) {
1348 1352 if (request->l_end == lock->l_start - 1 &&
1349 1353 lock_effect == FLK_STAY_SAME) {
1350 1354 topology[0] = request;
1351 1355 request->l_end = lock->l_end;
1352 1356 nvertex = 1;
1353 1357 goto recompute;
1354 1358 } else {
1355 1359 return (1);
1356 1360 }
1357 1361 }
1358 1362
1359 1363 if (request->l_end < lock->l_end) {
1360 1364 if (request->l_start > lock->l_start) {
1361 1365 if (lock_effect == FLK_STAY_SAME) {
1362 1366 request->l_start = lock->l_start;
1363 1367 request->l_end = lock->l_end;
1364 1368 topology[0] = request;
1365 1369 nvertex = 1;
1366 1370 } else {
1367 1371 lock1 = flk_get_lock();
1368 1372 lock2 = flk_get_lock();
1369 1373 COPY(lock1, lock);
1370 1374 COPY(lock2, lock);
1371 1375 lock1->l_start = lock->l_start;
1372 1376 lock1->l_end = request->l_start - 1;
1373 1377 lock2->l_start = request->l_end + 1;
1374 1378 lock2->l_end = lock->l_end;
1375 1379 topology[0] = lock1;
1376 1380 topology[1] = lock2;
1377 1381 topology[2] = request;
1378 1382 nvertex = 3;
1379 1383 }
1380 1384 } else if (request->l_start < lock->l_start) {
1381 1385 if (lock_effect == FLK_STAY_SAME) {
1382 1386 request->l_end = lock->l_end;
1383 1387 topology[0] = request;
1384 1388 nvertex = 1;
1385 1389 } else {
1386 1390 lock1 = flk_get_lock();
1387 1391 COPY(lock1, lock);
1388 1392 lock1->l_start = request->l_end + 1;
1389 1393 topology[0] = lock1;
1390 1394 topology[1] = request;
1391 1395 nvertex = 2;
1392 1396 }
1393 1397 } else {
1394 1398 if (lock_effect == FLK_STAY_SAME) {
1395 1399 request->l_start = lock->l_start;
1396 1400 request->l_end = lock->l_end;
1397 1401 topology[0] = request;
1398 1402 nvertex = 1;
1399 1403 } else {
1400 1404 lock1 = flk_get_lock();
1401 1405 COPY(lock1, lock);
1402 1406 lock1->l_start = request->l_end + 1;
1403 1407 topology[0] = lock1;
1404 1408 topology[1] = request;
1405 1409 nvertex = 2;
1406 1410 }
1407 1411 }
1408 1412 } else if (request->l_end > lock->l_end) {
1409 1413 if (request->l_start > lock->l_start) {
1410 1414 if (lock_effect == FLK_STAY_SAME) {
1411 1415 request->l_start = lock->l_start;
1412 1416 topology[0] = request;
1413 1417 nvertex = 1;
1414 1418 } else {
1415 1419 lock1 = flk_get_lock();
1416 1420 COPY(lock1, lock);
1417 1421 lock1->l_end = request->l_start - 1;
1418 1422 topology[0] = lock1;
1419 1423 topology[1] = request;
1420 1424 nvertex = 2;
1421 1425 }
1422 1426 } else if (request->l_start < lock->l_start) {
1423 1427 topology[0] = request;
1424 1428 nvertex = 1;
1425 1429 } else {
1426 1430 topology[0] = request;
1427 1431 nvertex = 1;
1428 1432 }
1429 1433 } else {
1430 1434 if (request->l_start > lock->l_start) {
1431 1435 if (lock_effect == FLK_STAY_SAME) {
1432 1436 request->l_start = lock->l_start;
1433 1437 topology[0] = request;
1434 1438 nvertex = 1;
1435 1439 } else {
1436 1440 lock1 = flk_get_lock();
1437 1441 COPY(lock1, lock);
1438 1442 lock1->l_end = request->l_start - 1;
1439 1443 topology[0] = lock1;
1440 1444 topology[1] = request;
1441 1445 nvertex = 2;
1442 1446 }
1443 1447 } else if (request->l_start < lock->l_start) {
1444 1448 topology[0] = request;
1445 1449 nvertex = 1;
1446 1450 } else {
1447 1451 if (lock_effect != FLK_UNLOCK) {
1448 1452 topology[0] = request;
1449 1453 nvertex = 1;
1450 1454 } else {
1451 1455 flk_delete_active_lock(lock, 0);
1452 1456 flk_wakeup(lock, 1);
1453 1457 flk_free_lock(lock);
1454 1458 CHECK_SLEEPING_LOCKS(gp);
1455 1459 CHECK_ACTIVE_LOCKS(gp);
1456 1460 return (1);
1457 1461 }
1458 1462 }
1459 1463 }
1460 1464
1461 1465 recompute:
1462 1466
1463 1467 /*
1464 1468 * For unlock we don't send the 'request' for recomputing
1465 1469 * dependencies because no lock will add an edge to it.
1466 1470 */
1467 1471
1468 1472 if (lock_effect == FLK_UNLOCK) {
1469 1473 topology[nvertex-1] = NULL;
1470 1474 nvertex--;
1471 1475 }
1472 1476 for (i = 0; i < nvertex; i++) {
1473 1477 topology[i]->l_state |= RECOMPUTE_LOCK;
1474 1478 topology[i]->l_color = NO_COLOR;
1475 1479 }
1476 1480
1477 1481 ASSERT(FIRST_ADJ(lock) == HEAD(lock));
1478 1482
1479 1483 /*
1480 1484 * we remove the adjacent edges for all vertices' to this vertex
1481 1485 * 'lock'.
1482 1486 */
1483 1487
1484 1488 ep = FIRST_IN(lock);
1485 1489 while (ep != HEAD(lock)) {
1486 1490 ADJ_LIST_REMOVE(ep);
1487 1491 ep = NEXT_IN(ep);
1488 1492 }
1489 1493
1490 1494 flk_delete_active_lock(lock, 0);
1491 1495
1492 1496 /* We are ready for recomputing the dependencies now */
1493 1497
1494 1498 flk_recompute_dependencies(lock, topology, nvertex, 1);
1495 1499
1496 1500 for (i = 0; i < nvertex; i++) {
1497 1501 topology[i]->l_state &= ~RECOMPUTE_LOCK;
1498 1502 topology[i]->l_color = NO_COLOR;
1499 1503 }
1500 1504
1501 1505
1502 1506 if (lock_effect == FLK_UNLOCK) {
1503 1507 nvertex++;
1504 1508 }
1505 1509 for (i = 0; i < nvertex - 1; i++) {
1506 1510 flk_insert_active_lock(topology[i]);
1507 1511 }
1508 1512
1509 1513
1510 1514 if (lock_effect == FLK_DOWNGRADE || lock_effect == FLK_UNLOCK) {
1511 1515 flk_wakeup(lock, 0);
1512 1516 } else {
1513 1517 ep = FIRST_IN(lock);
1514 1518 while (ep != HEAD(lock)) {
1515 1519 lock->l_sedge = NEXT_IN(ep);
1516 1520 IN_LIST_REMOVE(ep);
1517 1521 flk_update_proc_graph(ep, 1);
1518 1522 flk_free_edge(ep);
1519 1523 ep = lock->l_sedge;
1520 1524 }
1521 1525 }
1522 1526 flk_free_lock(lock);
1523 1527
1524 1528 CHECK_SLEEPING_LOCKS(gp);
1525 1529 CHECK_ACTIVE_LOCKS(gp);
1526 1530 return (0);
1527 1531 }
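To make the overlap cases above concrete, a worked sketch of the three-way split (request strictly inside the old lock, lock_effect != FLK_STAY_SAME); the byte ranges are hypothetical:

/*
 * An active write lock covers bytes 0-99 and the same owner now
 * requests a read lock over bytes 10-19 (FLK_DOWNGRADE).  The code
 * above builds:
 *
 *	lock1:   bytes  0-9	(copy of the old lock)
 *	lock2:   bytes 20-99	(copy of the old lock)
 *	request: bytes 10-19	(the new read lock)
 *
 * with topology[] = { lock1, lock2, request } and nvertex = 3.  The
 * old lock is deleted, dependencies are recomputed over these three
 * vertices, lock1 and lock2 are reinserted as active locks, sleepers
 * blocked by the old write lock are woken via flk_wakeup(), and the
 * request itself is inserted into the active list afterwards by
 * flk_execute_request().
 */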
1528 1532
1529 1533 /*
1530 1534 * Insert a lock into the active queue.
1531 1535 */
1532 1536
1533 1537 static void
1534 1538 flk_insert_active_lock(lock_descriptor_t *new_lock)
1535 1539 {
1536 1540 graph_t *gp = new_lock->l_graph;
1537 1541 vnode_t *vp = new_lock->l_vnode;
1538 1542 lock_descriptor_t *first_lock, *lock;
1539 1543
1540 1544 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1541 1545
1542 1546 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1543 1547 first_lock = lock;
1544 1548
1545 1549 if (first_lock != NULL) {
1546 1550 for (; (lock->l_vnode == vp &&
1547 1551 lock->l_start < new_lock->l_start); lock = lock->l_next)
1548 1552 ;
1549 1553 } else {
1550 1554 lock = ACTIVE_HEAD(gp);
1551 1555 }
1552 1556
1553 1557 lock->l_prev->l_next = new_lock;
1554 1558 new_lock->l_next = lock;
1555 1559 new_lock->l_prev = lock->l_prev;
1556 1560 lock->l_prev = new_lock;
1557 1561
1558 1562 if (first_lock == NULL || (new_lock->l_start <= first_lock->l_start)) {
1559 1563 vp->v_filocks = (struct filock *)new_lock;
1560 1564 }
1561 1565 flk_set_state(new_lock, FLK_ACTIVE_STATE);
1562 1566 new_lock->l_state |= ACTIVE_LOCK;
1563 1567
1564 1568 CHECK_ACTIVE_LOCKS(gp);
1565 1569 CHECK_SLEEPING_LOCKS(gp);
1566 1570 }
1567 1571
1568 1572 /*
1569 1573 * Delete the active lock : Performs two functions depending on the
1570 1574 * value of second parameter. One is to remove from the active lists
1571 1575 * only and other is to both remove and free the lock.
1572 1576 */
1573 1577
1574 1578 static void
1575 1579 flk_delete_active_lock(lock_descriptor_t *lock, int free_lock)
1576 1580 {
1577 1581 vnode_t *vp = lock->l_vnode;
1578 1582 graph_t *gp = lock->l_graph;
1579 1583
1580 1584 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1581 1585 if (free_lock)
1582 1586 ASSERT(NO_DEPENDENTS(lock));
1583 1587 ASSERT(NOT_BLOCKED(lock));
1584 1588 ASSERT(IS_ACTIVE(lock));
1585 1589
1586 1590 ASSERT((vp->v_filocks != NULL));
1587 1591
1588 1592 if (vp->v_filocks == (struct filock *)lock) {
1589 1593 vp->v_filocks = (struct filock *)
1590 1594 ((lock->l_next->l_vnode == vp) ? lock->l_next :
1591 1595 NULL);
1592 1596 }
1593 1597 lock->l_next->l_prev = lock->l_prev;
1594 1598 lock->l_prev->l_next = lock->l_next;
1595 1599 lock->l_next = lock->l_prev = NULL;
1596 1600 flk_set_state(lock, FLK_DEAD_STATE);
1597 1601 lock->l_state &= ~ACTIVE_LOCK;
1598 1602
1599 1603 if (free_lock)
1600 1604 flk_free_lock(lock);
1601 1605 CHECK_ACTIVE_LOCKS(gp);
1602 1606 CHECK_SLEEPING_LOCKS(gp);
1603 1607 }
1604 1608
1605 1609 /*
1606 1610 * Insert into the sleep queue.
1607 1611 */
1608 1612
1609 1613 static void
1610 1614 flk_insert_sleeping_lock(lock_descriptor_t *request)
1611 1615 {
1612 1616 graph_t *gp = request->l_graph;
1613 1617 vnode_t *vp = request->l_vnode;
1614 1618 lock_descriptor_t *lock;
1615 1619
1616 1620 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1617 1621 ASSERT(IS_INITIAL(request));
1618 1622
1619 1623 for (lock = gp->sleeping_locks.l_next; (lock != &gp->sleeping_locks &&
1620 1624 lock->l_vnode < vp); lock = lock->l_next)
1621 1625 ;
1622 1626
1623 1627 lock->l_prev->l_next = request;
1624 1628 request->l_prev = lock->l_prev;
1625 1629 lock->l_prev = request;
1626 1630 request->l_next = lock;
1627 1631 flk_set_state(request, FLK_SLEEPING_STATE);
1628 1632 request->l_state |= SLEEPING_LOCK;
1629 1633 }
1630 1634
1631 1635 /*
1632 1636 * Cancelling a sleeping lock implies removing a vertex from the
1633 1637 * dependency graph and therefore we should recompute the dependencies
1634 1638 * of all vertices that have a path to this vertex, w.r.t. all
1635 1639 * vertices reachable from this vertex.
1636 1640 */
1637 1641
1638 1642 void
1639 1643 flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue)
1640 1644 {
1641 1645 graph_t *gp = request->l_graph;
1642 1646 vnode_t *vp = request->l_vnode;
1643 1647 lock_descriptor_t **topology = NULL;
1644 1648 edge_t *ep;
1645 1649 lock_descriptor_t *vertex, *lock;
1646 1650 int nvertex = 0;
1647 1651 int i;
1648 1652 lock_descriptor_t *vertex_stack;
1649 1653
1650 1654 STACK_INIT(vertex_stack);
1651 1655
1652 1656 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1653 1657 /*
1654 1658 * count the number of vertex pointers that have to be allocated:
1655 1659 * all vertices that are reachable from request.
1656 1660 */
1657 1661
1658 1662 STACK_PUSH(vertex_stack, request, l_stack);
1659 1663
1660 1664 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
1661 1665 STACK_POP(vertex_stack, l_stack);
1662 1666 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
1663 1667 ep = NEXT_ADJ(ep)) {
1664 1668 if (IS_RECOMPUTE(ep->to_vertex))
1665 1669 continue;
1666 1670 ep->to_vertex->l_state |= RECOMPUTE_LOCK;
1667 1671 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
1668 1672 nvertex++;
1669 1673 }
1670 1674 }
1671 1675
1672 1676 /*
1673 1677 * allocate memory for holding the vertex pointers
1674 1678 */
1675 1679
1676 1680 if (nvertex) {
1677 1681 topology = kmem_zalloc(nvertex * sizeof (lock_descriptor_t *),
1678 1682 KM_SLEEP);
1679 1683 }
1680 1684
1681 1685 /*
1682 1686 * one more pass to actually store the vertices in the
1683 1687 * allocated array.
1684 1688 * We first check sleeping locks and then active locks
1685 1689 * so that topology array will be in a topological
1686 1690 * order.
1687 1691 */
1688 1692
1689 1693 nvertex = 0;
1690 1694 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
1691 1695
1692 1696 if (lock) {
1693 1697 do {
1694 1698 if (IS_RECOMPUTE(lock)) {
1695 1699 lock->l_index = nvertex;
1696 1700 topology[nvertex++] = lock;
1697 1701 }
1698 1702 lock->l_color = NO_COLOR;
1699 1703 lock = lock->l_next;
1700 1704 } while (lock->l_vnode == vp);
1701 1705 }
1702 1706
1703 1707 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1704 1708
1705 1709 if (lock) {
1706 1710 do {
1707 1711 if (IS_RECOMPUTE(lock)) {
1708 1712 lock->l_index = nvertex;
1709 1713 topology[nvertex++] = lock;
1710 1714 }
1711 1715 lock->l_color = NO_COLOR;
1712 1716 lock = lock->l_next;
1713 1717 } while (lock->l_vnode == vp);
1714 1718 }
1715 1719
1716 1720 /*
1717 1721 * remove in and out edges of request
1718 1722 * They are freed after updating proc_graph below.
1719 1723 */
1720 1724
1721 1725 for (ep = FIRST_IN(request); ep != HEAD(request); ep = NEXT_IN(ep)) {
1722 1726 ADJ_LIST_REMOVE(ep);
1723 1727 }
1724 1728
1725 1729
1726 1730 if (remove_from_queue)
1727 1731 REMOVE_SLEEP_QUEUE(request);
1728 1732
1729 1733 /* we are ready to recompute */
1730 1734
1731 1735 flk_recompute_dependencies(request, topology, nvertex, 1);
1732 1736
1733 1737 ep = FIRST_ADJ(request);
1734 1738 while (ep != HEAD(request)) {
1735 1739 IN_LIST_REMOVE(ep);
1736 1740 request->l_sedge = NEXT_ADJ(ep);
1737 1741 ADJ_LIST_REMOVE(ep);
1738 1742 flk_update_proc_graph(ep, 1);
1739 1743 flk_free_edge(ep);
1740 1744 ep = request->l_sedge;
1741 1745 }
1742 1746
1743 1747
1744 1748 /*
1745 1749 * unset the RECOMPUTE flag in those vertices
1746 1750 */
1747 1751
1748 1752 for (i = 0; i < nvertex; i++) {
1749 1753 topology[i]->l_state &= ~RECOMPUTE_LOCK;
1750 1754 }
1751 1755
1752 1756 /*
1753 1757 * free the topology
1754 1758 */
1755 1759 if (nvertex)
1756 1760 kmem_free((void *)topology,
1757 1761 (nvertex * sizeof (lock_descriptor_t *)));
1758 1762 /*
1759 1763 	 * Some locks may have become unblocked now.
1760 1764 */
1761 1765
1762 1766 flk_wakeup(request, 0);
1763 1767
1764 1768 /*
1765 1769 * we expect to have a correctly recomputed graph now.
1766 1770 */
1767 1771 flk_set_state(request, FLK_DEAD_STATE);
1768 1772 flk_free_lock(request);
1769 1773 CHECK_SLEEPING_LOCKS(gp);
1770 1774 CHECK_ACTIVE_LOCKS(gp);
1771 1775
1772 1776 }
1773 1777
1774 1778 /*
1775 1779  * Uncoloring the graph simply increments the graph's mark value; only when
1776 1780  * the mark wraps around do we explicitly reset the color of every vertex
1777 1781  * in the graph.
1778 1782 */
1779 1783
1780 1784 static void
1781 1785 flk_graph_uncolor(graph_t *gp)
1782 1786 {
1783 1787 lock_descriptor_t *lock;
1784 1788
1785 1789 if (gp->mark == UINT_MAX) {
1786 1790 gp->mark = 1;
1787 1791 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
1788 1792 lock = lock->l_next)
1789 1793 lock->l_color = 0;
1790 1794
1791 1795 for (lock = SLEEPING_HEAD(gp)->l_next; lock != SLEEPING_HEAD(gp);
1792 1796 lock = lock->l_next)
1793 1797 lock->l_color = 0;
1794 1798 } else {
1795 1799 gp->mark++;
1796 1800 }
1797 1801 }
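
The trick above is generic. A minimal user-level sketch of the same idea, with invented names rather than the kernel's COLOR()/COLORED() macros, shows why uncoloring is O(1) except on wrap-around:

/* Illustrative only; not part of flock.c. */
#include <limits.h>
#include <stdio.h>

struct node { unsigned color; };
struct graph { unsigned mark; struct node nodes[4]; };

/* A node is "colored" iff its color equals the graph's current mark. */
static int
colored(struct graph *g, struct node *n)
{
	return (n->color == g->mark);
}

static void
color(struct graph *g, struct node *n)
{
	n->color = g->mark;
}

/* Uncoloring everything costs O(1), except when the mark wraps around. */
static void
uncolor_all(struct graph *g)
{
	int i;

	if (g->mark == UINT_MAX) {
		for (i = 0; i < 4; i++)
			g->nodes[i].color = 0;
		g->mark = 1;
	} else {
		g->mark++;
	}
}

int
main(void)
{
	struct graph g = { .mark = 1 };

	color(&g, &g.nodes[0]);
	uncolor_all(&g);
	(void) printf("%d\n", colored(&g, &g.nodes[0]));	/* prints 0 */
	return (0);
}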
1798 1802
1799 1803 /*
1800 1804 * Wake up locks that are blocked on the given lock.
1801 1805 */
1802 1806
1803 1807 static void
1804 1808 flk_wakeup(lock_descriptor_t *lock, int adj_list_remove)
1805 1809 {
1806 1810 edge_t *ep;
1807 1811 graph_t *gp = lock->l_graph;
1808 1812 lock_descriptor_t *lck;
1809 1813
1810 1814 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1811 1815 if (NO_DEPENDENTS(lock))
1812 1816 return;
1813 1817 ep = FIRST_IN(lock);
1814 1818 do {
1815 1819 /*
1816 1820 		 * Delete the edge from the adjacency list of the
1817 1821 		 * 'from' vertex. If that vertex has no more adjacent
1818 1822 		 * edges, wake its process.
1819 1823 */
1820 1824 lck = ep->from_vertex;
1821 1825 if (adj_list_remove)
1822 1826 ADJ_LIST_REMOVE(ep);
1823 1827 flk_update_proc_graph(ep, 1);
1824 1828 if (NOT_BLOCKED(lck)) {
1825 1829 GRANT_WAKEUP(lck);
1826 1830 }
1827 1831 lock->l_sedge = NEXT_IN(ep);
1828 1832 IN_LIST_REMOVE(ep);
1829 1833 flk_free_edge(ep);
1830 1834 ep = lock->l_sedge;
1831 1835 } while (ep != HEAD(lock));
1832 1836 ASSERT(NO_DEPENDENTS(lock));
1833 1837 }
1834 1838
1835 1839 /*
1836 1840  * Each dependent of request is checked for its dependency against the
1837 1841  * locks in topology (called topology because the array is, and should be,
1838 1842  * in topological order for this algorithm; if it is not in topological
1839 1843  * order the inner loop below might add more edges than necessary.
1840 1844  * Topological ordering of vertices satisfies the property that all edges
1841 1845  * go from left to right, i.e., topology[i] can have an edge to topology[j]
1842 1846  * iff i < j). If lock l1 in the dependent set of request is dependent on
1843 1847  * (blocked by) lock l2 in topology but does not already have a path to it,
1844 1848  * we add an edge in the inner loop below.
1845 1849  *
1846 1850  * We don't want to add an edge between l1 and l2 if there already exists
1847 1851  * a path from l1 to l2, so care has to be taken for those vertices
1848 1852  * that have two paths to 'request'. These vertices are referred to here
1849 1853  * as barrier locks.
1850 1854  *
1851 1855  * The barriers have to be found (those vertices that originally had two
1852 1856  * paths to request) because otherwise we may end up adding edges
1853 1857  * unnecessarily to vertices in topology; a barrier vertex could otherwise
1854 1858  * end up with both an edge to a vertex in topology and a path to it.
1855 1859 */
1856 1860
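/*
 * A small worked example of the barrier count (illustrative only; an
 * arrow means "is blocked by"):
 *
 *	l3 ---> l1 ---> request
 *	  \               ^
 *	   \              |
 *	    +----> l2 ----+
 *
 * l3 is blocked by l1 and by l2, both of which are blocked by request,
 * so l3 has two paths to request. flk_find_barriers() therefore marks l3
 * as a barrier with l_index = 1 (number of paths minus one). The walk in
 * flk_recompute_dependencies() decrements that count each time it reaches
 * l3 and recomputes l3's edges only after the count drops to zero, i.e.,
 * once both paths have been explored.
 */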
1857 1861 static void
1858 1862 flk_recompute_dependencies(lock_descriptor_t *request,
1859 1863 lock_descriptor_t **topology,
1860 1864 int nvertex, int update_graph)
1861 1865 {
1862 1866 lock_descriptor_t *vertex, *lock;
1863 1867 graph_t *gp = request->l_graph;
1864 1868 int i, count;
1865 1869 int barrier_found = 0;
1866 1870 edge_t *ep;
1867 1871 lock_descriptor_t *vertex_stack;
1868 1872
1869 1873 STACK_INIT(vertex_stack);
1870 1874
1871 1875 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1872 1876 if (nvertex == 0)
1873 1877 return;
1874 1878 flk_graph_uncolor(request->l_graph);
1875 1879 barrier_found = flk_find_barriers(request);
1876 1880 request->l_state |= RECOMPUTE_DONE;
1877 1881
1878 1882 STACK_PUSH(vertex_stack, request, l_stack);
1879 1883 request->l_sedge = FIRST_IN(request);
1880 1884
1881 1885
1882 1886 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
1883 1887 if (vertex->l_state & RECOMPUTE_DONE) {
1884 1888 count = 0;
1885 1889 goto next_in_edge;
1886 1890 }
1887 1891 if (IS_BARRIER(vertex)) {
1888 1892 /* decrement the barrier count */
1889 1893 if (vertex->l_index) {
1890 1894 vertex->l_index--;
1891 1895 /* this guy will be pushed again anyway ? */
1892 1896 STACK_POP(vertex_stack, l_stack);
1893 1897 if (vertex->l_index == 0) {
1894 1898 /*
1895 1899 * barrier is over we can recompute
1896 1900 * dependencies for this lock in the
1897 1901 * next stack pop
1898 1902 */
1899 1903 vertex->l_state &= ~BARRIER_LOCK;
1900 1904 }
1901 1905 continue;
1902 1906 }
1903 1907 }
1904 1908 vertex->l_state |= RECOMPUTE_DONE;
1905 1909 flk_graph_uncolor(gp);
1906 1910 count = flk_color_reachables(vertex);
1907 1911 for (i = 0; i < nvertex; i++) {
1908 1912 lock = topology[i];
1909 1913 if (COLORED(lock))
1910 1914 continue;
1911 1915 if (BLOCKS(lock, vertex)) {
1912 1916 (void) flk_add_edge(vertex, lock,
1913 1917 NO_CHECK_CYCLE, update_graph);
1914 1918 COLOR(lock);
1915 1919 count++;
1916 1920 count += flk_color_reachables(lock);
1917 1921 }
1918 1922
1919 1923 }
1920 1924
1921 1925 next_in_edge:
1922 1926 if (count == nvertex ||
1923 1927 vertex->l_sedge == HEAD(vertex)) {
1924 1928 /* prune the tree below this */
1925 1929 STACK_POP(vertex_stack, l_stack);
1926 1930 vertex->l_state &= ~RECOMPUTE_DONE;
1927 1931 /* update the barrier locks below this! */
1928 1932 if (vertex->l_sedge != HEAD(vertex) && barrier_found) {
1929 1933 flk_graph_uncolor(gp);
1930 1934 flk_update_barriers(vertex);
1931 1935 }
1932 1936 continue;
1933 1937 }
1934 1938
1935 1939 ep = vertex->l_sedge;
1936 1940 lock = ep->from_vertex;
1937 1941 STACK_PUSH(vertex_stack, lock, l_stack);
1938 1942 lock->l_sedge = FIRST_IN(lock);
1939 1943 vertex->l_sedge = NEXT_IN(ep);
1940 1944 }
1941 1945
1942 1946 }
1943 1947
1944 1948 /*
1945 1949  * Color all vertices reachable from 'vertex', counting those that belong to
1946 1950  * topology (i.e., have RECOMPUTE_LOCK set in their state) and are not yet colored.
1947 1951 *
1948 1952 * Note: we need to use a different stack_link l_stack1 because this is
1949 1953 * called from flk_recompute_dependencies() that already uses a stack with
1950 1954 * l_stack as stack_link.
1951 1955 */
1952 1956
1953 1957 static int
1954 1958 flk_color_reachables(lock_descriptor_t *vertex)
1955 1959 {
1956 1960 lock_descriptor_t *ver, *lock;
1957 1961 int count;
1958 1962 edge_t *ep;
1959 1963 lock_descriptor_t *vertex_stack;
1960 1964
1961 1965 STACK_INIT(vertex_stack);
1962 1966
1963 1967 STACK_PUSH(vertex_stack, vertex, l_stack1);
1964 1968 count = 0;
1965 1969 while ((ver = STACK_TOP(vertex_stack)) != NULL) {
1966 1970
1967 1971 STACK_POP(vertex_stack, l_stack1);
1968 1972 for (ep = FIRST_ADJ(ver); ep != HEAD(ver);
1969 1973 ep = NEXT_ADJ(ep)) {
1970 1974 lock = ep->to_vertex;
1971 1975 if (COLORED(lock))
1972 1976 continue;
1973 1977 COLOR(lock);
1974 1978 if (IS_RECOMPUTE(lock))
1975 1979 count++;
1976 1980 STACK_PUSH(vertex_stack, lock, l_stack1);
1977 1981 }
1978 1982
1979 1983 }
1980 1984 return (count);
1981 1985 }
1982 1986
1983 1987 /*
1984 1988 * Called from flk_recompute_dependencies() this routine decrements
1985 1989 * the barrier count of barrier vertices that are reachable from lock.
1986 1990 */
1987 1991
1988 1992 static void
1989 1993 flk_update_barriers(lock_descriptor_t *lock)
1990 1994 {
1991 1995 lock_descriptor_t *vertex, *lck;
1992 1996 edge_t *ep;
1993 1997 lock_descriptor_t *vertex_stack;
1994 1998
1995 1999 STACK_INIT(vertex_stack);
1996 2000
1997 2001 STACK_PUSH(vertex_stack, lock, l_stack1);
1998 2002
1999 2003 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
2000 2004 STACK_POP(vertex_stack, l_stack1);
2001 2005 for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
2002 2006 ep = NEXT_IN(ep)) {
2003 2007 lck = ep->from_vertex;
2004 2008 if (COLORED(lck)) {
2005 2009 if (IS_BARRIER(lck)) {
2006 2010 ASSERT(lck->l_index > 0);
2007 2011 lck->l_index--;
2008 2012 if (lck->l_index == 0)
2009 2013 lck->l_state &= ~BARRIER_LOCK;
2010 2014 }
2011 2015 continue;
2012 2016 }
2013 2017 COLOR(lck);
2014 2018 if (IS_BARRIER(lck)) {
2015 2019 ASSERT(lck->l_index > 0);
2016 2020 lck->l_index--;
2017 2021 if (lck->l_index == 0)
2018 2022 lck->l_state &= ~BARRIER_LOCK;
2019 2023 }
2020 2024 STACK_PUSH(vertex_stack, lck, l_stack1);
2021 2025 }
2022 2026 }
2023 2027 }
2024 2028
2025 2029 /*
2026 2030  * Find all vertices that are reachable from 'lock' via more than one path,
2027 2031  * mark them as barrier vertices, and increment their barrier count.
2028 2032  * The barrier count is one less than the total number of paths from lock
2029 2033 * to that vertex.
2030 2034 */
2031 2035
2032 2036 static int
2033 2037 flk_find_barriers(lock_descriptor_t *lock)
2034 2038 {
2035 2039 lock_descriptor_t *vertex, *lck;
2036 2040 int found = 0;
2037 2041 edge_t *ep;
2038 2042 lock_descriptor_t *vertex_stack;
2039 2043
2040 2044 STACK_INIT(vertex_stack);
2041 2045
2042 2046 STACK_PUSH(vertex_stack, lock, l_stack1);
2043 2047
2044 2048 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
2045 2049 STACK_POP(vertex_stack, l_stack1);
2046 2050 for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
2047 2051 ep = NEXT_IN(ep)) {
2048 2052 lck = ep->from_vertex;
2049 2053 if (COLORED(lck)) {
2050 2054 /* this is a barrier */
2051 2055 lck->l_state |= BARRIER_LOCK;
2052 2056 /* index will have barrier count */
2053 2057 lck->l_index++;
2054 2058 if (!found)
2055 2059 found = 1;
2056 2060 continue;
2057 2061 }
2058 2062 COLOR(lck);
2059 2063 lck->l_index = 0;
2060 2064 STACK_PUSH(vertex_stack, lck, l_stack1);
2061 2065 }
2062 2066 }
2063 2067 return (found);
2064 2068 }
2065 2069
2066 2070 /*
2067 2071 * Finds the first lock that is mainly responsible for blocking this
2068 2072 * request. If there is no such lock, request->l_flock.l_type is set to
2069 2073 * F_UNLCK. Otherwise, request->l_flock is filled in with the particulars
2070 2074 * of the blocking lock.
2071 2075 *
2072 2076 * Note: It is possible a request is blocked by a sleeping lock because
2073 2077 * of the fairness policy used in flk_process_request() to construct the
2074 2078 * dependencies. (see comments before flk_process_request()).
2075 2079 */
2076 2080
2077 2081 static void
2078 2082 flk_get_first_blocking_lock(lock_descriptor_t *request)
2079 2083 {
2080 2084 graph_t *gp = request->l_graph;
2081 2085 vnode_t *vp = request->l_vnode;
2082 2086 lock_descriptor_t *lock, *blocker;
2083 2087
2084 2088 ASSERT(MUTEX_HELD(&gp->gp_mutex));
2085 2089 blocker = NULL;
2086 2090 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2087 2091
2088 2092 if (lock) {
2089 2093 do {
2090 2094 if (BLOCKS(lock, request)) {
2091 2095 blocker = lock;
2092 2096 break;
2093 2097 }
2094 2098 lock = lock->l_next;
2095 2099 } while (lock->l_vnode == vp);
2096 2100 }
2097 2101
2098 2102 if (blocker) {
2099 2103 report_blocker(blocker, request);
2100 2104 } else
2101 2105 request->l_flock.l_type = F_UNLCK;
2102 2106 }
2103 2107
2104 2108 /*
2105 2109 * Get the graph_t structure associated with a vnode.
2106 2110 * If 'initialize' is non-zero, and the graph_t structure for this vnode has
2107 2111 * not yet been initialized, then a new element is allocated and returned.
2108 2112 */
2109 2113 graph_t *
2110 2114 flk_get_lock_graph(vnode_t *vp, int initialize)
2111 2115 {
2112 2116 graph_t *gp;
2113 2117 graph_t *gp_alloc = NULL;
2114 2118 int index = HASH_INDEX(vp);
2115 2119
2116 2120 if (initialize == FLK_USE_GRAPH) {
2117 2121 mutex_enter(&flock_lock);
2118 2122 gp = lock_graph[index];
2119 2123 mutex_exit(&flock_lock);
2120 2124 return (gp);
2121 2125 }
2122 2126
2123 2127 ASSERT(initialize == FLK_INIT_GRAPH);
2124 2128
2125 2129 if (lock_graph[index] == NULL) {
2126 2130
2127 2131 gp_alloc = kmem_zalloc(sizeof (graph_t), KM_SLEEP);
2128 2132
2129 2133 /* Initialize the graph */
2130 2134
2131 2135 gp_alloc->active_locks.l_next =
2132 2136 gp_alloc->active_locks.l_prev =
2133 2137 (lock_descriptor_t *)ACTIVE_HEAD(gp_alloc);
2134 2138 gp_alloc->sleeping_locks.l_next =
2135 2139 gp_alloc->sleeping_locks.l_prev =
2136 2140 (lock_descriptor_t *)SLEEPING_HEAD(gp_alloc);
2137 2141 gp_alloc->index = index;
2138 2142 mutex_init(&gp_alloc->gp_mutex, NULL, MUTEX_DEFAULT, NULL);
2139 2143 }
2140 2144
2141 2145 mutex_enter(&flock_lock);
2142 2146
2143 2147 gp = lock_graph[index];
2144 2148
2145 2149 /* Recheck the value within flock_lock */
2146 2150 if (gp == NULL) {
2147 2151 struct flock_globals *fg;
2148 2152
2149 2153 /* We must have previously allocated the graph_t structure */
2150 2154 ASSERT(gp_alloc != NULL);
2151 2155 lock_graph[index] = gp = gp_alloc;
2152 2156 /*
2153 2157 * The lockmgr status is only needed if KLM is loaded.
2154 2158 */
2155 2159 if (flock_zone_key != ZONE_KEY_UNINITIALIZED) {
2156 2160 fg = flk_get_globals();
2157 2161 fg->lockmgr_status[index] = fg->flk_lockmgr_status;
2158 2162 }
2159 2163 }
2160 2164
2161 2165 mutex_exit(&flock_lock);
2162 2166
2163 2167 if ((gp_alloc != NULL) && (gp != gp_alloc)) {
2164 2168 /* There was a race to allocate the graph_t and we lost */
2165 2169 mutex_destroy(&gp_alloc->gp_mutex);
2166 2170 kmem_free(gp_alloc, sizeof (graph_t));
2167 2171 }
2168 2172
2169 2173 return (gp);
2170 2174 }
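
The allocation dance above follows a common pattern: allocate optimistically before taking the lock, recheck under the lock, and discard the allocation if another thread won the race. A minimal user-level sketch of the same shape (pthreads and invented names, purely illustrative):

/* Illustrative only; not part of flock.c. */
#include <pthread.h>
#include <stdlib.h>

struct graph {
	int index;
};

static pthread_mutex_t tbl_lock = PTHREAD_MUTEX_INITIALIZER;
static struct graph *table[64];

static struct graph *
get_graph(int index)
{
	struct graph *gp, *gp_alloc = NULL;

	if (table[index] == NULL)
		gp_alloc = calloc(1, sizeof (*gp_alloc));

	(void) pthread_mutex_lock(&tbl_lock);
	gp = table[index];
	if (gp == NULL && gp_alloc != NULL) {
		gp_alloc->index = index;
		table[index] = gp = gp_alloc;
		gp_alloc = NULL;		/* ownership transferred */
	}
	(void) pthread_mutex_unlock(&tbl_lock);

	free(gp_alloc);				/* NULL, or the losing copy */
	return (gp);
}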
2171 2175
2172 2176 /*
2173 2177 * PSARC case 1997/292
2174 2178 */
2175 2179 int
2176 2180 cl_flk_has_remote_locks_for_nlmid(vnode_t *vp, int nlmid)
2177 2181 {
2178 2182 lock_descriptor_t *lock;
2179 2183 int result = 0;
2180 2184 graph_t *gp;
2181 2185 int lock_nlmid;
2182 2186
2183 2187 /*
2184 2188 * Check to see if node is booted as a cluster. If not, return.
2185 2189 */
2186 2190 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
2187 2191 return (0);
2188 2192 }
2189 2193
2190 2194 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2191 2195 if (gp == NULL) {
2192 2196 return (0);
2193 2197 }
2194 2198
2195 2199 mutex_enter(&gp->gp_mutex);
2196 2200
2197 2201 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2198 2202
2199 2203 if (lock) {
2200 2204 while (lock->l_vnode == vp) {
2201 2205 /* get NLM id from sysid */
2202 2206 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
2203 2207
2204 2208 /*
2205 2209 * If NLM server request _and_ nlmid of lock matches
2206 2210 * nlmid of argument, then we've found a remote lock.
2207 2211 */
2208 2212 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
2209 2213 result = 1;
2210 2214 goto done;
2211 2215 }
2212 2216 lock = lock->l_next;
2213 2217 }
2214 2218 }
2215 2219
2216 2220 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2217 2221
2218 2222 if (lock) {
2219 2223 while (lock->l_vnode == vp) {
2220 2224 /* get NLM id from sysid */
2221 2225 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
2222 2226
2223 2227 /*
2224 2228 * If NLM server request _and_ nlmid of lock matches
2225 2229 * nlmid of argument, then we've found a remote lock.
2226 2230 */
2227 2231 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
2228 2232 result = 1;
2229 2233 goto done;
2230 2234 }
2231 2235 lock = lock->l_next;
2232 2236 }
2233 2237 }
2234 2238
2235 2239 done:
2236 2240 mutex_exit(&gp->gp_mutex);
2237 2241 return (result);
2238 2242 }
2239 2243
2240 2244 /*
2241 2245 * Determine whether there are any locks for the given vnode with a remote
2242 2246 * sysid. Returns zero if not, non-zero if there are.
2243 2247 *
2244 2248 * Note that the return value from this function is potentially invalid
2245 2249 * once it has been returned. The caller is responsible for providing its
2246 2250 * own synchronization mechanism to ensure that the return value is useful
2247 2251 * (e.g., see nfs_lockcompletion()).
2248 2252 */
2249 2253 int
2250 2254 flk_has_remote_locks(vnode_t *vp)
2251 2255 {
2252 2256 lock_descriptor_t *lock;
2253 2257 int result = 0;
2254 2258 graph_t *gp;
2255 2259
2256 2260 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2257 2261 if (gp == NULL) {
2258 2262 return (0);
2259 2263 }
2260 2264
2261 2265 mutex_enter(&gp->gp_mutex);
2262 2266
2263 2267 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2264 2268
2265 2269 if (lock) {
2266 2270 while (lock->l_vnode == vp) {
2267 2271 if (IS_REMOTE(lock)) {
2268 2272 result = 1;
2269 2273 goto done;
2270 2274 }
2271 2275 lock = lock->l_next;
2272 2276 }
2273 2277 }
2274 2278
2275 2279 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2276 2280
2277 2281 if (lock) {
2278 2282 while (lock->l_vnode == vp) {
2279 2283 if (IS_REMOTE(lock)) {
2280 2284 result = 1;
2281 2285 goto done;
2286 + }
2287 + lock = lock->l_next;
2288 + }
2289 + }
2290 +
2291 +done:
2292 + mutex_exit(&gp->gp_mutex);
2293 + return (result);
2294 +}
2295 +
2296 +/*
2297 + * Determine whether there are any locks for the given vnode with a remote
2298 + * sysid matching given sysid.
2299 + * Used by the new (open source) NFS Lock Manager (NLM)
2300 + */
2301 +int
2302 +flk_has_remote_locks_for_sysid(vnode_t *vp, int sysid)
2303 +{
2304 + lock_descriptor_t *lock;
2305 + int result = 0;
2306 + graph_t *gp;
2307 +
2308 + if (sysid == 0)
2309 + return (0);
2310 +
2311 + gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2312 + if (gp == NULL) {
2313 + return (0);
2314 + }
2315 +
2316 + mutex_enter(&gp->gp_mutex);
2317 +
2318 + SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2319 +
2320 + if (lock) {
2321 + while (lock->l_vnode == vp) {
2322 + if (lock->l_flock.l_sysid == sysid) {
2323 + result = 1;
2324 + goto done;
2325 + }
2326 + lock = lock->l_next;
2327 + }
2328 + }
2329 +
2330 + SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2331 +
2332 + if (lock) {
2333 + while (lock->l_vnode == vp) {
2334 + if (lock->l_flock.l_sysid == sysid) {
2335 + result = 1;
2336 + goto done;
2282 2337 }
2283 2338 lock = lock->l_next;
2284 2339 }
2285 2340 }
2286 2341
2287 2342 done:
2288 2343 mutex_exit(&gp->gp_mutex);
2289 2344 return (result);
2290 2345 }
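
A hypothetical caller, to show the intended use of the new interface: the NLM can ask whether a client, identified by its sysid, still owns or is waiting on locks for a vnode before tearing down per-host state. The wrapper name below is invented for illustration; the NLM side is not part of this change, and the prototype is assumed to be exported from <sys/flock.h> alongside flk_has_remote_locks().

/* Illustrative only; nlm_client_holds_locks() is an invented name. */
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/flock.h>

static boolean_t
nlm_client_holds_locks(vnode_t *vp, int sysid)
{
	/* Non-zero means an active or sleeping lock with this sysid exists. */
	return (flk_has_remote_locks_for_sysid(vp, sysid) != 0 ?
	    B_TRUE : B_FALSE);
}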
2291 2346
2292 2347 /*
2293 2348 * Determine if there are any locks owned by the given sysid.
2294 2349 * Returns zero if not, non-zero if there are. Note that this return code
2295 2350 * could be derived from flk_get_{sleeping,active}_locks, but this routine
2296 2351 * avoids all the memory allocations of those routines.
2297 2352 *
2298 2353 * This routine has the same synchronization issues as
2299 2354 * flk_has_remote_locks.
2300 2355 */
2301 2356
2302 2357 int
2303 2358 flk_sysid_has_locks(int sysid, int lck_type)
2304 2359 {
2305 2360 int has_locks = 0;
2306 2361 lock_descriptor_t *lock;
2307 2362 graph_t *gp;
2308 2363 int i;
2309 2364
2310 2365 for (i = 0; i < HASH_SIZE && !has_locks; i++) {
2311 2366 mutex_enter(&flock_lock);
2312 2367 gp = lock_graph[i];
2313 2368 mutex_exit(&flock_lock);
2314 2369 if (gp == NULL) {
2315 2370 continue;
2316 2371 }
2317 2372
2318 2373 mutex_enter(&gp->gp_mutex);
2319 2374
2320 2375 if (lck_type & FLK_QUERY_ACTIVE) {
2321 2376 for (lock = ACTIVE_HEAD(gp)->l_next;
2322 2377 lock != ACTIVE_HEAD(gp) && !has_locks;
2323 2378 lock = lock->l_next) {
2324 2379 if (lock->l_flock.l_sysid == sysid)
2325 2380 has_locks = 1;
2326 2381 }
2327 2382 }
2328 2383
2329 2384 if (lck_type & FLK_QUERY_SLEEPING) {
2330 2385 for (lock = SLEEPING_HEAD(gp)->l_next;
2331 2386 lock != SLEEPING_HEAD(gp) && !has_locks;
2332 2387 lock = lock->l_next) {
2333 2388 if (lock->l_flock.l_sysid == sysid)
2334 2389 has_locks = 1;
2335 2390 }
2336 2391 }
2337 2392 mutex_exit(&gp->gp_mutex);
2338 2393 }
2339 2394
2340 2395 return (has_locks);
2341 2396 }
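
For instance, a caller that only cares whether a client has any state at all, active or sleeping, passes both query flags (fragment; surrounding context assumed):

	if (flk_sysid_has_locks(sysid,
	    FLK_QUERY_ACTIVE | FLK_QUERY_SLEEPING)) {
		/* the client still owns or is waiting on locks */
	}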
2342 2397
2343 2398
2344 2399 /*
2345 2400 * PSARC case 1997/292
2346 2401 *
2347 2402  * Requires: "sysid" is a pair [nlmid, sysid]. The lower half is a 16-bit
2348 2403 * quantity, the real sysid generated by the NLM server; the upper half
2349 2404 * identifies the node of the cluster where the NLM server ran.
2350 2405 * This routine is only called by an NLM server running in a cluster.
2351 2406 * Effects: Remove all locks held on behalf of the client identified
2352 2407 * by "sysid."
2353 2408 */
2354 2409 void
2355 2410 cl_flk_remove_locks_by_sysid(int sysid)
2356 2411 {
2357 2412 graph_t *gp;
2358 2413 int i;
2359 2414 lock_descriptor_t *lock, *nlock;
2360 2415
2361 2416 /*
2362 2417 * Check to see if node is booted as a cluster. If not, return.
2363 2418 */
2364 2419 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
2365 2420 return;
2366 2421 }
2367 2422
2368 2423 ASSERT(sysid != 0);
2369 2424 for (i = 0; i < HASH_SIZE; i++) {
2370 2425 mutex_enter(&flock_lock);
2371 2426 gp = lock_graph[i];
2372 2427 mutex_exit(&flock_lock);
2373 2428
2374 2429 if (gp == NULL)
2375 2430 continue;
2376 2431
2377 2432 mutex_enter(&gp->gp_mutex); /* get mutex on lock graph */
2378 2433
2379 2434 /* signal sleeping requests so that they bail out */
2380 2435 lock = SLEEPING_HEAD(gp)->l_next;
2381 2436 while (lock != SLEEPING_HEAD(gp)) {
2382 2437 nlock = lock->l_next;
2383 2438 if (lock->l_flock.l_sysid == sysid) {
2384 2439 INTERRUPT_WAKEUP(lock);
2385 2440 }
2386 2441 lock = nlock;
2387 2442 }
2388 2443
2389 2444 /* delete active locks */
2390 2445 lock = ACTIVE_HEAD(gp)->l_next;
2391 2446 while (lock != ACTIVE_HEAD(gp)) {
2392 2447 nlock = lock->l_next;
2393 2448 if (lock->l_flock.l_sysid == sysid) {
2394 2449 flk_delete_active_lock(lock, 0);
2395 2450 flk_wakeup(lock, 1);
2396 2451 flk_free_lock(lock);
2397 2452 }
2398 2453 lock = nlock;
2399 2454 }
2400 2455 mutex_exit(&gp->gp_mutex); /* release mutex on lock graph */
2401 2456 }
2402 2457 }
2403 2458
2404 2459 /*
2405 2460  * Delete all locks in the system that belong to the sysid of the request.
2406 2461 */
2407 2462
2408 2463 static void
2409 2464 flk_delete_locks_by_sysid(lock_descriptor_t *request)
2410 2465 {
2411 2466 int sysid = request->l_flock.l_sysid;
2412 2467 lock_descriptor_t *lock, *nlock;
2413 2468 graph_t *gp;
2414 2469 int i;
2415 2470
2416 2471 ASSERT(MUTEX_HELD(&request->l_graph->gp_mutex));
2417 2472 ASSERT(sysid != 0);
2418 2473
2419 2474 mutex_exit(&request->l_graph->gp_mutex);
2420 2475
2421 2476 for (i = 0; i < HASH_SIZE; i++) {
2422 2477 mutex_enter(&flock_lock);
2423 2478 gp = lock_graph[i];
2424 2479 mutex_exit(&flock_lock);
2425 2480
2426 2481 if (gp == NULL)
2427 2482 continue;
2428 2483
2429 2484 mutex_enter(&gp->gp_mutex);
2430 2485
2431 2486 /* signal sleeping requests so that they bail out */
2432 2487 lock = SLEEPING_HEAD(gp)->l_next;
2433 2488 while (lock != SLEEPING_HEAD(gp)) {
2434 2489 nlock = lock->l_next;
2435 2490 if (lock->l_flock.l_sysid == sysid) {
2436 2491 INTERRUPT_WAKEUP(lock);
2437 2492 }
2438 2493 lock = nlock;
2439 2494 }
2440 2495
2441 2496 /* delete active locks */
2442 2497 lock = ACTIVE_HEAD(gp)->l_next;
2443 2498 while (lock != ACTIVE_HEAD(gp)) {
2444 2499 nlock = lock->l_next;
2445 2500 if (lock->l_flock.l_sysid == sysid) {
2446 2501 flk_delete_active_lock(lock, 0);
2447 2502 flk_wakeup(lock, 1);
2448 2503 flk_free_lock(lock);
2449 2504 }
2450 2505 lock = nlock;
2451 2506 }
2452 2507 mutex_exit(&gp->gp_mutex);
2453 2508 }
2454 2509
2455 2510 mutex_enter(&request->l_graph->gp_mutex);
2456 2511 }
2457 2512
2458 2513 /*
2459 2514 * Clustering: Deletes PXFS locks
2460 2515 * Effects: Delete all locks on files in the given file system and with the
2461 2516 * given PXFS id.
2462 2517 */
2463 2518 void
2464 2519 cl_flk_delete_pxfs_locks(struct vfs *vfsp, int pxfsid)
2465 2520 {
2466 2521 lock_descriptor_t *lock, *nlock;
2467 2522 graph_t *gp;
2468 2523 int i;
2469 2524
2470 2525 for (i = 0; i < HASH_SIZE; i++) {
2471 2526 mutex_enter(&flock_lock);
2472 2527 gp = lock_graph[i];
2473 2528 mutex_exit(&flock_lock);
2474 2529
2475 2530 if (gp == NULL)
2476 2531 continue;
2477 2532
2478 2533 mutex_enter(&gp->gp_mutex);
2479 2534
2480 2535 /* signal sleeping requests so that they bail out */
2481 2536 lock = SLEEPING_HEAD(gp)->l_next;
2482 2537 while (lock != SLEEPING_HEAD(gp)) {
2483 2538 nlock = lock->l_next;
2484 2539 if (lock->l_vnode->v_vfsp == vfsp) {
2485 2540 ASSERT(IS_PXFS(lock));
2486 2541 if (GETPXFSID(lock->l_flock.l_sysid) ==
2487 2542 pxfsid) {
2488 2543 flk_set_state(lock,
2489 2544 FLK_CANCELLED_STATE);
2490 2545 flk_cancel_sleeping_lock(lock, 1);
2491 2546 }
2492 2547 }
2493 2548 lock = nlock;
2494 2549 }
2495 2550
2496 2551 /* delete active locks */
2497 2552 lock = ACTIVE_HEAD(gp)->l_next;
2498 2553 while (lock != ACTIVE_HEAD(gp)) {
2499 2554 nlock = lock->l_next;
2500 2555 if (lock->l_vnode->v_vfsp == vfsp) {
2501 2556 ASSERT(IS_PXFS(lock));
2502 2557 if (GETPXFSID(lock->l_flock.l_sysid) ==
2503 2558 pxfsid) {
2504 2559 flk_delete_active_lock(lock, 0);
2505 2560 flk_wakeup(lock, 1);
2506 2561 flk_free_lock(lock);
2507 2562 }
2508 2563 }
2509 2564 lock = nlock;
2510 2565 }
2511 2566 mutex_exit(&gp->gp_mutex);
2512 2567 }
2513 2568 }
2514 2569
2515 2570 /*
2516 2571 * Search for a sleeping lock manager lock which matches exactly this lock
2517 2572 * request; if one is found, fake a signal to cancel it.
2518 2573 *
2519 2574 * Return 1 if a matching lock was found, 0 otherwise.
2520 2575 */
2521 2576
2522 2577 static int
2523 2578 flk_canceled(lock_descriptor_t *request)
2524 2579 {
2525 2580 lock_descriptor_t *lock, *nlock;
2526 2581 graph_t *gp = request->l_graph;
2527 2582 vnode_t *vp = request->l_vnode;
2528 2583
2529 2584 ASSERT(MUTEX_HELD(&gp->gp_mutex));
2530 2585 ASSERT(IS_LOCKMGR(request));
2531 2586 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2532 2587
2533 2588 if (lock) {
2534 2589 while (lock->l_vnode == vp) {
2535 2590 nlock = lock->l_next;
2536 2591 if (SAME_OWNER(lock, request) &&
2537 2592 lock->l_start == request->l_start &&
2538 2593 lock->l_end == request->l_end) {
2539 2594 INTERRUPT_WAKEUP(lock);
2540 2595 return (1);
2541 2596 }
2542 2597 lock = nlock;
2543 2598 }
2544 2599 }
2545 2600 return (0);
2546 2601 }
2547 2602
2548 2603 /*
2549 2604 * Remove all the locks for the vnode belonging to the given pid and sysid.
2550 2605 */
2551 2606
2552 2607 void
2553 2608 cleanlocks(vnode_t *vp, pid_t pid, int sysid)
2554 2609 {
2555 2610 graph_t *gp;
2556 2611 lock_descriptor_t *lock, *nlock;
2557 2612 lock_descriptor_t *link_stack;
2558 2613
2559 2614 STACK_INIT(link_stack);
2560 2615
2561 2616 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2562 2617
2563 2618 if (gp == NULL)
2564 2619 return;
2565 2620 mutex_enter(&gp->gp_mutex);
2566 2621
2567 2622 CHECK_SLEEPING_LOCKS(gp);
2568 2623 CHECK_ACTIVE_LOCKS(gp);
2569 2624
2570 2625 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2571 2626
2572 2627 if (lock) {
2573 2628 do {
2574 2629 nlock = lock->l_next;
2575 2630 if ((lock->l_flock.l_pid == pid ||
2576 2631 pid == IGN_PID) &&
2577 2632 lock->l_flock.l_sysid == sysid) {
2578 2633 CANCEL_WAKEUP(lock);
2579 2634 }
2580 2635 lock = nlock;
2581 2636 } while (lock->l_vnode == vp);
2582 2637 }
2583 2638
2584 2639 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2585 2640
2586 2641 if (lock) {
2587 2642 do {
2588 2643 nlock = lock->l_next;
2589 2644 if ((lock->l_flock.l_pid == pid ||
2590 2645 pid == IGN_PID) &&
2591 2646 lock->l_flock.l_sysid == sysid) {
2592 2647 flk_delete_active_lock(lock, 0);
2593 2648 STACK_PUSH(link_stack, lock, l_stack);
2594 2649 }
2595 2650 lock = nlock;
2596 2651 } while (lock->l_vnode == vp);
2597 2652 }
2598 2653
2599 2654 while ((lock = STACK_TOP(link_stack)) != NULL) {
2600 2655 STACK_POP(link_stack, l_stack);
2601 2656 flk_wakeup(lock, 1);
2602 2657 flk_free_lock(lock);
2603 2658 }
2604 2659
2605 2660 CHECK_SLEEPING_LOCKS(gp);
2606 2661 CHECK_ACTIVE_LOCKS(gp);
2607 2662 CHECK_OWNER_LOCKS(gp, pid, sysid, vp);
2608 2663 mutex_exit(&gp->gp_mutex);
2609 2664 }
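
Illustrative call shapes, to make the IGN_PID convention concrete (sketched, not copied from any particular caller; client_sysid is a stand-in): a local close cleans up a single process's local locks, while a remote-client teardown passes IGN_PID to sweep every pid under that sysid.

	/* local file close: this process, local (sysid 0) locks only */
	cleanlocks(vp, curproc->p_pid, 0);

	/* remote client went away: every pid that used client_sysid */
	cleanlocks(vp, IGN_PID, client_sysid);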
2610 2665
2611 2666
2612 2667 /*
2613 2668 * Called from 'fs' read and write routines for files that have mandatory
2614 2669 * locking enabled.
2615 2670 */
2616 2671
2617 2672 int
2618 2673 chklock(
2619 2674 struct vnode *vp,
2620 2675 int iomode,
2621 2676 u_offset_t offset,
2622 2677 ssize_t len,
2623 2678 int fmode,
2624 2679 caller_context_t *ct)
2625 2680 {
2626 2681 register int i;
2627 2682 struct flock64 bf;
2628 2683 int error = 0;
2629 2684
2630 2685 bf.l_type = (iomode & FWRITE) ? F_WRLCK : F_RDLCK;
2631 2686 bf.l_whence = 0;
2632 2687 bf.l_start = offset;
2633 2688 bf.l_len = len;
2634 2689 if (ct == NULL) {
2635 2690 bf.l_pid = curproc->p_pid;
2636 2691 bf.l_sysid = 0;
2637 2692 } else {
2638 2693 bf.l_pid = ct->cc_pid;
2639 2694 bf.l_sysid = ct->cc_sysid;
2640 2695 }
2641 2696 i = (fmode & (FNDELAY|FNONBLOCK)) ? INOFLCK : INOFLCK|SLPFLCK;
2642 2697 if ((i = reclock(vp, &bf, i, 0, offset, NULL)) != 0 ||
2643 2698 bf.l_type != F_UNLCK)
2644 2699 error = i ? i : EAGAIN;
2645 2700 return (error);
2646 2701 }
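
A sketch of the typical call site in a file system read path (modeled on how local file systems gate mandatory-locked I/O; node_mode stands in for the file's cached mode bits):

	if (MANDLOCK(vp, node_mode)) {
		error = chklock(vp, FREAD, uiop->uio_loffset,
		    uiop->uio_resid, uiop->uio_fmode, ct);
		if (error != 0)
			return (error);
	}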
2647 2702
2648 2703 /*
2649 2704 * convoff - converts the given data (start, whence) to the
2650 2705 * given whence.
2651 2706 */
2652 2707 int
2653 2708 convoff(vp, lckdat, whence, offset)
2654 2709 struct vnode *vp;
2655 2710 struct flock64 *lckdat;
2656 2711 int whence;
2657 2712 offset_t offset;
2658 2713 {
2659 2714 int error;
2660 2715 struct vattr vattr;
2661 2716
2662 2717 if ((lckdat->l_whence == 2) || (whence == 2)) {
2663 2718 vattr.va_mask = AT_SIZE;
2664 2719 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
2665 2720 return (error);
2666 2721 }
2667 2722
2668 2723 switch (lckdat->l_whence) {
2669 2724 case 1:
2670 2725 lckdat->l_start += offset;
2671 2726 break;
2672 2727 case 2:
2673 2728 lckdat->l_start += vattr.va_size;
2674 2729 /* FALLTHRU */
2675 2730 case 0:
2676 2731 break;
2677 2732 default:
2678 2733 return (EINVAL);
2679 2734 }
2680 2735
2681 2736 if (lckdat->l_start < 0)
2682 2737 return (EINVAL);
2683 2738
2684 2739 switch (whence) {
2685 2740 case 1:
2686 2741 lckdat->l_start -= offset;
2687 2742 break;
2688 2743 case 2:
2689 2744 lckdat->l_start -= vattr.va_size;
2690 2745 /* FALLTHRU */
2691 2746 case 0:
2692 2747 break;
2693 2748 default:
2694 2749 return (EINVAL);
2695 2750 }
2696 2751
2697 2752 lckdat->l_whence = (short)whence;
2698 2753 return (0);
2699 2754 }
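
For example, the fcntl() path normalizes a caller-supplied flock64 to absolute (whence 0) form before handing it to reclock(); a fragment of that shape, where fileoff is the file's current offset:

	/* rewrite bf.l_start/bf.l_whence so the lock range is absolute */
	if ((error = convoff(vp, &bf, 0, fileoff)) != 0)
		return (error);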
2700 2755
2701 2756
2702 2757 /* proc_graph function definitions */
2703 2758
2704 2759 /*
2705 2760  * Check for deadlock caused by the new 'lock'. If a deadlock is found, the
2706 2761  * edges of this lock are freed and 1 is returned.
2707 2762 */
2708 2763
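/*
 * Illustrative note: the walk below detects a cycle in the standard DFS
 * way. Each proc vertex is stamped when it is first reached (PROC_ARRIVE)
 * and again once all of its out edges have been explored (PROC_DEPART).
 * Reaching a vertex that has arrived but not yet departed means the edge
 * leads back into the current DFS path, i.e., a wait-for cycle, which is
 * reported as a deadlock.
 */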
2709 2764 static int
2710 2765 flk_check_deadlock(lock_descriptor_t *lock)
2711 2766 {
2712 2767 proc_vertex_t *start_vertex, *pvertex;
2713 2768 proc_vertex_t *dvertex;
2714 2769 proc_edge_t *pep, *ppep;
2715 2770 edge_t *ep, *nep;
2716 2771 proc_vertex_t *process_stack;
2717 2772
2718 2773 STACK_INIT(process_stack);
2719 2774
2720 2775 mutex_enter(&flock_lock);
2721 2776 start_vertex = flk_get_proc_vertex(lock);
2722 2777 ASSERT(start_vertex != NULL);
2723 2778
2724 2779 /* construct the edges from this process to other processes */
2725 2780
2726 2781 ep = FIRST_ADJ(lock);
2727 2782 while (ep != HEAD(lock)) {
2728 2783 proc_vertex_t *adj_proc;
2729 2784
2730 2785 adj_proc = flk_get_proc_vertex(ep->to_vertex);
2731 2786 for (pep = start_vertex->edge; pep != NULL; pep = pep->next) {
2732 2787 if (pep->to_proc == adj_proc) {
2733 2788 ASSERT(pep->refcount);
2734 2789 pep->refcount++;
2735 2790 break;
2736 2791 }
2737 2792 }
2738 2793 if (pep == NULL) {
2739 2794 pep = flk_get_proc_edge();
2740 2795 pep->to_proc = adj_proc;
2741 2796 pep->refcount = 1;
2742 2797 adj_proc->incount++;
2743 2798 pep->next = start_vertex->edge;
2744 2799 start_vertex->edge = pep;
2745 2800 }
2746 2801 ep = NEXT_ADJ(ep);
2747 2802 }
2748 2803
2749 2804 ep = FIRST_IN(lock);
2750 2805
2751 2806 while (ep != HEAD(lock)) {
2752 2807 proc_vertex_t *in_proc;
2753 2808
2754 2809 in_proc = flk_get_proc_vertex(ep->from_vertex);
2755 2810
2756 2811 for (pep = in_proc->edge; pep != NULL; pep = pep->next) {
2757 2812 if (pep->to_proc == start_vertex) {
2758 2813 ASSERT(pep->refcount);
2759 2814 pep->refcount++;
2760 2815 break;
2761 2816 }
2762 2817 }
2763 2818 if (pep == NULL) {
2764 2819 pep = flk_get_proc_edge();
2765 2820 pep->to_proc = start_vertex;
2766 2821 pep->refcount = 1;
2767 2822 start_vertex->incount++;
2768 2823 pep->next = in_proc->edge;
2769 2824 in_proc->edge = pep;
2770 2825 }
2771 2826 ep = NEXT_IN(ep);
2772 2827 }
2773 2828
2774 2829 if (start_vertex->incount == 0) {
2775 2830 mutex_exit(&flock_lock);
2776 2831 return (0);
2777 2832 }
2778 2833
2779 2834 flk_proc_graph_uncolor();
2780 2835
2781 2836 start_vertex->p_sedge = start_vertex->edge;
2782 2837
2783 2838 STACK_PUSH(process_stack, start_vertex, p_stack);
2784 2839
2785 2840 while ((pvertex = STACK_TOP(process_stack)) != NULL) {
2786 2841 for (pep = pvertex->p_sedge; pep != NULL; pep = pep->next) {
2787 2842 dvertex = pep->to_proc;
2788 2843 if (!PROC_ARRIVED(dvertex)) {
2789 2844 STACK_PUSH(process_stack, dvertex, p_stack);
2790 2845 dvertex->p_sedge = dvertex->edge;
2791 2846 PROC_ARRIVE(pvertex);
2792 2847 pvertex->p_sedge = pep->next;
2793 2848 break;
2794 2849 }
2795 2850 if (!PROC_DEPARTED(dvertex))
2796 2851 goto deadlock;
2797 2852 }
2798 2853 if (pep == NULL) {
2799 2854 PROC_DEPART(pvertex);
2800 2855 STACK_POP(process_stack, p_stack);
2801 2856 }
2802 2857 }
2803 2858 mutex_exit(&flock_lock);
2804 2859 return (0);
2805 2860
2806 2861 deadlock:
2807 2862
2808 2863 /* we remove all lock edges and proc edges */
2809 2864
2810 2865 ep = FIRST_ADJ(lock);
2811 2866 while (ep != HEAD(lock)) {
2812 2867 proc_vertex_t *adj_proc;
2813 2868 adj_proc = flk_get_proc_vertex(ep->to_vertex);
2814 2869 nep = NEXT_ADJ(ep);
2815 2870 IN_LIST_REMOVE(ep);
2816 2871 ADJ_LIST_REMOVE(ep);
2817 2872 flk_free_edge(ep);
2818 2873 ppep = start_vertex->edge;
2819 2874 for (pep = start_vertex->edge; pep != NULL; ppep = pep,
2820 2875 pep = ppep->next) {
2821 2876 if (pep->to_proc == adj_proc) {
2822 2877 pep->refcount--;
2823 2878 if (pep->refcount == 0) {
2824 2879 if (pep == ppep) {
2825 2880 start_vertex->edge = pep->next;
2826 2881 } else {
2827 2882 ppep->next = pep->next;
2828 2883 }
2829 2884 adj_proc->incount--;
2830 2885 flk_proc_release(adj_proc);
2831 2886 flk_free_proc_edge(pep);
2832 2887 }
2833 2888 break;
2834 2889 }
2835 2890 }
2836 2891 ep = nep;
2837 2892 }
2838 2893 ep = FIRST_IN(lock);
2839 2894 while (ep != HEAD(lock)) {
2840 2895 proc_vertex_t *in_proc;
2841 2896 in_proc = flk_get_proc_vertex(ep->from_vertex);
2842 2897 nep = NEXT_IN(ep);
2843 2898 IN_LIST_REMOVE(ep);
2844 2899 ADJ_LIST_REMOVE(ep);
2845 2900 flk_free_edge(ep);
2846 2901 ppep = in_proc->edge;
2847 2902 for (pep = in_proc->edge; pep != NULL; ppep = pep,
2848 2903 pep = ppep->next) {
2849 2904 if (pep->to_proc == start_vertex) {
2850 2905 pep->refcount--;
2851 2906 if (pep->refcount == 0) {
2852 2907 if (pep == ppep) {
2853 2908 in_proc->edge = pep->next;
2854 2909 } else {
2855 2910 ppep->next = pep->next;
2856 2911 }
2857 2912 start_vertex->incount--;
2858 2913 flk_proc_release(in_proc);
2859 2914 flk_free_proc_edge(pep);
2860 2915 }
2861 2916 break;
2862 2917 }
2863 2918 }
2864 2919 ep = nep;
2865 2920 }
2866 2921 flk_proc_release(start_vertex);
2867 2922 mutex_exit(&flock_lock);
2868 2923 return (1);
2869 2924 }
2870 2925
2871 2926 /*
2872 2927 * Get a proc vertex. If lock's pvertex value gets a correct proc vertex
2873 2928 * from the list we return that, otherwise we allocate one. If necessary,
2874 2929 * we grow the list of vertices also.
2875 2930 */
2876 2931
2877 2932 static proc_vertex_t *
2878 2933 flk_get_proc_vertex(lock_descriptor_t *lock)
2879 2934 {
2880 2935 int i;
2881 2936 proc_vertex_t *pv;
2882 2937 proc_vertex_t **palloc;
2883 2938
2884 2939 ASSERT(MUTEX_HELD(&flock_lock));
2885 2940 if (lock->pvertex != -1) {
2886 2941 ASSERT(lock->pvertex >= 0);
2887 2942 pv = pgraph.proc[lock->pvertex];
2888 2943 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
2889 2944 return (pv);
2890 2945 }
2891 2946 }
2892 2947 for (i = 0; i < pgraph.gcount; i++) {
2893 2948 pv = pgraph.proc[i];
2894 2949 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
2895 2950 lock->pvertex = pv->index = i;
2896 2951 return (pv);
2897 2952 }
2898 2953 }
2899 2954 pv = kmem_zalloc(sizeof (struct proc_vertex), KM_SLEEP);
2900 2955 pv->pid = lock->l_flock.l_pid;
2901 2956 pv->sysid = lock->l_flock.l_sysid;
2902 2957 flk_proc_vertex_allocs++;
2903 2958 if (pgraph.free != 0) {
2904 2959 for (i = 0; i < pgraph.gcount; i++) {
2905 2960 if (pgraph.proc[i] == NULL) {
2906 2961 pgraph.proc[i] = pv;
2907 2962 lock->pvertex = pv->index = i;
2908 2963 pgraph.free--;
2909 2964 return (pv);
2910 2965 }
2911 2966 }
2912 2967 }
2913 2968 palloc = kmem_zalloc((pgraph.gcount + PROC_CHUNK) *
2914 2969 sizeof (proc_vertex_t *), KM_SLEEP);
2915 2970
2916 2971 if (pgraph.proc) {
2917 2972 bcopy(pgraph.proc, palloc,
2918 2973 pgraph.gcount * sizeof (proc_vertex_t *));
2919 2974
2920 2975 kmem_free(pgraph.proc,
2921 2976 pgraph.gcount * sizeof (proc_vertex_t *));
2922 2977 }
2923 2978 pgraph.proc = palloc;
2924 2979 pgraph.free += (PROC_CHUNK - 1);
2925 2980 pv->index = lock->pvertex = pgraph.gcount;
2926 2981 pgraph.gcount += PROC_CHUNK;
2927 2982 pgraph.proc[pv->index] = pv;
2928 2983 return (pv);
2929 2984 }
2930 2985
2931 2986 /*
2932 2987 * Allocate a proc edge.
2933 2988 */
2934 2989
2935 2990 static proc_edge_t *
2936 2991 flk_get_proc_edge()
2937 2992 {
2938 2993 proc_edge_t *pep;
2939 2994
2940 2995 pep = kmem_zalloc(sizeof (proc_edge_t), KM_SLEEP);
2941 2996 flk_proc_edge_allocs++;
2942 2997 return (pep);
2943 2998 }
2944 2999
2945 3000 /*
2946 3001 * Free the proc edge. Called whenever its reference count goes to zero.
2947 3002 */
2948 3003
2949 3004 static void
2950 3005 flk_free_proc_edge(proc_edge_t *pep)
2951 3006 {
2952 3007 ASSERT(pep->refcount == 0);
2953 3008 kmem_free((void *)pep, sizeof (proc_edge_t));
2954 3009 flk_proc_edge_frees++;
2955 3010 }
2956 3011
2957 3012 /*
2958 3013  * Uncolor the proc graph; an explicit pass over all vertices is needed only when the mark value hits its maximum.
2959 3014 */
2960 3015
2961 3016 static void
2962 3017 flk_proc_graph_uncolor()
2963 3018 {
2964 3019 int i;
2965 3020
2966 3021 if (pgraph.mark == UINT_MAX) {
2967 3022 for (i = 0; i < pgraph.gcount; i++)
2968 3023 if (pgraph.proc[i] != NULL) {
2969 3024 pgraph.proc[i]->atime = 0;
2970 3025 pgraph.proc[i]->dtime = 0;
2971 3026 }
2972 3027 pgraph.mark = 1;
2973 3028 } else {
2974 3029 pgraph.mark++;
2975 3030 }
2976 3031 }
2977 3032
2978 3033 /*
2979 3034  * Release the proc vertex iff it has neither in edges nor out edges.
2980 3035 */
2981 3036
2982 3037 static void
2983 3038 flk_proc_release(proc_vertex_t *proc)
2984 3039 {
2985 3040 ASSERT(MUTEX_HELD(&flock_lock));
2986 3041 if (proc->edge == NULL && proc->incount == 0) {
2987 3042 pgraph.proc[proc->index] = NULL;
2988 3043 pgraph.free++;
2989 3044 kmem_free(proc, sizeof (proc_vertex_t));
2990 3045 flk_proc_vertex_frees++;
2991 3046 }
2992 3047 }
2993 3048
2994 3049 /*
2995 3050  * Update the process graph to reflect a change in a lock graph.
2996 3051  * Note: We should call this function only after we have a correctly
2997 3052  * recomputed lock graph; otherwise we might miss a deadlock detection.
2998 3053  * E.g., in flk_relation() we call this function after
2999 3054  * flk_recompute_dependencies(); otherwise, if a process tries to lock a
3000 3055  * vnode hashed into another graph, it might sleep forever.
3001 3056 */
3002 3057
3003 3058 static void
3004 3059 flk_update_proc_graph(edge_t *ep, int delete)
3005 3060 {
3006 3061 proc_vertex_t *toproc, *fromproc;
3007 3062 proc_edge_t *pep, *prevpep;
3008 3063
3009 3064 mutex_enter(&flock_lock);
3010 3065 toproc = flk_get_proc_vertex(ep->to_vertex);
3011 3066 fromproc = flk_get_proc_vertex(ep->from_vertex);
3012 3067
3013 3068 if (!delete)
3014 3069 goto add;
3015 3070 pep = prevpep = fromproc->edge;
3016 3071
3017 3072 ASSERT(pep != NULL);
3018 3073 while (pep != NULL) {
3019 3074 if (pep->to_proc == toproc) {
3020 3075 ASSERT(pep->refcount > 0);
3021 3076 pep->refcount--;
3022 3077 if (pep->refcount == 0) {
3023 3078 if (pep == prevpep) {
3024 3079 fromproc->edge = pep->next;
3025 3080 } else {
3026 3081 prevpep->next = pep->next;
3027 3082 }
3028 3083 toproc->incount--;
3029 3084 flk_proc_release(toproc);
3030 3085 flk_free_proc_edge(pep);
3031 3086 }
3032 3087 break;
3033 3088 }
3034 3089 prevpep = pep;
3035 3090 pep = pep->next;
3036 3091 }
3037 3092 flk_proc_release(fromproc);
3038 3093 mutex_exit(&flock_lock);
3039 3094 return;
3040 3095 add:
3041 3096
3042 3097 pep = fromproc->edge;
3043 3098
3044 3099 while (pep != NULL) {
3045 3100 if (pep->to_proc == toproc) {
3046 3101 ASSERT(pep->refcount > 0);
3047 3102 pep->refcount++;
3048 3103 break;
3049 3104 }
3050 3105 pep = pep->next;
3051 3106 }
3052 3107 if (pep == NULL) {
3053 3108 pep = flk_get_proc_edge();
3054 3109 pep->to_proc = toproc;
3055 3110 pep->refcount = 1;
3056 3111 toproc->incount++;
3057 3112 pep->next = fromproc->edge;
3058 3113 fromproc->edge = pep;
3059 3114 }
3060 3115 mutex_exit(&flock_lock);
3061 3116 }
3062 3117
3063 3118 /*
3064 3119 * Set the control status for lock manager requests.
3065 3120 *
3066 3121 */
3067 3122
3068 3123 /*
3069 3124 * PSARC case 1997/292
3070 3125 *
3071 3126 * Requires: "nlmid" must be >= 1 and <= clconf_maximum_nodeid().
3072 3127 * Effects: Set the state of the NLM server identified by "nlmid"
3073 3128 * in the NLM registry to state "nlm_state."
3074 3129 * Raises exception no_such_nlm if "nlmid" doesn't identify a known
3075 3130 * NLM server to this LLM.
3076 3131 * Note that when this routine is called with NLM_SHUTTING_DOWN there
3077 3132  * may be lock requests that have gotten started but not finished. In
3078 3133 * particular, there may be blocking requests that are in the callback code
3079 3134 * before sleeping (so they're not holding the lock for the graph). If
3080 3135 * such a thread reacquires the graph's lock (to go to sleep) after
3081 3136 * NLM state in the NLM registry is set to a non-up value,
3082 3137 * it will notice the status and bail out. If the request gets
3083 3138 * granted before the thread can check the NLM registry, let it
3084 3139 * continue normally. It will get flushed when we are called with NLM_DOWN.
3085 3140 *
3086 3141 * Modifies: nlm_reg_obj (global)
3087 3142 * Arguments:
3088 3143 * nlmid (IN): id uniquely identifying an NLM server
3089 3144 * nlm_state (IN): NLM server state to change "nlmid" to
3090 3145 */
3091 3146 void
3092 3147 cl_flk_set_nlm_status(int nlmid, flk_nlm_status_t nlm_state)
3093 3148 {
3094 3149 /*
3095 3150 * Check to see if node is booted as a cluster. If not, return.
3096 3151 */
3097 3152 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
3098 3153 return;
3099 3154 }
3100 3155
3101 3156 /*
3102 3157 * Check for development/debugging. It is possible to boot a node
3103 3158 * in non-cluster mode, and then run a special script, currently
3104 3159 * available only to developers, to bring up the node as part of a
3105 3160 * cluster. The problem is that running such a script does not
3106 3161 * result in the routine flk_init() being called and hence global array
3107 3162 * nlm_reg_status is NULL. The NLM thinks it's in cluster mode,
3108 3163 * but the LLM needs to do an additional check to see if the global
3109 3164 * array has been created or not. If nlm_reg_status is NULL, then
3110 3165 * return, else continue.
3111 3166 */
3112 3167 if (nlm_reg_status == NULL) {
3113 3168 return;
3114 3169 }
3115 3170
3116 3171 ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
3117 3172 mutex_enter(&nlm_reg_lock);
3118 3173
3119 3174 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, nlmid)) {
3120 3175 /*
3121 3176 * If the NLM server "nlmid" is unknown in the NLM registry,
3122 3177 * add it to the registry in the nlm shutting down state.
3123 3178 */
3124 3179 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
3125 3180 FLK_NLM_SHUTTING_DOWN);
3126 3181 } else {
3127 3182 /*
3128 3183 * Change the state of the NLM server identified by "nlmid"
3129 3184 * in the NLM registry to the argument "nlm_state."
3130 3185 */
3131 3186 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
3132 3187 nlm_state);
3133 3188 }
3134 3189
3135 3190 /*
3136 3191 * The reason we must register the NLM server that is shutting down
3137 3192 * with an LLM that doesn't already know about it (never sent a lock
3138 3193 * request) is to handle correctly a race between shutdown and a new
3139 3194 * lock request. Suppose that a shutdown request from the NLM server
3140 3195 * invokes this routine at the LLM, and a thread is spawned to
3141 3196 * service the request. Now suppose a new lock request is in
3142 3197 * progress and has already passed the first line of defense in
3143 3198  * reclock(), which denies new lock requests from NLM servers
3144 3199 * that are not in the NLM_UP state. After the current routine
3145 3200 * is invoked for both phases of shutdown, the routine will return,
3146 3201 * having done nothing, and the lock request will proceed and
3147 3202 * probably be granted. The problem is that the shutdown was ignored
3148 3203 * by the lock request because there was no record of that NLM server
3149 3204 * shutting down. We will be in the peculiar position of thinking
3150 3205 * that we've shutdown the NLM server and all locks at all LLMs have
3151 3206 * been discarded, but in fact there's still one lock held.
3152 3207 * The solution is to record the existence of NLM server and change
3153 3208 * its state immediately to NLM_SHUTTING_DOWN. The lock request in
3154 3209 * progress may proceed because the next phase NLM_DOWN will catch
3155 3210 * this lock and discard it.
3156 3211 */
3157 3212 mutex_exit(&nlm_reg_lock);
3158 3213
3159 3214 switch (nlm_state) {
3160 3215 case FLK_NLM_UP:
3161 3216 /*
3162 3217 * Change the NLM state of all locks still held on behalf of
3163 3218 * the NLM server identified by "nlmid" to NLM_UP.
3164 3219 */
3165 3220 cl_flk_change_nlm_state_all_locks(nlmid, FLK_NLM_UP);
3166 3221 break;
3167 3222
3168 3223 case FLK_NLM_SHUTTING_DOWN:
3169 3224 /*
3170 3225 * Wake up all sleeping locks for the NLM server identified
3171 3226 * by "nlmid." Note that eventually all woken threads will
3172 3227 * have their lock requests cancelled and descriptors
3173 3228 * removed from the sleeping lock list. Note that the NLM
3174 3229 * server state associated with each lock descriptor is
3175 3230 * changed to FLK_NLM_SHUTTING_DOWN.
3176 3231 */
3177 3232 cl_flk_wakeup_sleeping_nlm_locks(nlmid);
3178 3233 break;
3179 3234
3180 3235 case FLK_NLM_DOWN:
3181 3236 /*
3182 3237 * Discard all active, granted locks for this NLM server
3183 3238 * identified by "nlmid."
3184 3239 */
3185 3240 cl_flk_unlock_nlm_granted(nlmid);
3186 3241 break;
3187 3242
3188 3243 default:
3189 3244 panic("cl_set_nlm_status: bad status (%d)", nlm_state);
3190 3245 }
3191 3246 }
3192 3247
3193 3248 /*
3194 3249 * Set the control status for lock manager requests.
3195 3250 *
3196 3251 * Note that when this routine is called with FLK_WAKEUP_SLEEPERS, there
3197 3252  * may be lock requests that have gotten started but not finished. In
3198 3253 * particular, there may be blocking requests that are in the callback code
3199 3254 * before sleeping (so they're not holding the lock for the graph). If
3200 3255 * such a thread reacquires the graph's lock (to go to sleep) after
3201 3256 * flk_lockmgr_status is set to a non-up value, it will notice the status
3202 3257 * and bail out. If the request gets granted before the thread can check
3203 3258 * flk_lockmgr_status, let it continue normally. It will get flushed when
3204 3259 * we are called with FLK_LOCKMGR_DOWN.
3205 3260 */
3206 3261
3207 3262 void
3208 3263 flk_set_lockmgr_status(flk_lockmgr_status_t status)
3209 3264 {
3210 3265 int i;
3211 3266 graph_t *gp;
3212 3267 struct flock_globals *fg;
3213 3268
3214 3269 fg = flk_get_globals();
3215 3270 ASSERT(fg != NULL);
3216 3271
3217 3272 mutex_enter(&flock_lock);
3218 3273 fg->flk_lockmgr_status = status;
3219 3274 mutex_exit(&flock_lock);
3220 3275
3221 3276 /*
3222 3277 * If the lock manager is coming back up, all that's needed is to
3223 3278 * propagate this information to the graphs. If the lock manager
3224 3279 * is going down, additional action is required, and each graph's
3225 3280 * copy of the state is updated atomically with this other action.
3226 3281 */
3227 3282 switch (status) {
3228 3283 case FLK_LOCKMGR_UP:
3229 3284 for (i = 0; i < HASH_SIZE; i++) {
3230 3285 mutex_enter(&flock_lock);
3231 3286 gp = lock_graph[i];
3232 3287 mutex_exit(&flock_lock);
3233 3288 if (gp == NULL)
3234 3289 continue;
3235 3290 mutex_enter(&gp->gp_mutex);
3236 3291 fg->lockmgr_status[i] = status;
3237 3292 mutex_exit(&gp->gp_mutex);
3238 3293 }
3239 3294 break;
3240 3295 case FLK_WAKEUP_SLEEPERS:
3241 3296 wakeup_sleeping_lockmgr_locks(fg);
3242 3297 break;
3243 3298 case FLK_LOCKMGR_DOWN:
3244 3299 unlock_lockmgr_granted(fg);
3245 3300 break;
3246 3301 default:
3247 3302 panic("flk_set_lockmgr_status: bad status (%d)", status);
3248 3303 break;
3249 3304 }
3250 3305 }
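
A hypothetical teardown sequence, to show how the states are meant to be used together (the actual lock manager shutdown code is not part of this diff): wake the sleepers first so blocked requests bail out, then flush the granted locks.

	flk_set_lockmgr_status(FLK_WAKEUP_SLEEPERS);
	/* ... allow the woken lock requests to unwind ... */
	flk_set_lockmgr_status(FLK_LOCKMGR_DOWN);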
3251 3306
3252 3307 /*
3253 3308 * This routine returns all the locks that are active or sleeping and are
3254 3309 * associated with a particular set of identifiers. If lock_state != 0, then
3255 3310 * only locks that match the lock_state are returned. If lock_state == 0, then
3256 3311 * all locks are returned. If pid == NOPID, the pid is ignored. If
3257 3312 * use_sysid is FALSE, then the sysid is ignored. If vp is NULL, then the
3258 3313 * vnode pointer is ignored.
3259 3314 *
3260 3315 * A list containing the vnode pointer and an flock structure
3261 3316 * describing the lock is returned. Each element in the list is
3262 3317 * dynamically allocated and must be freed by the caller. The
3263 3318 * last item in the list is denoted by a NULL value in the ll_next
3264 3319 * field.
3265 3320 *
3266 3321 * The vnode pointers returned are held. The caller is responsible
3267 3322 * for releasing these. Note that the returned list is only a snapshot of
3268 3323 * the current lock information, and that it is a snapshot of a moving
3269 3324 * target (only one graph is locked at a time).
3270 3325 */
3271 3326
3272 3327 locklist_t *
3273 3328 get_lock_list(int list_type, int lock_state, int sysid, boolean_t use_sysid,
3274 3329 pid_t pid, const vnode_t *vp, zoneid_t zoneid)
3275 3330 {
3276 3331 lock_descriptor_t *lock;
3277 3332 lock_descriptor_t *graph_head;
3278 3333 locklist_t listhead;
3279 3334 locklist_t *llheadp;
3280 3335 locklist_t *llp;
3281 3336 locklist_t *lltp;
3282 3337 graph_t *gp;
3283 3338 int i;
3284 3339 int first_index; /* graph index */
3285 3340 int num_indexes; /* graph index */
3286 3341
3287 3342 ASSERT((list_type == FLK_ACTIVE_STATE) ||
3288 3343 (list_type == FLK_SLEEPING_STATE));
3289 3344
3290 3345 /*
3291 3346 * Get a pointer to something to use as a list head while building
3292 3347 * the rest of the list.
3293 3348 */
3294 3349 llheadp = &listhead;
3295 3350 lltp = llheadp;
3296 3351 llheadp->ll_next = (locklist_t *)NULL;
3297 3352
3298 3353 /* Figure out which graphs we want to look at. */
3299 3354 if (vp == NULL) {
3300 3355 first_index = 0;
3301 3356 num_indexes = HASH_SIZE;
3302 3357 } else {
3303 3358 first_index = HASH_INDEX(vp);
3304 3359 num_indexes = 1;
3305 3360 }
3306 3361
3307 3362 for (i = first_index; i < first_index + num_indexes; i++) {
3308 3363 mutex_enter(&flock_lock);
3309 3364 gp = lock_graph[i];
3310 3365 mutex_exit(&flock_lock);
3311 3366 if (gp == NULL) {
3312 3367 continue;
3313 3368 }
3314 3369
3315 3370 mutex_enter(&gp->gp_mutex);
3316 3371 graph_head = (list_type == FLK_ACTIVE_STATE) ?
3317 3372 ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp);
3318 3373 for (lock = graph_head->l_next;
3319 3374 lock != graph_head;
3320 3375 lock = lock->l_next) {
3321 3376 if (use_sysid && lock->l_flock.l_sysid != sysid)
3322 3377 continue;
3323 3378 if (pid != NOPID && lock->l_flock.l_pid != pid)
3324 3379 continue;
3325 3380 if (vp != NULL && lock->l_vnode != vp)
3326 3381 continue;
3327 3382 if (lock_state && !(lock_state & lock->l_state))
3328 3383 continue;
3329 3384 if (zoneid != lock->l_zoneid && zoneid != ALL_ZONES)
3330 3385 continue;
3331 3386 /*
3332 3387 * A matching lock was found. Allocate
3333 3388 * space for a new locklist entry and fill
3334 3389 * it in.
3335 3390 */
3336 3391 llp = kmem_alloc(sizeof (locklist_t), KM_SLEEP);
3337 3392 lltp->ll_next = llp;
3338 3393 VN_HOLD(lock->l_vnode);
3339 3394 llp->ll_vp = lock->l_vnode;
3340 3395 create_flock(lock, &(llp->ll_flock));
3341 3396 llp->ll_next = (locklist_t *)NULL;
3342 3397 lltp = llp;
3343 3398 }
3344 3399 mutex_exit(&gp->gp_mutex);
3345 3400 }
3346 3401
3347 3402 llp = llheadp->ll_next;
3348 3403 return (llp);
3349 3404 }
3350 3405
3351 3406 /*
3352 3407 * These two functions are simply interfaces to get_lock_list. They return
3353 3408 * a list of sleeping or active locks for the given sysid and pid. See
3354 3409 * get_lock_list for details.
3355 3410 *
3356 3411 * In either case we don't particularly care to specify the zone of interest;
3357 3412 * the sysid-space is global across zones, so the sysid will map to exactly one
3358 3413 * zone, and we'll return information for that zone.
3359 3414 */
3360 3415
3361 3416 locklist_t *
3362 3417 flk_get_sleeping_locks(int sysid, pid_t pid)
3363 3418 {
3364 3419 return (get_lock_list(FLK_SLEEPING_STATE, 0, sysid, B_TRUE, pid, NULL,
3365 3420 ALL_ZONES));
3366 3421 }
3367 3422
3368 3423 locklist_t *
3369 3424 flk_get_active_locks(int sysid, pid_t pid)
3370 3425 {
3371 3426 return (get_lock_list(FLK_ACTIVE_STATE, 0, sysid, B_TRUE, pid, NULL,
3372 3427 ALL_ZONES));
3373 3428 }
3374 3429
3375 3430 /*
3376 3431 * Another interface to get_lock_list. This one returns all the active
3377 3432 * locks for a given vnode. Again, see get_lock_list for details.
3378 3433 *
3379 3434 * We don't need to specify which zone's locks we're interested in. The matter
3380 3435 * would only be interesting if the vnode belonged to NFS, and NFS vnodes can't
3381 3436 * be used by multiple zones, so the list of locks will all be from the right
3382 3437 * zone.
3383 3438 */
3384 3439
3385 3440 locklist_t *
3386 3441 flk_active_locks_for_vp(const vnode_t *vp)
3387 3442 {
3388 3443 return (get_lock_list(FLK_ACTIVE_STATE, 0, 0, B_FALSE, NOPID, vp,
3389 3444 ALL_ZONES));
3390 3445 }
3391 3446
3392 3447 /*
3393 3448 * Another interface to get_lock_list. This one returns all the active
3394 3449 * nbmand locks for a given vnode. Again, see get_lock_list for details.
3395 3450 *
3396 3451 * See the comment for flk_active_locks_for_vp() for why we don't care to
3397 3452 * specify the particular zone of interest.
3398 3453 */
3399 3454 locklist_t *
3400 3455 flk_active_nbmand_locks_for_vp(const vnode_t *vp)
3401 3456 {
3402 3457 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
3403 3458 NOPID, vp, ALL_ZONES));
3404 3459 }
3405 3460
3406 3461 /*
3407 3462 * Another interface to get_lock_list. This one returns all the active
3408 3463 * nbmand locks for a given pid. Again, see get_lock_list for details.
3409 3464 *
3410 3465 * The zone doesn't need to be specified here; the locks held by a
3411 3466 * particular process will either be local (ie, non-NFS) or from the zone
3412 3467 * the process is executing in. This is because other parts of the system
3413 3468 * ensure that an NFS vnode can't be used in a zone other than that in
3414 3469 * which it was opened.
3415 3470 */
3416 3471 locklist_t *
3417 3472 flk_active_nbmand_locks(pid_t pid)
3418 3473 {
3419 3474 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
3420 3475 pid, NULL, ALL_ZONES));
3421 3476 }
3422 3477
3423 3478 /*
3424 3479 * Free up all entries in the locklist.
3425 3480 */
3426 3481 void
3427 3482 flk_free_locklist(locklist_t *llp)
3428 3483 {
3429 3484 locklist_t *next_llp;
3430 3485
3431 3486 while (llp) {
3432 3487 next_llp = llp->ll_next;
3433 3488 VN_RELE(llp->ll_vp);
3434 3489 kmem_free(llp, sizeof (*llp));
3435 3490 llp = next_llp;
3436 3491 }
3437 3492 }
3438 3493
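/*
 * Illustrative sketch, not part of flock.c: how a caller might consume the
 * lists built by get_lock_list(), assuming kernel context.  The helper name
 * dump_active_locks_for() is hypothetical; the body only uses interfaces
 * shown above (flk_get_active_locks(), flk_free_locklist()) and the
 * locklist_t fields ll_next and ll_flock.
 */
static void
dump_active_locks_for(int sysid, pid_t pid)
{
	locklist_t *llp, *head;

	/* Snapshot the active locks held by (sysid, pid) across all zones. */
	head = flk_get_active_locks(sysid, pid);

	for (llp = head; llp != NULL; llp = llp->ll_next) {
		cmn_err(CE_CONT, "lock: start=%lld len=%lld type=%d\n",
		    (longlong_t)llp->ll_flock.l_start,
		    (longlong_t)llp->ll_flock.l_len,
		    llp->ll_flock.l_type);
	}

	/* Each entry holds a vnode reference; this releases them all. */
	flk_free_locklist(head);
}
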
3439 3494 static void
3440 3495 cl_flk_change_nlm_state_all_locks(int nlmid, flk_nlm_status_t nlm_state)
3441 3496 {
3442 3497 /*
3443 3498 * For each graph "lg" in the hash table lock_graph do
3444 3499 * a. Get the list of sleeping locks
3445 3500 * b. For each lock descriptor in the list do
3446 3501 * i. If the requested lock is an NLM server request AND
3447 3502 * the nlmid is the same as the routine argument then
3448 3503 * change the lock descriptor's state field to
3449 3504 * "nlm_state."
3450 3505 * c. Get the list of active locks
3451 3506 * d. For each lock descriptor in the list do
3452 3507 * i. If the requested lock is an NLM server request AND
3453 3508 * the nlmid is the same as the routine argument then
3454 3509 * change the lock descriptor's state field to
3455 3510 * "nlm_state."
3456 3511 */
3457 3512
3458 3513 int i;
3459 3514 graph_t *gp; /* lock graph */
3460 3515 lock_descriptor_t *lock; /* lock */
3461 3516 lock_descriptor_t *nlock = NULL; /* next lock */
3462 3517 int lock_nlmid;
3463 3518
3464 3519 for (i = 0; i < HASH_SIZE; i++) {
3465 3520 mutex_enter(&flock_lock);
3466 3521 gp = lock_graph[i];
3467 3522 mutex_exit(&flock_lock);
3468 3523 if (gp == NULL) {
3469 3524 continue;
3470 3525 }
3471 3526
3472 3527 /* Get list of sleeping locks in current lock graph. */
3473 3528 mutex_enter(&gp->gp_mutex);
3474 3529 for (lock = SLEEPING_HEAD(gp)->l_next;
3475 3530 lock != SLEEPING_HEAD(gp);
3476 3531 lock = nlock) {
3477 3532 nlock = lock->l_next;
3478 3533 /* get NLM id */
3479 3534 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
3480 3535
3481 3536 /*
3482 3537 * If NLM server request AND nlmid of lock matches
3483 3538 * nlmid of argument, then set the NLM state of the
3484 3539 * lock to "nlm_state."
3485 3540 */
3486 3541 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
3487 3542 SET_NLM_STATE(lock, nlm_state);
3488 3543 }
3489 3544 }
3490 3545
3491 3546 /* Get list of active locks in current lock graph. */
3492 3547 for (lock = ACTIVE_HEAD(gp)->l_next;
3493 3548 lock != ACTIVE_HEAD(gp);
3494 3549 lock = nlock) {
3495 3550 nlock = lock->l_next;
3496 3551 /* get NLM id */
3497 3552 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
3498 3553
3499 3554 /*
3500 3555 * If NLM server request AND nlmid of lock matches
3501 3556 * nlmid of argument, then set the NLM state of the
3502 3557 * lock to "nlm_state."
3503 3558 */
3504 3559 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
3505 3560 ASSERT(IS_ACTIVE(lock));
3506 3561 SET_NLM_STATE(lock, nlm_state);
3507 3562 }
3508 3563 }
3509 3564 mutex_exit(&gp->gp_mutex);
3510 3565 }
3511 3566 }
3512 3567
3513 3568 /*
3514 3569 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid().
3515 3570 * Effects: Find all sleeping lock manager requests _only_ for the NLM server
3516 3571 * identified by "nlmid." Poke those lock requests.
3517 3572 */
3518 3573 static void
3519 3574 cl_flk_wakeup_sleeping_nlm_locks(int nlmid)
3520 3575 {
3521 3576 lock_descriptor_t *lock;
3522 3577 lock_descriptor_t *nlock = NULL; /* next lock */
3523 3578 int i;
3524 3579 graph_t *gp;
3525 3580 int lock_nlmid;
3526 3581
3527 3582 for (i = 0; i < HASH_SIZE; i++) {
3528 3583 mutex_enter(&flock_lock);
3529 3584 gp = lock_graph[i];
3530 3585 mutex_exit(&flock_lock);
3531 3586 if (gp == NULL) {
3532 3587 continue;
3533 3588 }
3534 3589
3535 3590 mutex_enter(&gp->gp_mutex);
3536 3591 for (lock = SLEEPING_HEAD(gp)->l_next;
3537 3592 lock != SLEEPING_HEAD(gp);
3538 3593 lock = nlock) {
3539 3594 nlock = lock->l_next;
3540 3595 /*
3541 3596 * If NLM server request _and_ nlmid of lock matches
3542 3597 * nlmid of argument, then set the NLM state of the
3543 3598 * lock to NLM_SHUTTING_DOWN, and wake up sleeping
3544 3599 * request.
3545 3600 */
3546 3601 if (IS_LOCKMGR(lock)) {
3547 3602 /* get NLM id */
3548 3603 lock_nlmid =
3549 3604 GETNLMID(lock->l_flock.l_sysid);
3550 3605 if (nlmid == lock_nlmid) {
3551 3606 SET_NLM_STATE(lock,
3552 3607 FLK_NLM_SHUTTING_DOWN);
3553 3608 INTERRUPT_WAKEUP(lock);
3554 3609 }
3555 3610 }
3556 3611 }
3557 3612 mutex_exit(&gp->gp_mutex);
3558 3613 }
3559 3614 }
3560 3615
3561 3616 /*
3562 3617 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid()
3563 3618 * Effects: Find all active (granted) lock manager locks _only_ for the
3564 3619 * NLM server identified by "nlmid" and release them.
3565 3620 */
3566 3621 static void
3567 3622 cl_flk_unlock_nlm_granted(int nlmid)
3568 3623 {
3569 3624 lock_descriptor_t *lock;
3570 3625 lock_descriptor_t *nlock = NULL; /* next lock */
3571 3626 int i;
3572 3627 graph_t *gp;
3573 3628 int lock_nlmid;
3574 3629
3575 3630 for (i = 0; i < HASH_SIZE; i++) {
3576 3631 mutex_enter(&flock_lock);
3577 3632 gp = lock_graph[i];
3578 3633 mutex_exit(&flock_lock);
3579 3634 if (gp == NULL) {
3580 3635 continue;
3581 3636 }
3582 3637
3583 3638 mutex_enter(&gp->gp_mutex);
3584 3639 for (lock = ACTIVE_HEAD(gp)->l_next;
3585 3640 lock != ACTIVE_HEAD(gp);
3586 3641 lock = nlock) {
3587 3642 nlock = lock->l_next;
3588 3643 ASSERT(IS_ACTIVE(lock));
3589 3644
3590 3645 /*
3591 3646 * If it's an NLM server request _and_ nlmid of
3592 3647 * the lock matches nlmid of argument, then
3593 3648 			 * remove the active lock from the list, wake up blocked
3594 3649 * threads, and free the storage for the lock.
3595 3650 * Note that there's no need to mark the NLM state
3596 3651 * of this lock to NLM_DOWN because the lock will
3597 3652 * be deleted anyway and its storage freed.
3598 3653 */
3599 3654 if (IS_LOCKMGR(lock)) {
3600 3655 /* get NLM id */
3601 3656 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
3602 3657 if (nlmid == lock_nlmid) {
3603 3658 flk_delete_active_lock(lock, 0);
3604 3659 flk_wakeup(lock, 1);
3605 3660 flk_free_lock(lock);
3606 3661 }
3607 3662 }
3608 3663 }
3609 3664 mutex_exit(&gp->gp_mutex);
3610 3665 }
3611 3666 }
3612 3667
3613 3668 /*
3614 3669 * Find all sleeping lock manager requests and poke them.
3615 3670 */
3616 3671 static void
3617 3672 wakeup_sleeping_lockmgr_locks(struct flock_globals *fg)
3618 3673 {
3619 3674 lock_descriptor_t *lock;
3620 3675 lock_descriptor_t *nlock = NULL; /* next lock */
3621 3676 int i;
3622 3677 graph_t *gp;
3623 3678 zoneid_t zoneid = getzoneid();
3624 3679
3625 3680 for (i = 0; i < HASH_SIZE; i++) {
3626 3681 mutex_enter(&flock_lock);
3627 3682 gp = lock_graph[i];
3628 3683 mutex_exit(&flock_lock);
3629 3684 if (gp == NULL) {
3630 3685 continue;
3631 3686 }
3632 3687
3633 3688 mutex_enter(&gp->gp_mutex);
3634 3689 fg->lockmgr_status[i] = FLK_WAKEUP_SLEEPERS;
3635 3690 for (lock = SLEEPING_HEAD(gp)->l_next;
3636 3691 lock != SLEEPING_HEAD(gp);
3637 3692 lock = nlock) {
3638 3693 nlock = lock->l_next;
3639 3694 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
3640 3695 INTERRUPT_WAKEUP(lock);
3641 3696 }
3642 3697 }
3643 3698 mutex_exit(&gp->gp_mutex);
3644 3699 }
3645 3700 }
3646 3701
3647 3702
3648 3703 /*
3649 3704 * Find all active (granted) lock manager locks and release them.
3650 3705 */
3651 3706 static void
3652 3707 unlock_lockmgr_granted(struct flock_globals *fg)
3653 3708 {
3654 3709 lock_descriptor_t *lock;
3655 3710 lock_descriptor_t *nlock = NULL; /* next lock */
3656 3711 int i;
3657 3712 graph_t *gp;
3658 3713 zoneid_t zoneid = getzoneid();
3659 3714
3660 3715 for (i = 0; i < HASH_SIZE; i++) {
3661 3716 mutex_enter(&flock_lock);
3662 3717 gp = lock_graph[i];
3663 3718 mutex_exit(&flock_lock);
3664 3719 if (gp == NULL) {
3665 3720 continue;
3666 3721 }
3667 3722
3668 3723 mutex_enter(&gp->gp_mutex);
3669 3724 fg->lockmgr_status[i] = FLK_LOCKMGR_DOWN;
3670 3725 for (lock = ACTIVE_HEAD(gp)->l_next;
3671 3726 lock != ACTIVE_HEAD(gp);
3672 3727 lock = nlock) {
3673 3728 nlock = lock->l_next;
3674 3729 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
3675 3730 ASSERT(IS_ACTIVE(lock));
3676 3731 flk_delete_active_lock(lock, 0);
3677 3732 flk_wakeup(lock, 1);
3678 3733 flk_free_lock(lock);
3679 3734 }
3680 3735 }
3681 3736 mutex_exit(&gp->gp_mutex);
3682 3737 }
3683 3738 }
3684 3739
3685 3740
3686 3741 /*
3687 3742 * Wait until a lock is granted, cancelled, or interrupted.
3688 3743 */
3689 3744
3690 3745 static void
3691 3746 wait_for_lock(lock_descriptor_t *request)
3692 3747 {
3693 3748 graph_t *gp = request->l_graph;
3694 3749
3695 3750 ASSERT(MUTEX_HELD(&gp->gp_mutex));
3696 3751
3697 3752 while (!(IS_GRANTED(request)) && !(IS_CANCELLED(request)) &&
3698 3753 !(IS_INTERRUPTED(request))) {
3699 3754 if (!cv_wait_sig(&request->l_cv, &gp->gp_mutex)) {
3700 3755 flk_set_state(request, FLK_INTERRUPTED_STATE);
3701 3756 request->l_state |= INTERRUPTED_LOCK;
3702 3757 }
3703 3758 }
3704 3759 }
3705 3760
3706 3761 /*
3707 3762 * Create an flock structure from the existing lock information
3708 3763 *
3709 3764 * This routine is used to create flock structures for the lock manager
3710 3765 * to use in a reclaim request. Since the lock was originated on this
3711 3766 * host, it must be conforming to UNIX semantics, so no checking is
3712 3767 * done to make sure it falls within the lower half of the 32-bit range.
3713 3768 */
3714 3769
3715 3770 static void
3716 3771 create_flock(lock_descriptor_t *lp, flock64_t *flp)
3717 3772 {
3718 3773 ASSERT(lp->l_end == MAX_U_OFFSET_T || lp->l_end <= MAXEND);
3719 3774 ASSERT(lp->l_end >= lp->l_start);
3720 3775
3721 3776 flp->l_type = lp->l_type;
3722 3777 flp->l_whence = 0;
3723 3778 flp->l_start = lp->l_start;
3724 3779 flp->l_len = (lp->l_end == MAX_U_OFFSET_T) ? 0 :
3725 3780 (lp->l_end - lp->l_start + 1);
3726 3781 flp->l_sysid = lp->l_flock.l_sysid;
3727 3782 flp->l_pid = lp->l_flock.l_pid;
3728 3783 }
3729 3784
3730 3785 /*
3731 3786 * Convert flock_t data describing a lock range into unsigned long starting
3732 3787 * and ending points, which are put into lock_request. Returns 0 or an
3733 3788 * errno value.
3734 3789 * Large Files: max is passed by the caller and we return EOVERFLOW
3735 3790 * as defined by LFS API.
3736 3791 */
3737 3792
3738 3793 int
3739 3794 flk_convert_lock_data(vnode_t *vp, flock64_t *flp,
3740 3795 u_offset_t *start, u_offset_t *end, offset_t offset)
3741 3796 {
3742 3797 struct vattr vattr;
3743 3798 int error;
3744 3799
3745 3800 /*
3746 3801 * Determine the starting point of the request
3747 3802 */
3748 3803 switch (flp->l_whence) {
3749 3804 case 0: /* SEEK_SET */
3750 3805 *start = (u_offset_t)flp->l_start;
3751 3806 break;
3752 3807 case 1: /* SEEK_CUR */
3753 3808 *start = (u_offset_t)(flp->l_start + offset);
3754 3809 break;
3755 3810 case 2: /* SEEK_END */
3756 3811 vattr.va_mask = AT_SIZE;
3757 3812 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
3758 3813 return (error);
3759 3814 *start = (u_offset_t)(flp->l_start + vattr.va_size);
3760 3815 break;
3761 3816 default:
3762 3817 return (EINVAL);
3763 3818 }
3764 3819
3765 3820 /*
3766 3821 * Determine the range covered by the request.
3767 3822 */
3768 3823 if (flp->l_len == 0)
3769 3824 *end = MAX_U_OFFSET_T;
3770 3825 else if ((offset_t)flp->l_len > 0) {
3771 3826 *end = (u_offset_t)(*start + (flp->l_len - 1));
3772 3827 } else {
3773 3828 /*
3774 3829 			 * Negative length; why do we even allow this?
3775 3830 * Because this allows easy specification of
3776 3831 * the last n bytes of the file.
3777 3832 */
3778 3833 *end = *start;
3779 3834 *start += (u_offset_t)flp->l_len;
3780 3835 (*start)++;
3781 3836 }
3782 3837 return (0);
3783 3838 }
3784 3839
3785 3840 /*
3786 3841 	 * Check the validity of lock data. This can be used by the NFS
3787 3842 * frlock routines to check data before contacting the server. The
3788 3843 * server must support semantics that aren't as restrictive as
3789 3844 * the UNIX API, so the NFS client is required to check.
3790 3845 * The maximum is now passed in by the caller.
3791 3846 */
3792 3847
3793 3848 int
3794 3849 flk_check_lock_data(u_offset_t start, u_offset_t end, offset_t max)
3795 3850 {
3796 3851 /*
3797 3852 * The end (length) for local locking should never be greater
3798 3853 * than MAXEND. However, the representation for
3799 3854 * the entire file is MAX_U_OFFSET_T.
3800 3855 */
3801 3856 if ((start > max) ||
3802 3857 ((end > max) && (end != MAX_U_OFFSET_T))) {
3803 3858 return (EINVAL);
3804 3859 }
3805 3860 if (start > end) {
3806 3861 return (EINVAL);
3807 3862 }
3808 3863 return (0);
3809 3864 }
3810 3865
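/*
 * Illustrative sketch, not part of flock.c: the conversion/validation pair
 * above used together, assuming kernel context.  The helper name
 * convert_and_check() is hypothetical, and the use of MAXEND as the limit is
 * illustrative; real callers (e.g. the NFS client) pass the maximum that
 * matches the protocol they speak.
 */
static int
convert_and_check(vnode_t *vp, flock64_t *flp, offset_t fileoff,
    u_offset_t *startp, u_offset_t *endp)
{
	int error;

	/*
	 * Resolve l_whence/l_start/l_len into an absolute [start, end]
	 * range; a negative l_len counts backward from l_start (see the
	 * comment in flk_convert_lock_data()), and l_len == 0 means
	 * "to end of file" (end == MAX_U_OFFSET_T).
	 */
	error = flk_convert_lock_data(vp, flp, startp, endp, fileoff);
	if (error != 0)
		return (error);

	/* Reject ranges the local lock code cannot represent. */
	return (flk_check_lock_data(*startp, *endp, MAXEND));
}
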
3811 3866 /*
3812 3867 * Fill in request->l_flock with information about the lock blocking the
3813 3868 * request. The complexity here is that lock manager requests are allowed
3814 3869 * to see into the upper part of the 32-bit address range, whereas local
3815 3870 * requests are only allowed to see signed values.
3816 3871 *
3817 3872 * What should be done when "blocker" is a lock manager lock that uses the
3818 3873 * upper portion of the 32-bit range, but "request" is local? Since the
3819 3874 * request has already been determined to have been blocked by the blocker,
3820 3875 * at least some portion of "blocker" must be in the range of the request,
3821 3876 * or the request extends to the end of file. For the first case, the
3822 3877 * portion in the lower range is returned with the indication that it goes
3823 3878 * "to EOF." For the second case, the last byte of the lower range is
3824 3879 * returned with the indication that it goes "to EOF."
3825 3880 */
3826 3881
3827 3882 static void
3828 3883 report_blocker(lock_descriptor_t *blocker, lock_descriptor_t *request)
3829 3884 {
3830 3885 flock64_t *flrp; /* l_flock portion of request */
3831 3886
3832 3887 ASSERT(blocker != NULL);
3833 3888
3834 3889 flrp = &request->l_flock;
3835 3890 flrp->l_whence = 0;
3836 3891 flrp->l_type = blocker->l_type;
3837 3892 flrp->l_pid = blocker->l_flock.l_pid;
3838 3893 flrp->l_sysid = blocker->l_flock.l_sysid;
3839 3894
3840 3895 if (IS_LOCKMGR(request)) {
3841 3896 flrp->l_start = blocker->l_start;
3842 3897 if (blocker->l_end == MAX_U_OFFSET_T)
3843 3898 flrp->l_len = 0;
3844 3899 else
3845 3900 flrp->l_len = blocker->l_end - blocker->l_start + 1;
3846 3901 } else {
3847 3902 if (blocker->l_start > MAXEND) {
3848 3903 flrp->l_start = MAXEND;
3849 3904 flrp->l_len = 0;
3850 3905 } else {
3851 3906 flrp->l_start = blocker->l_start;
3852 3907 if (blocker->l_end == MAX_U_OFFSET_T)
3853 3908 flrp->l_len = 0;
3854 3909 else
3855 3910 flrp->l_len = blocker->l_end -
3856 3911 blocker->l_start + 1;
3857 3912 }
3858 3913 }
3859 3914 }
3860 3915
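/*
 * Illustrative summary, not part of flock.c: how the clamping described
 * above plays out for report_blocker(), assuming MAXEND is the largest
 * offset a local (non-lock-manager) request may see.
 *
 *   blocker->l_start > MAXEND, local request:
 *       reported as l_start = MAXEND, l_len = 0        ("to EOF")
 *   blocker->l_start <= MAXEND, blocker->l_end == MAX_U_OFFSET_T:
 *       reported as l_start = blocker->l_start, l_len = 0   ("to EOF")
 *   blocker->l_start <= MAXEND, blocker->l_end <= MAXEND:
 *       reported with l_len = blocker->l_end - blocker->l_start + 1
 */
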
3861 3916 /*
3862 3917 * PSARC case 1997/292
3863 3918 */
3864 3919 /*
3865 3920 * This is the public routine exported by flock.h.
3866 3921 */
3867 3922 void
3868 3923 cl_flk_change_nlm_state_to_unknown(int nlmid)
3869 3924 {
3870 3925 /*
3871 3926 * Check to see if node is booted as a cluster. If not, return.
3872 3927 */
3873 3928 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
3874 3929 return;
3875 3930 }
3876 3931
3877 3932 /*
3878 3933 * See comment in cl_flk_set_nlm_status().
3879 3934 */
3880 3935 if (nlm_reg_status == NULL) {
3881 3936 return;
3882 3937 }
3883 3938
3884 3939 /*
3885 3940 * protect NLM registry state with a mutex.
3886 3941 */
3887 3942 ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
3888 3943 mutex_enter(&nlm_reg_lock);
3889 3944 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, FLK_NLM_UNKNOWN);
3890 3945 mutex_exit(&nlm_reg_lock);
3891 3946 }
3892 3947
3893 3948 /*
3894 3949 * Return non-zero if the given I/O request conflicts with an active NBMAND
3895 3950 * lock.
3896 3951 * If svmand is non-zero, it means look at all active locks, not just NBMAND
3897 3952 * locks.
3898 3953 */
3899 3954
3900 3955 int
3901 3956 nbl_lock_conflict(vnode_t *vp, nbl_op_t op, u_offset_t offset,
3902 3957 ssize_t length, int svmand, caller_context_t *ct)
3903 3958 {
3904 3959 int conflict = 0;
3905 3960 graph_t *gp;
3906 3961 lock_descriptor_t *lock;
3907 3962 pid_t pid;
3908 3963 int sysid;
3909 3964
3910 3965 if (ct == NULL) {
3911 3966 pid = curproc->p_pid;
3912 3967 sysid = 0;
3913 3968 } else {
3914 3969 pid = ct->cc_pid;
3915 3970 sysid = ct->cc_sysid;
3916 3971 }
3917 3972
3918 3973 mutex_enter(&flock_lock);
3919 3974 gp = lock_graph[HASH_INDEX(vp)];
3920 3975 mutex_exit(&flock_lock);
3921 3976 if (gp == NULL)
3922 3977 return (0);
3923 3978
3924 3979 mutex_enter(&gp->gp_mutex);
3925 3980 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
3926 3981
3927 3982 for (; lock && lock->l_vnode == vp; lock = lock->l_next) {
3928 3983 if ((svmand || (lock->l_state & NBMAND_LOCK)) &&
3929 3984 (lock->l_flock.l_sysid != sysid ||
3930 3985 lock->l_flock.l_pid != pid) &&
3931 3986 lock_blocks_io(op, offset, length,
3932 3987 lock->l_type, lock->l_start, lock->l_end)) {
3933 3988 conflict = 1;
3934 3989 break;
3935 3990 }
3936 3991 }
3937 3992 mutex_exit(&gp->gp_mutex);
3938 3993
3939 3994 return (conflict);
3940 3995 }
3941 3996
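/*
 * Illustrative sketch, not part of flock.c: how a file system might consult
 * nbl_lock_conflict() before servicing a write, assuming kernel context.
 * The helper name check_write_allowed() is hypothetical, and the svmand
 * handling (share-reservation/mandatory-lock policy) is reduced to a plain
 * flag supplied by the caller.
 */
static int
check_write_allowed(vnode_t *vp, u_offset_t off, ssize_t len,
    int svmand, caller_context_t *ct)
{
	/*
	 * Non-zero means an active NBMAND lock (or, with svmand set,
	 * any active lock) held by another owner overlaps the range.
	 */
	if (nbl_lock_conflict(vp, NBL_WRITE, off, len, svmand, ct))
		return (EACCES);
	return (0);
}
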
3942 3997 /*
3943 3998 * Return non-zero if the given I/O request conflicts with the given lock.
3944 3999 */
3945 4000
3946 4001 static int
3947 4002 lock_blocks_io(nbl_op_t op, u_offset_t offset, ssize_t length,
3948 4003 int lock_type, u_offset_t lock_start, u_offset_t lock_end)
3949 4004 {
3950 4005 ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE);
3951 4006 ASSERT(lock_type == F_RDLCK || lock_type == F_WRLCK);
3952 4007
3953 4008 if (op == NBL_READ && lock_type == F_RDLCK)
3954 4009 return (0);
3955 4010
3956 4011 if (offset <= lock_start && lock_start < offset + length)
3957 4012 return (1);
3958 4013 if (lock_start <= offset && offset <= lock_end)
3959 4014 return (1);
3960 4015
3961 4016 return (0);
3962 4017 }
3963 4018
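/*
 * Illustrative values, not part of flock.c: sample evaluations of
 * lock_blocks_io() for an active F_WRLCK covering bytes [100, 199].
 *
 *   lock_blocks_io(NBL_WRITE, 150, 10, F_WRLCK, 100, 199) -> 1  (I/O starts
 *       inside the locked range)
 *   lock_blocks_io(NBL_WRITE,  50, 100, F_WRLCK, 100, 199) -> 1 (I/O spans
 *       the start of the locked range)
 *   lock_blocks_io(NBL_READ,    0,  50, F_WRLCK, 100, 199) -> 0 (no overlap)
 *   lock_blocks_io(NBL_READ,  150,  10, F_RDLCK, 100, 199) -> 0 (read never
 *       conflicts with a read lock)
 */
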
3964 4019 #ifdef DEBUG
3965 4020 static void
3966 4021 check_active_locks(graph_t *gp)
3967 4022 {
3968 4023 lock_descriptor_t *lock, *lock1;
3969 4024 edge_t *ep;
3970 4025
3971 4026 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
3972 4027 lock = lock->l_next) {
3973 4028 ASSERT(IS_ACTIVE(lock));
3974 4029 ASSERT(NOT_BLOCKED(lock));
3975 4030 ASSERT(!IS_BARRIER(lock));
3976 4031
3977 4032 ep = FIRST_IN(lock);
3978 4033
3979 4034 while (ep != HEAD(lock)) {
3980 4035 ASSERT(IS_SLEEPING(ep->from_vertex));
3981 4036 ASSERT(!NOT_BLOCKED(ep->from_vertex));
3982 4037 ep = NEXT_IN(ep);
3983 4038 }
3984 4039
3985 4040 for (lock1 = lock->l_next; lock1 != ACTIVE_HEAD(gp);
3986 4041 lock1 = lock1->l_next) {
3987 4042 if (lock1->l_vnode == lock->l_vnode) {
3988 4043 if (BLOCKS(lock1, lock)) {
3989 4044 cmn_err(CE_PANIC,
3990 4045 "active lock %p blocks %p",
3991 4046 (void *)lock1, (void *)lock);
3992 4047 } else if (BLOCKS(lock, lock1)) {
3993 4048 cmn_err(CE_PANIC,
3994 4049 "active lock %p blocks %p",
3995 4050 (void *)lock, (void *)lock1);
3996 4051 }
3997 4052 }
3998 4053 }
3999 4054 }
4000 4055 }
4001 4056
4002 4057 /*
4003 4058 	 * Effect: This function checks to see if the transition from 'old_state' to
4004 4059 * 'new_state' is a valid one. It returns 0 if the transition is valid
4005 4060 * and 1 if it is not.
4006 4061 * For a map of valid transitions, see sys/flock_impl.h
4007 4062 */
4008 4063 static int
4009 4064 check_lock_transition(int old_state, int new_state)
4010 4065 {
4011 4066 switch (old_state) {
4012 4067 case FLK_INITIAL_STATE:
4013 4068 if ((new_state == FLK_START_STATE) ||
4014 4069 (new_state == FLK_SLEEPING_STATE) ||
4015 4070 (new_state == FLK_ACTIVE_STATE) ||
4016 4071 (new_state == FLK_DEAD_STATE)) {
4017 4072 return (0);
4018 4073 } else {
4019 4074 return (1);
4020 4075 }
4021 4076 case FLK_START_STATE:
4022 4077 if ((new_state == FLK_ACTIVE_STATE) ||
4023 4078 (new_state == FLK_DEAD_STATE)) {
4024 4079 return (0);
4025 4080 } else {
4026 4081 return (1);
4027 4082 }
4028 4083 case FLK_ACTIVE_STATE:
4029 4084 if (new_state == FLK_DEAD_STATE) {
4030 4085 return (0);
4031 4086 } else {
4032 4087 return (1);
4033 4088 }
4034 4089 case FLK_SLEEPING_STATE:
4035 4090 if ((new_state == FLK_GRANTED_STATE) ||
4036 4091 (new_state == FLK_INTERRUPTED_STATE) ||
4037 4092 (new_state == FLK_CANCELLED_STATE)) {
4038 4093 return (0);
4039 4094 } else {
4040 4095 return (1);
4041 4096 }
4042 4097 case FLK_GRANTED_STATE:
4043 4098 if ((new_state == FLK_START_STATE) ||
4044 4099 (new_state == FLK_INTERRUPTED_STATE) ||
4045 4100 (new_state == FLK_CANCELLED_STATE)) {
4046 4101 return (0);
4047 4102 } else {
4048 4103 return (1);
4049 4104 }
4050 4105 case FLK_CANCELLED_STATE:
4051 4106 if ((new_state == FLK_INTERRUPTED_STATE) ||
4052 4107 (new_state == FLK_DEAD_STATE)) {
4053 4108 return (0);
4054 4109 } else {
4055 4110 return (1);
4056 4111 }
4057 4112 case FLK_INTERRUPTED_STATE:
4058 4113 if (new_state == FLK_DEAD_STATE) {
4059 4114 return (0);
4060 4115 } else {
4061 4116 return (1);
4062 4117 }
4063 4118 case FLK_DEAD_STATE:
4064 4119 /* May be set more than once */
4065 4120 if (new_state == FLK_DEAD_STATE) {
4066 4121 return (0);
4067 4122 } else {
4068 4123 return (1);
4069 4124 }
4070 4125 default:
4071 4126 return (1);
4072 4127 }
4073 4128 }
4074 4129
4075 4130 static void
4076 4131 check_sleeping_locks(graph_t *gp)
4077 4132 {
4078 4133 lock_descriptor_t *lock1, *lock2;
4079 4134 edge_t *ep;
4080 4135 for (lock1 = SLEEPING_HEAD(gp)->l_next; lock1 != SLEEPING_HEAD(gp);
4081 4136 lock1 = lock1->l_next) {
4082 4137 ASSERT(!IS_BARRIER(lock1));
4083 4138 for (lock2 = lock1->l_next; lock2 != SLEEPING_HEAD(gp);
4084 4139 lock2 = lock2->l_next) {
4085 4140 if (lock1->l_vnode == lock2->l_vnode) {
4086 4141 if (BLOCKS(lock2, lock1)) {
4087 4142 ASSERT(!IS_GRANTED(lock1));
4088 4143 ASSERT(!NOT_BLOCKED(lock1));
4089 4144 path(lock1, lock2);
4090 4145 }
4091 4146 }
4092 4147 }
4093 4148
4094 4149 for (lock2 = ACTIVE_HEAD(gp)->l_next; lock2 != ACTIVE_HEAD(gp);
4095 4150 lock2 = lock2->l_next) {
4096 4151 ASSERT(!IS_BARRIER(lock1));
4097 4152 if (lock1->l_vnode == lock2->l_vnode) {
4098 4153 if (BLOCKS(lock2, lock1)) {
4099 4154 ASSERT(!IS_GRANTED(lock1));
4100 4155 ASSERT(!NOT_BLOCKED(lock1));
4101 4156 path(lock1, lock2);
4102 4157 }
4103 4158 }
4104 4159 }
4105 4160 ep = FIRST_ADJ(lock1);
4106 4161 while (ep != HEAD(lock1)) {
4107 4162 ASSERT(BLOCKS(ep->to_vertex, lock1));
4108 4163 ep = NEXT_ADJ(ep);
4109 4164 }
4110 4165 }
4111 4166 }
4112 4167
4113 4168 static int
4114 4169 level_two_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2, int no_path)
4115 4170 {
4116 4171 edge_t *ep;
4117 4172 lock_descriptor_t *vertex;
4118 4173 lock_descriptor_t *vertex_stack;
4119 4174
4120 4175 STACK_INIT(vertex_stack);
4121 4176
4122 4177 flk_graph_uncolor(lock1->l_graph);
4123 4178 ep = FIRST_ADJ(lock1);
4124 4179 ASSERT(ep != HEAD(lock1));
4125 4180 while (ep != HEAD(lock1)) {
4126 4181 if (no_path)
4127 4182 ASSERT(ep->to_vertex != lock2);
4128 4183 STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
4129 4184 COLOR(ep->to_vertex);
4130 4185 ep = NEXT_ADJ(ep);
4131 4186 }
4132 4187
4133 4188 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
4134 4189 STACK_POP(vertex_stack, l_dstack);
4135 4190 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
4136 4191 ep = NEXT_ADJ(ep)) {
4137 4192 if (COLORED(ep->to_vertex))
4138 4193 continue;
4139 4194 COLOR(ep->to_vertex);
4140 4195 if (ep->to_vertex == lock2)
4141 4196 return (1);
4142 4197
4143 4198 STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
4144 4199 }
4145 4200 }
4146 4201 return (0);
4147 4202 }
4148 4203
4149 4204 static void
4150 4205 check_owner_locks(graph_t *gp, pid_t pid, int sysid, vnode_t *vp)
4151 4206 {
4152 4207 lock_descriptor_t *lock;
4153 4208
4154 4209 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
4155 4210
4156 4211 if (lock) {
4157 4212 while (lock != ACTIVE_HEAD(gp) && (lock->l_vnode == vp)) {
4158 4213 if (lock->l_flock.l_pid == pid &&
4159 4214 lock->l_flock.l_sysid == sysid)
4160 4215 cmn_err(CE_PANIC,
4161 4216 "owner pid %d's lock %p in active queue",
4162 4217 pid, (void *)lock);
4163 4218 lock = lock->l_next;
4164 4219 }
4165 4220 }
4166 4221 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
4167 4222
4168 4223 if (lock) {
4169 4224 while (lock != SLEEPING_HEAD(gp) && (lock->l_vnode == vp)) {
4170 4225 if (lock->l_flock.l_pid == pid &&
4171 4226 lock->l_flock.l_sysid == sysid)
4172 4227 cmn_err(CE_PANIC,
4173 4228 "owner pid %d's lock %p in sleep queue",
4174 4229 pid, (void *)lock);
4175 4230 lock = lock->l_next;
4176 4231 }
4177 4232 }
4178 4233 }
4179 4234
4180 4235 static int
4181 4236 level_one_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4182 4237 {
4183 4238 edge_t *ep = FIRST_ADJ(lock1);
4184 4239
4185 4240 while (ep != HEAD(lock1)) {
4186 4241 if (ep->to_vertex == lock2)
4187 4242 return (1);
4188 4243 else
4189 4244 ep = NEXT_ADJ(ep);
4190 4245 }
4191 4246 return (0);
4192 4247 }
4193 4248
4194 4249 static int
4195 4250 no_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4196 4251 {
4197 4252 return (!level_two_path(lock1, lock2, 1));
4198 4253 }
4199 4254
4200 4255 static void
4201 4256 path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4202 4257 {
4203 4258 if (level_one_path(lock1, lock2)) {
4204 4259 if (level_two_path(lock1, lock2, 0) != 0) {
4205 4260 cmn_err(CE_WARN,
4206 4261 "one edge one path from lock1 %p lock2 %p",
4207 4262 (void *)lock1, (void *)lock2);
4208 4263 }
4209 4264 } else if (no_path(lock1, lock2)) {
4210 4265 cmn_err(CE_PANIC,
4211 4266 "No path from lock1 %p to lock2 %p",
4212 4267 (void *)lock1, (void *)lock2);
4213 4268 }
4214 4269 }
4215 4270 #endif /* DEBUG */