6253 F_GETLK doesn't always return lock owner
The F_GETLK fcntl doesn't return the offending lock when a read lock is
already held on the file, a write lock is waiting behind it, and another
read lock is then requested. The waiting write lock blocks that request,
but without this patch it isn't returned by F_GETLK.
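
As a hedged illustration (not part of the webrev; the file name, ranges and
timing are arbitrary), the scenario can be reproduced from user space roughly
as follows: process A holds a read lock, process B blocks waiting for a write
lock over the same range, and process C then asks F_GETLK whether a read lock
could be placed. Under the policy implemented in flk_process_request() below,
a read F_SETLK would not be granted while the writer waits, so F_GETLK should
report B's pending write lock; without the patch it reports F_UNLCK.

/*
 * Hypothetical reproduction sketch; error handling kept minimal.
 */
#include <sys/types.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static void
set_lock(int fd, short type, int cmd)
{
	struct flock fl;

	fl.l_type = type;
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;
	fl.l_len = 0;			/* whole file */
	if (fcntl(fd, cmd, &fl) == -1) {
		perror("fcntl");
		exit(1);
	}
}

int
main(void)
{
	int fd = open("/tmp/getlk_demo", O_RDWR | O_CREAT, 0644);
	pid_t writer, querier;
	struct flock fl;

	if (fd == -1) {
		perror("open");
		return (1);
	}

	/* A: this process takes and holds a read lock. */
	set_lock(fd, F_RDLCK, F_SETLK);

	/* B: child blocks waiting for a write lock behind A's read lock. */
	if ((writer = fork()) == 0) {
		set_lock(fd, F_WRLCK, F_SETLKW);	/* sleeps here */
		pause();
		_exit(0);
	}
	(void) sleep(1);	/* crude: give B time to reach the sleep queue */

	/* C: second child asks whether a read lock could be placed. */
	if ((querier = fork()) == 0) {
		fl.l_type = F_RDLCK;
		fl.l_whence = SEEK_SET;
		fl.l_start = 0;
		fl.l_len = 0;
		if (fcntl(fd, F_GETLK, &fl) == -1) {
			perror("F_GETLK");
			_exit(1);
		}
		if (fl.l_type == F_UNLCK)
			(void) printf("no blocker reported (buggy)\n");
		else
			(void) printf("blocked by %s lock of pid %ld\n",
			    (fl.l_type == F_WRLCK) ? "write" : "read",
			    (long)fl.l_pid);
		_exit(0);
	}
	(void) waitpid(querier, NULL, 0);
	(void) kill(writer, SIGKILL);
	(void) waitpid(writer, NULL, 0);
	return (0);
}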
--- old/usr/src/uts/common/os/flock.c
+++ new/usr/src/uts/common/os/flock.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 28 /* All Rights Reserved */
29 29
30 30 /*
31 31 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
32 32 */
33 33
34 34 #include <sys/flock_impl.h>
35 35 #include <sys/vfs.h>
36 36 #include <sys/t_lock.h> /* for <sys/callb.h> */
37 37 #include <sys/callb.h>
38 38 #include <sys/clconf.h>
39 39 #include <sys/cladm.h>
40 40 #include <sys/nbmlock.h>
41 41 #include <sys/cred.h>
42 42 #include <sys/policy.h>
43 43
44 44 /*
45 45 * The following four variables are for statistics purposes and they are
46 46 * not protected by locks. They may not be accurate but will at least be
47 47 * close to the actual value.
48 48 */
49 49
50 50 int flk_lock_allocs;
51 51 int flk_lock_frees;
52 52 int edge_allocs;
53 53 int edge_frees;
54 54 int flk_proc_vertex_allocs;
55 55 int flk_proc_edge_allocs;
56 56 int flk_proc_vertex_frees;
57 57 int flk_proc_edge_frees;
58 58
59 59 static kmutex_t flock_lock;
60 60
61 61 #ifdef DEBUG
62 62 int check_debug = 0;
63 63 #define CHECK_ACTIVE_LOCKS(gp) if (check_debug) \
64 64 check_active_locks(gp);
65 65 #define CHECK_SLEEPING_LOCKS(gp) if (check_debug) \
66 66 check_sleeping_locks(gp);
67 67 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp) \
68 68 if (check_debug) \
69 69 check_owner_locks(gp, pid, sysid, vp);
70 70 #define CHECK_LOCK_TRANSITION(old_state, new_state) \
71 71 { \
72 72 if (check_lock_transition(old_state, new_state)) { \
73 73 cmn_err(CE_PANIC, "Illegal lock transition \
74 74 from %d to %d", old_state, new_state); \
75 75 } \
76 76 }
77 77 #else
78 78
79 79 #define CHECK_ACTIVE_LOCKS(gp)
80 80 #define CHECK_SLEEPING_LOCKS(gp)
81 81 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp)
82 82 #define CHECK_LOCK_TRANSITION(old_state, new_state)
83 83
84 84 #endif /* DEBUG */
85 85
86 86 struct kmem_cache *flk_edge_cache;
87 87
88 88 graph_t *lock_graph[HASH_SIZE];
89 89 proc_graph_t pgraph;
90 90
91 91 /*
92 92 * Clustering.
93 93 *
94 94 * NLM REGISTRY TYPE IMPLEMENTATION
95 95 *
96 96 * Assumptions:
97 97 * 1. Nodes in a cluster are numbered starting at 1; always non-negative
98 98 * integers; maximum node id is returned by clconf_maximum_nodeid().
99 99 * 2. We use this node id to identify the node an NLM server runs on.
100 100 */
101 101
102 102 /*
103 103 * NLM registry object keeps track of NLM servers via their
104 104 * nlmids (which are the node ids of the node in the cluster they run on)
105 105 * that have requested locks at this LLM with which this registry is
106 106 * associated.
107 107 *
108 108 * Representation of abstraction:
109 109 * rep = record[ states: array[nlm_state],
110 110 * lock: mutex]
111 111 *
112 112 * Representation invariants:
113 113 * 1. index i of rep.states is between 0 and n - 1 where n is number
114 114 * of elements in the array, which happen to be the maximum number
115 115 * of nodes in the cluster configuration + 1.
116 116 * 2. map nlmid to index i of rep.states
117 117 * 0 -> 0
118 118 * 1 -> 1
119 119 * 2 -> 2
120 120 * n-1 -> clconf_maximum_nodeid()+1
121 121 * 3. This 1-1 mapping is quite convenient and it avoids errors resulting
122 122 * from forgetting to subtract 1 from the index.
123 123 * 4. The reason we keep the 0th index is the following. A legitimate
124 124 * cluster configuration includes making a UFS file system NFS
125 125 * exportable. The code is structured so that if you're in a cluster
126 126 * you do one thing; otherwise, you do something else. The problem
127 127 * is what to do if you think you're in a cluster with PXFS loaded,
128 128 * but you're using UFS not PXFS? The upper two bytes of the sysid
129 129 * encode the node id of the node where NLM server runs; these bytes
130 130 * are zero for UFS. Since the nodeid is used to index into the
131 131 * registry, we can record the NLM server state information at index
132 132 * 0 using the same mechanism used for PXFS file locks!
133 133 */
134 134 static flk_nlm_status_t *nlm_reg_status = NULL; /* state array 0..N-1 */
135 135 static kmutex_t nlm_reg_lock;			/* lock to protect array */
136 136 static uint_t nlm_status_size; /* size of state array */
137 137
138 138 /*
139 139 * Although we need a global lock dependency graph (and associated data
140 140 * structures), we also need a per-zone notion of whether the lock manager is
141 141 * running, and so whether to allow lock manager requests or not.
142 142 *
143 143 * Thus, on a per-zone basis we maintain a ``global'' variable
144 144 * (flk_lockmgr_status), protected by flock_lock, and set when the lock
145 145 * manager is determined to be changing state (starting or stopping).
146 146 *
147 147 * Each graph/zone pair also has a copy of this variable, which is protected by
148 148 * the graph's mutex.
149 149 *
150 150 * The per-graph copies are used to synchronize lock requests with shutdown
151 151 * requests. The global copy is used to initialize the per-graph field when a
152 152 * new graph is created.
153 153 */
154 154 struct flock_globals {
155 155 flk_lockmgr_status_t flk_lockmgr_status;
156 156 flk_lockmgr_status_t lockmgr_status[HASH_SIZE];
157 157 };
158 158
159 159 zone_key_t flock_zone_key;
160 160
161 161 static void create_flock(lock_descriptor_t *, flock64_t *);
162 162 static lock_descriptor_t *flk_get_lock(void);
163 163 static void flk_free_lock(lock_descriptor_t *lock);
164 164 static void flk_get_first_blocking_lock(lock_descriptor_t *request);
165 165 static int flk_process_request(lock_descriptor_t *);
166 166 static int flk_add_edge(lock_descriptor_t *, lock_descriptor_t *, int, int);
167 167 static edge_t *flk_get_edge(void);
168 168 static int flk_wait_execute_request(lock_descriptor_t *);
169 169 static int flk_relation(lock_descriptor_t *, lock_descriptor_t *);
170 170 static void flk_insert_active_lock(lock_descriptor_t *);
171 171 static void flk_delete_active_lock(lock_descriptor_t *, int);
172 172 static void flk_insert_sleeping_lock(lock_descriptor_t *);
173 173 static void flk_graph_uncolor(graph_t *);
174 174 static void flk_wakeup(lock_descriptor_t *, int);
175 175 static void flk_free_edge(edge_t *);
176 176 static void flk_recompute_dependencies(lock_descriptor_t *,
177 177 lock_descriptor_t **, int, int);
178 178 static int flk_find_barriers(lock_descriptor_t *);
179 179 static void flk_update_barriers(lock_descriptor_t *);
180 180 static int flk_color_reachables(lock_descriptor_t *);
181 181 static int flk_canceled(lock_descriptor_t *);
182 182 static void flk_delete_locks_by_sysid(lock_descriptor_t *);
183 183 static void report_blocker(lock_descriptor_t *, lock_descriptor_t *);
184 184 static void wait_for_lock(lock_descriptor_t *);
185 185 static void unlock_lockmgr_granted(struct flock_globals *);
186 186 static void wakeup_sleeping_lockmgr_locks(struct flock_globals *);
187 187
188 188 /* Clustering hooks */
189 189 static void cl_flk_change_nlm_state_all_locks(int, flk_nlm_status_t);
190 190 static void cl_flk_wakeup_sleeping_nlm_locks(int);
191 191 static void cl_flk_unlock_nlm_granted(int);
192 192
193 193 #ifdef DEBUG
194 194 static int check_lock_transition(int, int);
195 195 static void check_sleeping_locks(graph_t *);
196 196 static void check_active_locks(graph_t *);
197 197 static int no_path(lock_descriptor_t *, lock_descriptor_t *);
198 198 static void path(lock_descriptor_t *, lock_descriptor_t *);
199 199 static void check_owner_locks(graph_t *, pid_t, int, vnode_t *);
200 200 static int level_one_path(lock_descriptor_t *, lock_descriptor_t *);
201 201 static int level_two_path(lock_descriptor_t *, lock_descriptor_t *, int);
202 202 #endif
203 203
204 204 /* proc_graph function definitions */
205 205 static int flk_check_deadlock(lock_descriptor_t *);
206 206 static void flk_proc_graph_uncolor(void);
207 207 static proc_vertex_t *flk_get_proc_vertex(lock_descriptor_t *);
208 208 static proc_edge_t *flk_get_proc_edge(void);
209 209 static void flk_proc_release(proc_vertex_t *);
210 210 static void flk_free_proc_edge(proc_edge_t *);
211 211 static void flk_update_proc_graph(edge_t *, int);
212 212
213 213 /* Non-blocking mandatory locking */
214 214 static int lock_blocks_io(nbl_op_t, u_offset_t, ssize_t, int, u_offset_t,
215 215 u_offset_t);
216 216
217 217 static struct flock_globals *
218 218 flk_get_globals(void)
219 219 {
220 220 /*
221 221 * The KLM module had better be loaded if we're attempting to handle
222 222 * lockmgr requests.
223 223 */
224 224 ASSERT(flock_zone_key != ZONE_KEY_UNINITIALIZED);
225 225 return (zone_getspecific(flock_zone_key, curproc->p_zone));
226 226 }
227 227
228 228 static flk_lockmgr_status_t
229 229 flk_get_lockmgr_status(void)
230 230 {
231 231 struct flock_globals *fg;
232 232
233 233 ASSERT(MUTEX_HELD(&flock_lock));
234 234
235 235 if (flock_zone_key == ZONE_KEY_UNINITIALIZED) {
236 236 /*
237 237 * KLM module not loaded; lock manager definitely not running.
238 238 */
239 239 return (FLK_LOCKMGR_DOWN);
240 240 }
241 241 fg = flk_get_globals();
242 242 return (fg->flk_lockmgr_status);
243 243 }
244 244
245 245 /*
246 246 * Routine called from fs_frlock in fs/fs_subr.c
247 247 */
248 248
249 249 int
250 250 reclock(vnode_t *vp,
251 251 flock64_t *lckdat,
252 252 int cmd,
253 253 int flag,
254 254 u_offset_t offset,
255 255 flk_callback_t *flk_cbp)
256 256 {
257 257 lock_descriptor_t stack_lock_request;
258 258 lock_descriptor_t *lock_request;
259 259 int error = 0;
260 260 graph_t *gp;
261 261 int nlmid;
262 262
263 263 /*
264 264 * Check access permissions
265 265 */
266 266 if ((cmd & SETFLCK) &&
267 267 ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) ||
268 268 (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0)))
269 269 return (EBADF);
270 270
271 271 /*
272 272 * for query and unlock we use the stack_lock_request
273 273 */
274 274
275 275 if ((lckdat->l_type == F_UNLCK) ||
276 276 !((cmd & INOFLCK) || (cmd & SETFLCK))) {
277 277 lock_request = &stack_lock_request;
278 278 (void) bzero((caddr_t)lock_request,
279 279 sizeof (lock_descriptor_t));
280 280
281 281 /*
282 282 		 * the following is added to make the assertions in
283 283 		 * flk_execute_request() pass
284 284 */
285 285
286 286 lock_request->l_edge.edge_in_next = &lock_request->l_edge;
287 287 lock_request->l_edge.edge_in_prev = &lock_request->l_edge;
288 288 lock_request->l_edge.edge_adj_next = &lock_request->l_edge;
289 289 lock_request->l_edge.edge_adj_prev = &lock_request->l_edge;
290 290 lock_request->l_status = FLK_INITIAL_STATE;
291 291 } else {
292 292 lock_request = flk_get_lock();
293 293 }
294 294 lock_request->l_state = 0;
295 295 lock_request->l_vnode = vp;
296 296 lock_request->l_zoneid = getzoneid();
297 297
298 298 /*
299 299 * Convert the request range into the canonical start and end
300 300 * values. The NLM protocol supports locking over the entire
301 301 * 32-bit range, so there's no range checking for remote requests,
302 302 * but we still need to verify that local requests obey the rules.
303 303 */
304 304 /* Clustering */
305 305 if ((cmd & (RCMDLCK | PCMDLCK)) != 0) {
306 306 ASSERT(lckdat->l_whence == 0);
307 307 lock_request->l_start = lckdat->l_start;
308 308 lock_request->l_end = (lckdat->l_len == 0) ? MAX_U_OFFSET_T :
309 309 lckdat->l_start + (lckdat->l_len - 1);
310 310 } else {
311 311 /* check the validity of the lock range */
312 312 error = flk_convert_lock_data(vp, lckdat,
313 313 &lock_request->l_start, &lock_request->l_end,
314 314 offset);
315 315 if (error) {
316 316 goto done;
317 317 }
318 318 error = flk_check_lock_data(lock_request->l_start,
319 319 lock_request->l_end, MAXEND);
320 320 if (error) {
321 321 goto done;
322 322 }
323 323 }
324 324
325 325 ASSERT(lock_request->l_end >= lock_request->l_start);
326 326
327 327 lock_request->l_type = lckdat->l_type;
328 328 if (cmd & INOFLCK)
329 329 lock_request->l_state |= IO_LOCK;
330 330 if (cmd & SLPFLCK)
331 331 lock_request->l_state |= WILLING_TO_SLEEP_LOCK;
332 332 if (cmd & RCMDLCK)
333 333 lock_request->l_state |= LOCKMGR_LOCK;
334 334 if (cmd & NBMLCK)
335 335 lock_request->l_state |= NBMAND_LOCK;
336 336 /*
337 337 * Clustering: set flag for PXFS locks
338 338 * We do not _only_ check for the PCMDLCK flag because PXFS locks could
339 339 * also be of type 'RCMDLCK'.
340 340 * We do not _only_ check the GETPXFSID() macro because local PXFS
341 341 * clients use a pxfsid of zero to permit deadlock detection in the LLM.
342 342 */
343 343
344 344 if ((cmd & PCMDLCK) || (GETPXFSID(lckdat->l_sysid) != 0)) {
345 345 lock_request->l_state |= PXFS_LOCK;
346 346 }
347 347 if (!((cmd & SETFLCK) || (cmd & INOFLCK))) {
348 348 if (lock_request->l_type == F_RDLCK ||
349 349 lock_request->l_type == F_WRLCK)
350 350 lock_request->l_state |= QUERY_LOCK;
351 351 }
352 352 lock_request->l_flock = (*lckdat);
353 353 lock_request->l_callbacks = flk_cbp;
354 354
355 355 /*
356 356 * We are ready for processing the request
357 357 */
358 358 if (IS_LOCKMGR(lock_request)) {
359 359 /*
360 360 * If the lock request is an NLM server request ....
361 361 */
362 362 if (nlm_status_size == 0) { /* not booted as cluster */
363 363 mutex_enter(&flock_lock);
364 364 /*
365 365 * Bail out if this is a lock manager request and the
366 366 * lock manager is not supposed to be running.
367 367 */
368 368 if (flk_get_lockmgr_status() != FLK_LOCKMGR_UP) {
369 369 mutex_exit(&flock_lock);
370 370 error = ENOLCK;
371 371 goto done;
372 372 }
373 373 mutex_exit(&flock_lock);
374 374 } else { /* booted as a cluster */
375 375 nlmid = GETNLMID(lock_request->l_flock.l_sysid);
376 376 ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
377 377
378 378 mutex_enter(&nlm_reg_lock);
379 379 /*
380 380 * If the NLM registry does not know about this
381 381 * NLM server making the request, add its nlmid
382 382 * to the registry.
383 383 */
384 384 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status,
385 385 nlmid)) {
386 386 FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid);
387 387 } else if (!FLK_REGISTRY_IS_NLM_UP(nlm_reg_status,
388 388 nlmid)) {
389 389 /*
390 390 * If the NLM server is already known (has made
391 391 * previous lock requests) and its state is
392 392 * not NLM_UP (means that NLM server is
393 393 * shutting down), then bail out with an
394 394 * error to deny the lock request.
395 395 */
396 396 mutex_exit(&nlm_reg_lock);
397 397 error = ENOLCK;
398 398 goto done;
399 399 }
400 400 mutex_exit(&nlm_reg_lock);
401 401 }
402 402 }
403 403
404 404 /* Now get the lock graph for a particular vnode */
405 405 gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH);
406 406
407 407 /*
408 408 * We drop rwlock here otherwise this might end up causing a
409 409 * deadlock if this IOLOCK sleeps. (bugid # 1183392).
410 410 */
411 411
412 412 if (IS_IO_LOCK(lock_request)) {
413 413 VOP_RWUNLOCK(vp,
414 414 (lock_request->l_type == F_RDLCK) ?
415 415 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
416 416 }
417 417 mutex_enter(&gp->gp_mutex);
418 418
419 419 lock_request->l_state |= REFERENCED_LOCK;
420 420 lock_request->l_graph = gp;
421 421
422 422 switch (lock_request->l_type) {
423 423 case F_RDLCK:
424 424 case F_WRLCK:
425 425 if (IS_QUERY_LOCK(lock_request)) {
426 426 flk_get_first_blocking_lock(lock_request);
427 427 (*lckdat) = lock_request->l_flock;
428 428 break;
429 429 }
430 430
431 431 /* process the request now */
432 432
433 433 error = flk_process_request(lock_request);
434 434 break;
435 435
436 436 case F_UNLCK:
437 437 /* unlock request will not block so execute it immediately */
438 438
439 439 if (IS_LOCKMGR(lock_request) &&
440 440 flk_canceled(lock_request)) {
441 441 error = 0;
442 442 } else {
443 443 error = flk_execute_request(lock_request);
444 444 }
445 445 break;
446 446
447 447 case F_UNLKSYS:
448 448 /*
449 449 * Recovery mechanism to release lock manager locks when
450 450 		 * the NFS client crashes and restarts. The NFS server will clear
451 451 * old locks and grant new locks.
452 452 */
453 453
454 454 if (lock_request->l_flock.l_sysid == 0) {
455 455 mutex_exit(&gp->gp_mutex);
456 456 return (EINVAL);
457 457 }
458 458 if (secpolicy_nfs(CRED()) != 0) {
459 459 mutex_exit(&gp->gp_mutex);
460 460 return (EPERM);
461 461 }
462 462 flk_delete_locks_by_sysid(lock_request);
463 463 lock_request->l_state &= ~REFERENCED_LOCK;
464 464 flk_set_state(lock_request, FLK_DEAD_STATE);
465 465 flk_free_lock(lock_request);
466 466 mutex_exit(&gp->gp_mutex);
467 467 return (0);
468 468
469 469 default:
470 470 error = EINVAL;
471 471 break;
472 472 }
473 473
474 474 /* Clustering: For blocked PXFS locks, return */
475 475 if (error == PXFS_LOCK_BLOCKED) {
476 476 lock_request->l_state &= ~REFERENCED_LOCK;
477 477 mutex_exit(&gp->gp_mutex);
478 478 return (error);
479 479 }
480 480
481 481 /*
482 482 * Now that we have seen the status of locks in the system for
483 483 * this vnode we acquire the rwlock if it is an IO_LOCK.
484 484 */
485 485
486 486 if (IS_IO_LOCK(lock_request)) {
487 487 (void) VOP_RWLOCK(vp,
488 488 (lock_request->l_type == F_RDLCK) ?
489 489 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
490 490 if (!error) {
491 491 lckdat->l_type = F_UNLCK;
492 492
493 493 /*
494 494 * This wake up is needed otherwise
495 495 * if IO_LOCK has slept the dependents on this
496 496 * will not be woken up at all. (bugid # 1185482).
497 497 */
498 498
499 499 flk_wakeup(lock_request, 1);
500 500 flk_set_state(lock_request, FLK_DEAD_STATE);
501 501 flk_free_lock(lock_request);
502 502 }
503 503 /*
504 504 * else if error had occurred either flk_process_request()
505 505 * has returned EDEADLK in which case there will be no
506 506 * dependents for this lock or EINTR from flk_wait_execute_
507 507 * request() in which case flk_cancel_sleeping_lock()
508 508 * would have been done. same is true with EBADF.
509 509 */
510 510 }
511 511
512 512 if (lock_request == &stack_lock_request) {
513 513 flk_set_state(lock_request, FLK_DEAD_STATE);
514 514 } else {
515 515 lock_request->l_state &= ~REFERENCED_LOCK;
516 516 if ((error != 0) || IS_DELETED(lock_request)) {
517 517 flk_set_state(lock_request, FLK_DEAD_STATE);
518 518 flk_free_lock(lock_request);
519 519 }
520 520 }
521 521
522 522 mutex_exit(&gp->gp_mutex);
523 523 return (error);
524 524
525 525 done:
526 526 flk_set_state(lock_request, FLK_DEAD_STATE);
527 527 if (lock_request != &stack_lock_request)
528 528 flk_free_lock(lock_request);
529 529 return (error);
530 530 }
531 531
532 532 /*
533 533 * Invoke the callbacks in the given list. If before sleeping, invoke in
534 534 * list order. If after sleeping, invoke in reverse order.
535 535 *
536 536 * CPR (suspend/resume) support: if one of the callbacks returns a
537 537 * callb_cpr_t, return it. This will be used to make the thread CPR-safe
538 538 * while it is sleeping. There should be at most one callb_cpr_t for the
539 539 * thread.
540 540 * XXX This is unnecessarily complicated. The CPR information should just
541 541 * get passed in directly through VOP_FRLOCK and reclock, rather than
542 542 * sneaking it in via a callback.
543 543 */
544 544
545 545 callb_cpr_t *
546 546 flk_invoke_callbacks(flk_callback_t *cblist, flk_cb_when_t when)
547 547 {
548 548 callb_cpr_t *cpr_callbackp = NULL;
549 549 callb_cpr_t *one_result;
550 550 flk_callback_t *cb;
551 551
552 552 if (cblist == NULL)
553 553 return (NULL);
554 554
555 555 if (when == FLK_BEFORE_SLEEP) {
556 556 cb = cblist;
557 557 do {
558 558 one_result = (*cb->cb_callback)(when, cb->cb_data);
559 559 if (one_result != NULL) {
560 560 ASSERT(cpr_callbackp == NULL);
561 561 cpr_callbackp = one_result;
562 562 }
563 563 cb = cb->cb_next;
564 564 } while (cb != cblist);
565 565 } else {
566 566 cb = cblist->cb_prev;
567 567 do {
568 568 one_result = (*cb->cb_callback)(when, cb->cb_data);
569 569 if (one_result != NULL) {
570 570 cpr_callbackp = one_result;
571 571 }
572 572 cb = cb->cb_prev;
573 573 } while (cb != cblist->cb_prev);
574 574 }
575 575
576 576 return (cpr_callbackp);
577 577 }
578 578
579 579 /*
580 580 * Initialize a flk_callback_t to hold the given callback.
581 581 */
582 582
583 583 void
584 584 flk_init_callback(flk_callback_t *flk_cb,
585 585 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata)
586 586 {
587 587 flk_cb->cb_next = flk_cb;
588 588 flk_cb->cb_prev = flk_cb;
589 589 flk_cb->cb_callback = cb_fcn;
590 590 flk_cb->cb_data = cbdata;
591 591 }
592 592
593 593 /*
594 594 * Initialize an flk_callback_t and then link it into the head of an
595 595 * existing list (which may be NULL).
596 596 */
597 597
598 598 void
599 599 flk_add_callback(flk_callback_t *newcb,
600 600 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *),
601 601 void *cbdata, flk_callback_t *cblist)
602 602 {
603 603 flk_init_callback(newcb, cb_fcn, cbdata);
604 604
605 605 if (cblist == NULL)
606 606 return;
607 607
608 608 newcb->cb_prev = cblist->cb_prev;
609 609 newcb->cb_next = cblist;
610 610 cblist->cb_prev->cb_next = newcb;
611 611 cblist->cb_prev = newcb;
612 612 }
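
(Editor's aside, a hedged sketch rather than anything from this webrev: the
callback below and its names are hypothetical; only the flk_callback_t and
flk_init_callback() usage follows the definitions above. A caller that wants
to run code around the sleep initializes one callback and passes it through
the flk_cbp argument of reclock(); returning NULL means it supplies no CPR
information.)

static callb_cpr_t *
sample_frlock_cb(flk_cb_when_t when, void *data)
{
	/* Illustrative only: note which side of the sleep we are on. */
	cmn_err(CE_NOTE, "frlock callback %s sleep (data %p)",
	    (when == FLK_BEFORE_SLEEP) ? "before" : "after", data);
	return (NULL);		/* no CPR info to hand back */
}

	flk_callback_t flk_cb;

	flk_init_callback(&flk_cb, sample_frlock_cb, NULL);
	/* &flk_cb would then be passed as the flk_cbp argument of reclock(). */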
613 613
614 614 /*
615 615 * Initialize the flk_edge_cache data structure and create the
616 616 * nlm_reg_status array.
617 617 */
618 618
619 619 void
620 620 flk_init(void)
621 621 {
622 622 uint_t i;
623 623
624 624 flk_edge_cache = kmem_cache_create("flk_edges",
625 625 sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0);
626 626 if (flk_edge_cache == NULL) {
627 627 cmn_err(CE_PANIC, "Couldn't create flk_edge_cache\n");
628 628 }
629 629 /*
630 630 * Create the NLM registry object.
631 631 */
632 632
633 633 if (cluster_bootflags & CLUSTER_BOOTED) {
634 634 /*
635 635 * This routine tells you the maximum node id that will be used
636 636 * in the cluster. This number will be the size of the nlm
637 637 * registry status array. We add 1 because we will be using
638 638 * all entries indexed from 0 to maxnodeid; e.g., from 0
639 639 * to 64, for a total of 65 entries.
640 640 */
641 641 nlm_status_size = clconf_maximum_nodeid() + 1;
642 642 } else {
643 643 nlm_status_size = 0;
644 644 }
645 645
646 646 if (nlm_status_size != 0) { /* booted as a cluster */
647 647 nlm_reg_status = (flk_nlm_status_t *)
648 648 kmem_alloc(sizeof (flk_nlm_status_t) * nlm_status_size,
649 649 KM_SLEEP);
650 650
651 651 /* initialize all NLM states in array to NLM_UNKNOWN */
652 652 for (i = 0; i < nlm_status_size; i++) {
653 653 nlm_reg_status[i] = FLK_NLM_UNKNOWN;
654 654 }
655 655 }
656 656 }
657 657
658 658 /*
659 659 * Zone constructor/destructor callbacks to be executed when a zone is
660 660 * created/destroyed.
661 661 */
662 662 /* ARGSUSED */
663 663 void *
664 664 flk_zone_init(zoneid_t zoneid)
665 665 {
666 666 struct flock_globals *fg;
667 667 uint_t i;
668 668
669 669 fg = kmem_alloc(sizeof (*fg), KM_SLEEP);
670 670 fg->flk_lockmgr_status = FLK_LOCKMGR_UP;
671 671 for (i = 0; i < HASH_SIZE; i++)
672 672 fg->lockmgr_status[i] = FLK_LOCKMGR_UP;
673 673 return (fg);
674 674 }
675 675
676 676 /* ARGSUSED */
677 677 void
678 678 flk_zone_fini(zoneid_t zoneid, void *data)
679 679 {
680 680 struct flock_globals *fg = data;
681 681
682 682 kmem_free(fg, sizeof (*fg));
683 683 }
684 684
685 685 /*
686 686 * Get a lock_descriptor structure with initialization of edge lists.
687 687 */
688 688
689 689 static lock_descriptor_t *
690 690 flk_get_lock(void)
691 691 {
692 692 lock_descriptor_t *l;
693 693
694 694 l = kmem_zalloc(sizeof (lock_descriptor_t), KM_SLEEP);
695 695
696 696 cv_init(&l->l_cv, NULL, CV_DRIVER, NULL);
697 697 l->l_edge.edge_in_next = &l->l_edge;
698 698 l->l_edge.edge_in_prev = &l->l_edge;
699 699 l->l_edge.edge_adj_next = &l->l_edge;
700 700 l->l_edge.edge_adj_prev = &l->l_edge;
701 701 l->pvertex = -1;
702 702 l->l_status = FLK_INITIAL_STATE;
703 703 flk_lock_allocs++;
704 704 return (l);
705 705 }
706 706
707 707 /*
708 708 * Free a lock_descriptor structure. Just sets the DELETED_LOCK flag
709 709 * when some thread has a reference to it as in reclock().
710 710 */
711 711
712 712 void
713 713 flk_free_lock(lock_descriptor_t *lock)
714 714 {
715 715 ASSERT(IS_DEAD(lock));
716 716 if (IS_REFERENCED(lock)) {
717 717 lock->l_state |= DELETED_LOCK;
718 718 return;
719 719 }
720 720 flk_lock_frees++;
721 721 kmem_free((void *)lock, sizeof (lock_descriptor_t));
722 722 }
723 723
724 724 void
725 725 flk_set_state(lock_descriptor_t *lock, int new_state)
726 726 {
727 727 /*
728 728 * Locks in the sleeping list may be woken up in a number of ways,
729 729 * and more than once. If a sleeping lock is signaled awake more
730 730 * than once, then it may or may not change state depending on its
731 731 * current state.
732 732 * Also note that NLM locks that are sleeping could be moved to an
733 733 * interrupted state more than once if the unlock request is
734 734 * retransmitted by the NLM client - the second time around, this is
735 735 * just a nop.
736 736 * The ordering of being signaled awake is:
737 737 * INTERRUPTED_STATE > CANCELLED_STATE > GRANTED_STATE.
738 738 * The checks below implement this ordering.
739 739 */
740 740 if (IS_INTERRUPTED(lock)) {
741 741 if ((new_state == FLK_CANCELLED_STATE) ||
742 742 (new_state == FLK_GRANTED_STATE) ||
743 743 (new_state == FLK_INTERRUPTED_STATE)) {
744 744 return;
745 745 }
746 746 }
747 747 if (IS_CANCELLED(lock)) {
748 748 if ((new_state == FLK_GRANTED_STATE) ||
749 749 (new_state == FLK_CANCELLED_STATE)) {
750 750 return;
751 751 }
752 752 }
753 753 CHECK_LOCK_TRANSITION(lock->l_status, new_state);
754 754 if (IS_PXFS(lock)) {
755 755 cl_flk_state_transition_notify(lock, lock->l_status, new_state);
756 756 }
757 757 lock->l_status = new_state;
758 758 }
759 759
760 760 /*
761 761 * Routine that checks whether there are any blocking locks in the system.
762 762 *
763 763 * The policy followed is if a write lock is sleeping we don't allow read
764 764 * locks before this write lock even though there may not be any active
765 765 * locks corresponding to the read locks' region.
766 766 *
767 767 * flk_add_edge() function adds an edge between l1 and l2 iff there
768 768 * is no path between l1 and l2. This is done to have a "minimum
769 769 * storage representation" of the dependency graph.
770 770 *
771 771 * Another property of the graph is since only the new request throws
772 772 * edges to the existing locks in the graph, the graph is always topologically
773 773 * ordered.
774 774 */
775 775
776 776 static int
777 777 flk_process_request(lock_descriptor_t *request)
778 778 {
779 779 graph_t *gp = request->l_graph;
780 780 lock_descriptor_t *lock;
781 781 int request_blocked_by_active = 0;
782 782 int request_blocked_by_granted = 0;
783 783 int request_blocked_by_sleeping = 0;
784 784 vnode_t *vp = request->l_vnode;
785 785 int error = 0;
786 786 int request_will_wait = 0;
787 787 int found_covering_lock = 0;
788 788 lock_descriptor_t *covered_by = NULL;
789 789
790 790 ASSERT(MUTEX_HELD(&gp->gp_mutex));
791 791 request_will_wait = IS_WILLING_TO_SLEEP(request);
792 792
793 793 /*
794 794 * check active locks
795 795 */
796 796
797 797 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
798 798
799 799
800 800 if (lock) {
801 801 do {
802 802 if (BLOCKS(lock, request)) {
803 803 if (!request_will_wait)
804 804 return (EAGAIN);
805 805 request_blocked_by_active = 1;
806 806 break;
807 807 }
808 808 /*
809 809 * Grant lock if it is for the same owner holding active
810 810 * lock that covers the request.
811 811 */
812 812
813 813 if (SAME_OWNER(lock, request) &&
814 814 COVERS(lock, request) &&
815 815 (request->l_type == F_RDLCK))
816 816 return (flk_execute_request(request));
817 817 lock = lock->l_next;
818 818 } while (lock->l_vnode == vp);
819 819 }
820 820
821 821 if (!request_blocked_by_active) {
822 822 lock_descriptor_t *lk[1];
823 823 lock_descriptor_t *first_glock = NULL;
824 824 /*
825 825 * Shall we grant this?! NO!!
826 826 * What about those locks that were just granted and still
827 827 * in sleep queue. Those threads are woken up and so locks
828 828 * are almost active.
829 829 */
830 830 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
831 831 if (lock) {
832 832 do {
833 833 if (BLOCKS(lock, request)) {
834 834 if (IS_GRANTED(lock)) {
835 835 request_blocked_by_granted = 1;
836 836 } else {
837 837 request_blocked_by_sleeping = 1;
838 838 }
839 839 }
840 840
841 841 lock = lock->l_next;
842 842 } while ((lock->l_vnode == vp));
843 843 first_glock = lock->l_prev;
844 844 ASSERT(first_glock->l_vnode == vp);
845 845 }
846 846
847 847 if (request_blocked_by_granted)
848 848 goto block;
849 849
850 850 if (!request_blocked_by_sleeping) {
851 851 /*
852 852 * If the request isn't going to be blocked by a
853 853 * sleeping request, we know that it isn't going to
854 854 * be blocked; we can just execute the request --
855 855 * without performing costly deadlock detection.
856 856 */
857 857 ASSERT(!request_blocked_by_active);
858 858 return (flk_execute_request(request));
859 859 } else if (request->l_type == F_RDLCK) {
860 860 /*
861 861 * If we have a sleeping writer in the requested
862 862 * lock's range, block.
863 863 */
864 864 goto block;
865 865 }
866 866
867 867 lk[0] = request;
868 868 request->l_state |= RECOMPUTE_LOCK;
869 869 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
870 870 if (lock) {
871 871 do {
872 872 flk_recompute_dependencies(lock, lk, 1, 0);
873 873 lock = lock->l_next;
874 874 } while (lock->l_vnode == vp);
875 875 }
876 876 lock = first_glock;
877 877 if (lock) {
878 878 do {
879 879 if (IS_GRANTED(lock)) {
880 880 flk_recompute_dependencies(lock, lk, 1, 0);
881 881 }
882 882 lock = lock->l_prev;
883 883 } while ((lock->l_vnode == vp));
884 884 }
885 885 request->l_state &= ~RECOMPUTE_LOCK;
886 886 if (!NO_DEPENDENTS(request) && flk_check_deadlock(request))
887 887 return (EDEADLK);
888 888 return (flk_execute_request(request));
889 889 }
890 890
891 891 block:
892 892 if (request_will_wait)
893 893 flk_graph_uncolor(gp);
894 894
895 895 /* check sleeping locks */
896 896
897 897 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
898 898
899 899 /*
900 900 * If we find a sleeping write lock that is a superset of the
901 901 * region wanted by request we can be assured that by adding an
902 902 * edge to this write lock we have paths to all locks in the
903 903 * graph that blocks the request except in one case and that is why
904 904 * another check for SAME_OWNER in the loop below. The exception
905 905 * case is when this process that owns the sleeping write lock 'l1'
906 906 * has other locks l2, l3, l4 that are in the system and arrived
907 907 * before l1. l1 does not have path to these locks as they are from
908 908 * same process. We break when we find a second covering sleeping
909 909 * lock l5 owned by a process different from that owning l1, because
910 910 * there cannot be any of l2, l3, l4, etc., arrived before l5, and if
911 911 * it has l1 would have produced a deadlock already.
912 912 */
913 913
914 914 if (lock) {
915 915 do {
916 916 if (BLOCKS(lock, request)) {
917 917 if (!request_will_wait)
918 918 return (EAGAIN);
919 919 if (COVERS(lock, request) &&
920 920 lock->l_type == F_WRLCK) {
921 921 if (found_covering_lock &&
922 922 !SAME_OWNER(lock, covered_by)) {
923 923 found_covering_lock++;
924 924 break;
925 925 }
926 926 found_covering_lock = 1;
927 927 covered_by = lock;
928 928 }
929 929 if (found_covering_lock &&
930 930 !SAME_OWNER(lock, covered_by)) {
931 931 lock = lock->l_next;
932 932 continue;
933 933 }
934 934 if ((error = flk_add_edge(request, lock,
935 935 !found_covering_lock, 0)))
936 936 return (error);
937 937 }
938 938 lock = lock->l_next;
939 939 } while (lock->l_vnode == vp);
940 940 }
941 941
942 942 /*
943 943 * found_covering_lock == 2 iff at this point 'request' has paths
944 944 * to all locks that blocks 'request'. found_covering_lock == 1 iff at this
945 945 * point 'request' has paths to all locks that blocks 'request' whose owners
946 946 * are not same as the one that covers 'request' (covered_by above) and
947 947 * we can have locks whose owner is same as covered_by in the active list.
948 948 */
949 949
950 950 if (request_blocked_by_active && found_covering_lock != 2) {
951 951 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
952 952 ASSERT(lock != NULL);
953 953 do {
954 954 if (BLOCKS(lock, request)) {
955 955 if (found_covering_lock &&
956 956 !SAME_OWNER(lock, covered_by)) {
957 957 lock = lock->l_next;
958 958 continue;
959 959 }
960 960 if ((error = flk_add_edge(request, lock,
961 961 CHECK_CYCLE, 0)))
962 962 return (error);
963 963 }
964 964 lock = lock->l_next;
965 965 } while (lock->l_vnode == vp);
966 966 }
967 967
968 968 if (NOT_BLOCKED(request)) {
969 969 /*
970 970 * request not dependent on any other locks
971 971 * so execute this request
972 972 */
973 973 return (flk_execute_request(request));
974 974 } else {
975 975 /*
976 976 * check for deadlock
977 977 */
978 978 if (flk_check_deadlock(request))
979 979 return (EDEADLK);
980 980 /*
981 981 * this thread has to sleep
982 982 */
983 983 return (flk_wait_execute_request(request));
984 984 }
985 985 }
986 986
987 987 /*
988 988 * The actual execution of the request in the simple case is only to
989 989 * insert the 'request' in the list of active locks if it is not an
990 990 * UNLOCK.
991 991 * We have to consider the existing active locks' relation to
992 992 * this 'request' if they are owned by same process. flk_relation() does
993 993 * this job and sees to that the dependency graph information is maintained
994 994 * properly.
995 995 */
996 996
997 997 int
998 998 flk_execute_request(lock_descriptor_t *request)
999 999 {
1000 1000 graph_t *gp = request->l_graph;
1001 1001 vnode_t *vp = request->l_vnode;
1002 1002 lock_descriptor_t *lock, *lock1;
1003 1003 int done_searching = 0;
1004 1004
1005 1005 CHECK_SLEEPING_LOCKS(gp);
1006 1006 CHECK_ACTIVE_LOCKS(gp);
1007 1007
1008 1008 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1009 1009
1010 1010 flk_set_state(request, FLK_START_STATE);
1011 1011
1012 1012 ASSERT(NOT_BLOCKED(request));
1013 1013
1014 1014 /* IO_LOCK requests are only to check status */
1015 1015
1016 1016 if (IS_IO_LOCK(request))
1017 1017 return (0);
1018 1018
1019 1019 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1020 1020
1021 1021 if (lock == NULL && request->l_type == F_UNLCK)
1022 1022 return (0);
1023 1023 if (lock == NULL) {
1024 1024 flk_insert_active_lock(request);
1025 1025 return (0);
1026 1026 }
1027 1027
1028 1028 do {
1029 1029 lock1 = lock->l_next;
1030 1030 if (SAME_OWNER(request, lock)) {
1031 1031 done_searching = flk_relation(lock, request);
1032 1032 }
1033 1033 lock = lock1;
1034 1034 } while (lock->l_vnode == vp && !done_searching);
1035 1035
1036 1036 /*
1037 1037 * insert in active queue
1038 1038 */
1039 1039
1040 1040 if (request->l_type != F_UNLCK)
1041 1041 flk_insert_active_lock(request);
1042 1042
1043 1043 return (0);
1044 1044 }
1045 1045
1046 1046 /*
1047 1047 * 'request' is blocked by someone, therefore we put it into the sleep queue.
1048 1048 */
1049 1049 static int
1050 1050 flk_wait_execute_request(lock_descriptor_t *request)
1051 1051 {
1052 1052 graph_t *gp = request->l_graph;
1053 1053 callb_cpr_t *cprp; /* CPR info from callback */
1054 1054 struct flock_globals *fg;
1055 1055 int index;
1056 1056
1057 1057 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1058 1058 ASSERT(IS_WILLING_TO_SLEEP(request));
1059 1059
1060 1060 flk_insert_sleeping_lock(request);
1061 1061
1062 1062 if (IS_LOCKMGR(request)) {
1063 1063 index = HASH_INDEX(request->l_vnode);
1064 1064 fg = flk_get_globals();
1065 1065
1066 1066 if (nlm_status_size == 0) { /* not booted as a cluster */
1067 1067 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP) {
1068 1068 flk_cancel_sleeping_lock(request, 1);
1069 1069 return (ENOLCK);
1070 1070 }
1071 1071 } else { /* booted as a cluster */
1072 1072 /*
1073 1073 * If the request is an NLM server lock request,
1074 1074 * and the NLM state of the lock request is not
1075 1075 * NLM_UP (because the NLM server is shutting
1076 1076 * down), then cancel the sleeping lock and
1077 1077 * return error ENOLCK that will encourage the
1078 1078 * client to retransmit.
1079 1079 */
1080 1080 if (!IS_NLM_UP(request)) {
1081 1081 flk_cancel_sleeping_lock(request, 1);
1082 1082 return (ENOLCK);
1083 1083 }
1084 1084 }
1085 1085 }
1086 1086
1087 1087 /* Clustering: For blocking PXFS locks, return */
1088 1088 if (IS_PXFS(request)) {
1089 1089 /*
1090 1090 * PXFS locks sleep on the client side.
1091 1091 * The callback argument is used to wake up the sleeper
1092 1092 * when the lock is granted.
1093 1093 * We return -1 (rather than an errno value) to indicate
1094 1094 * the client side should sleep
1095 1095 */
1096 1096 return (PXFS_LOCK_BLOCKED);
1097 1097 }
1098 1098
1099 1099 if (request->l_callbacks != NULL) {
1100 1100 /*
1101 1101 * To make sure the shutdown code works correctly, either
1102 1102 * the callback must happen after putting the lock on the
1103 1103 * sleep list, or we must check the shutdown status after
1104 1104 * returning from the callback (and before sleeping). At
1105 1105 * least for now, we'll use the first option. If a
1106 1106 * shutdown or signal or whatever happened while the graph
1107 1107 * mutex was dropped, that will be detected by
1108 1108 * wait_for_lock().
1109 1109 */
1110 1110 mutex_exit(&gp->gp_mutex);
1111 1111
1112 1112 cprp = flk_invoke_callbacks(request->l_callbacks,
1113 1113 FLK_BEFORE_SLEEP);
1114 1114
1115 1115 mutex_enter(&gp->gp_mutex);
1116 1116
1117 1117 if (cprp == NULL) {
1118 1118 wait_for_lock(request);
1119 1119 } else {
1120 1120 mutex_enter(cprp->cc_lockp);
1121 1121 CALLB_CPR_SAFE_BEGIN(cprp);
1122 1122 mutex_exit(cprp->cc_lockp);
1123 1123 wait_for_lock(request);
1124 1124 mutex_enter(cprp->cc_lockp);
1125 1125 CALLB_CPR_SAFE_END(cprp, cprp->cc_lockp);
1126 1126 mutex_exit(cprp->cc_lockp);
1127 1127 }
1128 1128
1129 1129 mutex_exit(&gp->gp_mutex);
1130 1130 (void) flk_invoke_callbacks(request->l_callbacks,
1131 1131 FLK_AFTER_SLEEP);
1132 1132 mutex_enter(&gp->gp_mutex);
1133 1133 } else {
1134 1134 wait_for_lock(request);
1135 1135 }
1136 1136
1137 1137 if (IS_LOCKMGR(request)) {
1138 1138 /*
1139 1139 * If the lock manager is shutting down, return an
1140 1140 * error that will encourage the client to retransmit.
1141 1141 */
1142 1142 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP &&
1143 1143 !IS_GRANTED(request)) {
1144 1144 flk_cancel_sleeping_lock(request, 1);
1145 1145 return (ENOLCK);
1146 1146 }
1147 1147 }
1148 1148
1149 1149 if (IS_INTERRUPTED(request)) {
1150 1150 /* we got a signal, or act like we did */
1151 1151 flk_cancel_sleeping_lock(request, 1);
1152 1152 return (EINTR);
1153 1153 }
1154 1154
1155 1155 /* Cancelled if some other thread has closed the file */
1156 1156
1157 1157 if (IS_CANCELLED(request)) {
1158 1158 flk_cancel_sleeping_lock(request, 1);
1159 1159 return (EBADF);
1160 1160 }
1161 1161
1162 1162 request->l_state &= ~GRANTED_LOCK;
1163 1163 REMOVE_SLEEP_QUEUE(request);
1164 1164 return (flk_execute_request(request));
1165 1165 }
1166 1166
1167 1167 /*
1168 1168 * This routine adds an edge between from and to because from depends
1169 1169 * on to. If asked to check for deadlock it checks whether there are any
1170 1170 * reachable locks from "from_lock" that are owned by the same process
1171 1171 * as "from_lock".
1172 1172 * NOTE: It is the caller's responsibility to make sure that the color
1173 1173 * of the graph is consistent between the calls to flk_add_edge as done
1174 1174 * in flk_process_request. This routine does not color and check for
1175 1175 * deadlock explicitly.
1176 1176 */
1177 1177
1178 1178 static int
1179 1179 flk_add_edge(lock_descriptor_t *from_lock, lock_descriptor_t *to_lock,
1180 1180 int check_cycle, int update_graph)
1181 1181 {
1182 1182 edge_t *edge;
1183 1183 edge_t *ep;
1184 1184 lock_descriptor_t *vertex;
1185 1185 lock_descriptor_t *vertex_stack;
1186 1186
1187 1187 STACK_INIT(vertex_stack);
1188 1188
1189 1189 /*
1190 1190 * if to vertex already has mark_color just return
1191 1191 * don't add an edge as it is reachable from from vertex
1192 1192 * before itself.
1193 1193 */
1194 1194
1195 1195 if (COLORED(to_lock))
1196 1196 return (0);
1197 1197
1198 1198 edge = flk_get_edge();
1199 1199
1200 1200 /*
1201 1201 * set the from and to vertex
1202 1202 */
1203 1203
1204 1204 edge->from_vertex = from_lock;
1205 1205 edge->to_vertex = to_lock;
1206 1206
1207 1207 /*
1208 1208 * put in adjacency list of from vertex
1209 1209 */
1210 1210
1211 1211 from_lock->l_edge.edge_adj_next->edge_adj_prev = edge;
1212 1212 edge->edge_adj_next = from_lock->l_edge.edge_adj_next;
1213 1213 edge->edge_adj_prev = &from_lock->l_edge;
1214 1214 from_lock->l_edge.edge_adj_next = edge;
1215 1215
1216 1216 /*
1217 1217 	 * put in the in-list of the to vertex
1218 1218 */
1219 1219
1220 1220 to_lock->l_edge.edge_in_next->edge_in_prev = edge;
1221 1221 edge->edge_in_next = to_lock->l_edge.edge_in_next;
1222 1222 to_lock->l_edge.edge_in_next = edge;
1223 1223 edge->edge_in_prev = &to_lock->l_edge;
1224 1224
1225 1225
1226 1226 if (update_graph) {
1227 1227 flk_update_proc_graph(edge, 0);
1228 1228 return (0);
1229 1229 }
1230 1230 if (!check_cycle) {
1231 1231 return (0);
1232 1232 }
1233 1233
1234 1234 STACK_PUSH(vertex_stack, from_lock, l_stack);
1235 1235
1236 1236 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
1237 1237
1238 1238 STACK_POP(vertex_stack, l_stack);
1239 1239
1240 1240 for (ep = FIRST_ADJ(vertex);
1241 1241 ep != HEAD(vertex);
1242 1242 ep = NEXT_ADJ(ep)) {
1243 1243 if (COLORED(ep->to_vertex))
1244 1244 continue;
1245 1245 COLOR(ep->to_vertex);
1246 1246 if (SAME_OWNER(ep->to_vertex, from_lock))
1247 1247 goto dead_lock;
1248 1248 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
1249 1249 }
1250 1250 }
1251 1251 return (0);
1252 1252
1253 1253 dead_lock:
1254 1254
1255 1255 /*
1256 1256 * remove all edges
1257 1257 */
1258 1258
1259 1259 ep = FIRST_ADJ(from_lock);
1260 1260
1261 1261 while (ep != HEAD(from_lock)) {
1262 1262 IN_LIST_REMOVE(ep);
1263 1263 from_lock->l_sedge = NEXT_ADJ(ep);
1264 1264 ADJ_LIST_REMOVE(ep);
1265 1265 flk_free_edge(ep);
1266 1266 ep = from_lock->l_sedge;
1267 1267 }
1268 1268 return (EDEADLK);
1269 1269 }
1270 1270
1271 1271 /*
1272 1272 * Get an edge structure for representing the dependency between two locks.
1273 1273 */
1274 1274
1275 1275 static edge_t *
1276 1276 flk_get_edge()
1277 1277 {
1278 1278 edge_t *ep;
1279 1279
1280 1280 ASSERT(flk_edge_cache != NULL);
1281 1281
1282 1282 ep = kmem_cache_alloc(flk_edge_cache, KM_SLEEP);
1283 1283 edge_allocs++;
1284 1284 return (ep);
1285 1285 }
1286 1286
1287 1287 /*
1288 1288 * Free the edge structure.
1289 1289 */
1290 1290
1291 1291 static void
1292 1292 flk_free_edge(edge_t *ep)
1293 1293 {
1294 1294 edge_frees++;
1295 1295 kmem_cache_free(flk_edge_cache, (void *)ep);
1296 1296 }
1297 1297
1298 1298 /*
1299 1299 * Check the relationship of request with lock and perform the
1300 1300 * recomputation of dependencies, break lock if required, and return
1301 1301 * 1 if request cannot have any more relationship with the next
1302 1302 * active locks.
1303 1303 * The 'lock' and 'request' are compared and in case of overlap we
1304 1304 * delete the 'lock' and form new locks to represent the non-overlapped
1305 1305 * portion of original 'lock'. This function has side effects such as
1306 1306 * 'lock' will be freed, new locks will be added to the active list.
1307 1307 */
1308 1308
1309 1309 static int
1310 1310 flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request)
1311 1311 {
1312 1312 int lock_effect;
1313 1313 lock_descriptor_t *lock1, *lock2;
1314 1314 lock_descriptor_t *topology[3];
1315 1315 int nvertex = 0;
1316 1316 int i;
1317 1317 edge_t *ep;
1318 1318 graph_t *gp = (lock->l_graph);
1319 1319
1320 1320
1321 1321 CHECK_SLEEPING_LOCKS(gp);
1322 1322 CHECK_ACTIVE_LOCKS(gp);
1323 1323
1324 1324 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1325 1325
1326 1326 topology[0] = topology[1] = topology[2] = NULL;
1327 1327
1328 1328 if (request->l_type == F_UNLCK)
1329 1329 lock_effect = FLK_UNLOCK;
1330 1330 else if (request->l_type == F_RDLCK &&
1331 1331 lock->l_type == F_WRLCK)
1332 1332 lock_effect = FLK_DOWNGRADE;
1333 1333 else if (request->l_type == F_WRLCK &&
1334 1334 lock->l_type == F_RDLCK)
1335 1335 lock_effect = FLK_UPGRADE;
1336 1336 else
1337 1337 lock_effect = FLK_STAY_SAME;
1338 1338
1339 1339 if (lock->l_end < request->l_start) {
1340 1340 if (lock->l_end == request->l_start - 1 &&
1341 1341 lock_effect == FLK_STAY_SAME) {
1342 1342 topology[0] = request;
1343 1343 request->l_start = lock->l_start;
1344 1344 nvertex = 1;
1345 1345 goto recompute;
1346 1346 } else {
1347 1347 return (0);
1348 1348 }
1349 1349 }
1350 1350
1351 1351 if (lock->l_start > request->l_end) {
1352 1352 if (request->l_end == lock->l_start - 1 &&
1353 1353 lock_effect == FLK_STAY_SAME) {
1354 1354 topology[0] = request;
1355 1355 request->l_end = lock->l_end;
1356 1356 nvertex = 1;
1357 1357 goto recompute;
1358 1358 } else {
1359 1359 return (1);
1360 1360 }
1361 1361 }
1362 1362
1363 1363 if (request->l_end < lock->l_end) {
1364 1364 if (request->l_start > lock->l_start) {
1365 1365 if (lock_effect == FLK_STAY_SAME) {
1366 1366 request->l_start = lock->l_start;
1367 1367 request->l_end = lock->l_end;
1368 1368 topology[0] = request;
1369 1369 nvertex = 1;
1370 1370 } else {
1371 1371 lock1 = flk_get_lock();
1372 1372 lock2 = flk_get_lock();
1373 1373 COPY(lock1, lock);
1374 1374 COPY(lock2, lock);
1375 1375 lock1->l_start = lock->l_start;
1376 1376 lock1->l_end = request->l_start - 1;
1377 1377 lock2->l_start = request->l_end + 1;
1378 1378 lock2->l_end = lock->l_end;
1379 1379 topology[0] = lock1;
1380 1380 topology[1] = lock2;
1381 1381 topology[2] = request;
1382 1382 nvertex = 3;
1383 1383 }
1384 1384 } else if (request->l_start < lock->l_start) {
1385 1385 if (lock_effect == FLK_STAY_SAME) {
1386 1386 request->l_end = lock->l_end;
1387 1387 topology[0] = request;
1388 1388 nvertex = 1;
1389 1389 } else {
1390 1390 lock1 = flk_get_lock();
1391 1391 COPY(lock1, lock);
1392 1392 lock1->l_start = request->l_end + 1;
1393 1393 topology[0] = lock1;
1394 1394 topology[1] = request;
1395 1395 nvertex = 2;
1396 1396 }
1397 1397 } else {
1398 1398 if (lock_effect == FLK_STAY_SAME) {
1399 1399 request->l_start = lock->l_start;
1400 1400 request->l_end = lock->l_end;
1401 1401 topology[0] = request;
1402 1402 nvertex = 1;
1403 1403 } else {
1404 1404 lock1 = flk_get_lock();
1405 1405 COPY(lock1, lock);
1406 1406 lock1->l_start = request->l_end + 1;
1407 1407 topology[0] = lock1;
1408 1408 topology[1] = request;
1409 1409 nvertex = 2;
1410 1410 }
1411 1411 }
1412 1412 } else if (request->l_end > lock->l_end) {
1413 1413 if (request->l_start > lock->l_start) {
1414 1414 if (lock_effect == FLK_STAY_SAME) {
1415 1415 request->l_start = lock->l_start;
1416 1416 topology[0] = request;
1417 1417 nvertex = 1;
1418 1418 } else {
1419 1419 lock1 = flk_get_lock();
1420 1420 COPY(lock1, lock);
1421 1421 lock1->l_end = request->l_start - 1;
1422 1422 topology[0] = lock1;
1423 1423 topology[1] = request;
1424 1424 nvertex = 2;
1425 1425 }
1426 1426 } else if (request->l_start < lock->l_start) {
1427 1427 topology[0] = request;
1428 1428 nvertex = 1;
1429 1429 } else {
1430 1430 topology[0] = request;
1431 1431 nvertex = 1;
1432 1432 }
1433 1433 } else {
1434 1434 if (request->l_start > lock->l_start) {
1435 1435 if (lock_effect == FLK_STAY_SAME) {
1436 1436 request->l_start = lock->l_start;
1437 1437 topology[0] = request;
1438 1438 nvertex = 1;
1439 1439 } else {
1440 1440 lock1 = flk_get_lock();
1441 1441 COPY(lock1, lock);
1442 1442 lock1->l_end = request->l_start - 1;
1443 1443 topology[0] = lock1;
1444 1444 topology[1] = request;
1445 1445 nvertex = 2;
1446 1446 }
1447 1447 } else if (request->l_start < lock->l_start) {
1448 1448 topology[0] = request;
1449 1449 nvertex = 1;
1450 1450 } else {
1451 1451 if (lock_effect != FLK_UNLOCK) {
1452 1452 topology[0] = request;
1453 1453 nvertex = 1;
1454 1454 } else {
1455 1455 flk_delete_active_lock(lock, 0);
1456 1456 flk_wakeup(lock, 1);
1457 1457 flk_free_lock(lock);
1458 1458 CHECK_SLEEPING_LOCKS(gp);
1459 1459 CHECK_ACTIVE_LOCKS(gp);
1460 1460 return (1);
1461 1461 }
1462 1462 }
1463 1463 }
1464 1464
1465 1465 recompute:
1466 1466
1467 1467 /*
1468 1468 	 * For unlock we don't send the 'request' for recomputing
1469 1469 * dependencies because no lock will add an edge to this.
1470 1470 */
1471 1471
1472 1472 if (lock_effect == FLK_UNLOCK) {
1473 1473 topology[nvertex-1] = NULL;
1474 1474 nvertex--;
1475 1475 }
1476 1476 for (i = 0; i < nvertex; i++) {
1477 1477 topology[i]->l_state |= RECOMPUTE_LOCK;
1478 1478 topology[i]->l_color = NO_COLOR;
1479 1479 }
1480 1480
1481 1481 ASSERT(FIRST_ADJ(lock) == HEAD(lock));
1482 1482
1483 1483 /*
1484 1484 * we remove the adjacent edges for all vertices' to this vertex
1485 1485 * 'lock'.
1486 1486 */
1487 1487
1488 1488 ep = FIRST_IN(lock);
1489 1489 while (ep != HEAD(lock)) {
1490 1490 ADJ_LIST_REMOVE(ep);
1491 1491 ep = NEXT_IN(ep);
1492 1492 }
1493 1493
1494 1494 flk_delete_active_lock(lock, 0);
1495 1495
1496 1496 /* We are ready for recomputing the dependencies now */
1497 1497
1498 1498 flk_recompute_dependencies(lock, topology, nvertex, 1);
1499 1499
1500 1500 for (i = 0; i < nvertex; i++) {
1501 1501 topology[i]->l_state &= ~RECOMPUTE_LOCK;
1502 1502 topology[i]->l_color = NO_COLOR;
1503 1503 }
1504 1504
1505 1505
1506 1506 if (lock_effect == FLK_UNLOCK) {
1507 1507 nvertex++;
1508 1508 }
1509 1509 for (i = 0; i < nvertex - 1; i++) {
1510 1510 flk_insert_active_lock(topology[i]);
1511 1511 }
1512 1512
1513 1513
1514 1514 if (lock_effect == FLK_DOWNGRADE || lock_effect == FLK_UNLOCK) {
1515 1515 flk_wakeup(lock, 0);
1516 1516 } else {
1517 1517 ep = FIRST_IN(lock);
1518 1518 while (ep != HEAD(lock)) {
1519 1519 lock->l_sedge = NEXT_IN(ep);
1520 1520 IN_LIST_REMOVE(ep);
1521 1521 flk_update_proc_graph(ep, 1);
1522 1522 flk_free_edge(ep);
1523 1523 ep = lock->l_sedge;
1524 1524 }
1525 1525 }
1526 1526 flk_free_lock(lock);
1527 1527
1528 1528 CHECK_SLEEPING_LOCKS(gp);
1529 1529 CHECK_ACTIVE_LOCKS(gp);
1530 1530 return (0);
1531 1531 }
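
(Editor's aside, a hedged user-space illustration of the middle-overlap case
above; the file name and byte ranges are arbitrary. A same-owner unlock
strictly inside an existing lock hits the request->l_start > lock->l_start,
request->l_end < lock->l_end branch with lock_effect == FLK_UNLOCK: the
original lock is deleted and two new locks covering the non-overlapping ends
are inserted in its place.)

	struct flock fl;
	int fd = open("/tmp/split_demo", O_RDWR | O_CREAT, 0644);

	/* One active write lock covering bytes 100..199. */
	fl.l_type = F_WRLCK;
	fl.l_whence = SEEK_SET;
	fl.l_start = 100;
	fl.l_len = 100;
	(void) fcntl(fd, F_SETLK, &fl);

	/*
	 * Unlock bytes 120..149: flk_relation() replaces the single active
	 * lock with two new active locks, [100,119] and [150,199].
	 */
	fl.l_type = F_UNLCK;
	fl.l_start = 120;
	fl.l_len = 30;
	(void) fcntl(fd, F_SETLK, &fl);

Another process querying with F_GETLK would then see no conflict at, say,
offset 130, while offsets 110 or 160 would still report this process's
write lock.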
1532 1532
1533 1533 /*
1534 1534 * Insert a lock into the active queue.
1535 1535 */
1536 1536
1537 1537 static void
1538 1538 flk_insert_active_lock(lock_descriptor_t *new_lock)
1539 1539 {
1540 1540 graph_t *gp = new_lock->l_graph;
1541 1541 vnode_t *vp = new_lock->l_vnode;
1542 1542 lock_descriptor_t *first_lock, *lock;
1543 1543
1544 1544 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1545 1545
1546 1546 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1547 1547 first_lock = lock;
1548 1548
1549 1549 if (first_lock != NULL) {
1550 1550 for (; (lock->l_vnode == vp &&
1551 1551 lock->l_start < new_lock->l_start); lock = lock->l_next)
1552 1552 ;
1553 1553 } else {
1554 1554 lock = ACTIVE_HEAD(gp);
1555 1555 }
1556 1556
1557 1557 lock->l_prev->l_next = new_lock;
1558 1558 new_lock->l_next = lock;
1559 1559 new_lock->l_prev = lock->l_prev;
1560 1560 lock->l_prev = new_lock;
1561 1561
1562 1562 if (first_lock == NULL || (new_lock->l_start <= first_lock->l_start)) {
1563 1563 vp->v_filocks = (struct filock *)new_lock;
1564 1564 }
1565 1565 flk_set_state(new_lock, FLK_ACTIVE_STATE);
1566 1566 new_lock->l_state |= ACTIVE_LOCK;
1567 1567
1568 1568 CHECK_ACTIVE_LOCKS(gp);
1569 1569 CHECK_SLEEPING_LOCKS(gp);
1570 1570 }
1571 1571
1572 1572 /*
1573 1573 * Delete the active lock : Performs two functions depending on the
1574 1574 * value of second parameter. One is to remove from the active lists
1575 1575 * only and other is to both remove and free the lock.
1576 1576 */
1577 1577
1578 1578 static void
1579 1579 flk_delete_active_lock(lock_descriptor_t *lock, int free_lock)
1580 1580 {
1581 1581 vnode_t *vp = lock->l_vnode;
1582 1582 graph_t *gp = lock->l_graph;
1583 1583
1584 1584 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1585 1585 if (free_lock)
1586 1586 ASSERT(NO_DEPENDENTS(lock));
1587 1587 ASSERT(NOT_BLOCKED(lock));
1588 1588 ASSERT(IS_ACTIVE(lock));
1589 1589
1590 1590 ASSERT((vp->v_filocks != NULL));
1591 1591
1592 1592 if (vp->v_filocks == (struct filock *)lock) {
1593 1593 vp->v_filocks = (struct filock *)
1594 1594 ((lock->l_next->l_vnode == vp) ? lock->l_next :
1595 1595 NULL);
1596 1596 }
1597 1597 lock->l_next->l_prev = lock->l_prev;
1598 1598 lock->l_prev->l_next = lock->l_next;
1599 1599 lock->l_next = lock->l_prev = NULL;
1600 1600 flk_set_state(lock, FLK_DEAD_STATE);
1601 1601 lock->l_state &= ~ACTIVE_LOCK;
1602 1602
1603 1603 if (free_lock)
1604 1604 flk_free_lock(lock);
1605 1605 CHECK_ACTIVE_LOCKS(gp);
1606 1606 CHECK_SLEEPING_LOCKS(gp);
1607 1607 }
1608 1608
1609 1609 /*
1610 1610 * Insert into the sleep queue.
1611 1611 */
1612 1612
1613 1613 static void
1614 1614 flk_insert_sleeping_lock(lock_descriptor_t *request)
1615 1615 {
1616 1616 graph_t *gp = request->l_graph;
1617 1617 vnode_t *vp = request->l_vnode;
1618 1618 lock_descriptor_t *lock;
1619 1619
1620 1620 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1621 1621 ASSERT(IS_INITIAL(request));
1622 1622
1623 1623 for (lock = gp->sleeping_locks.l_next; (lock != &gp->sleeping_locks &&
1624 1624 lock->l_vnode < vp); lock = lock->l_next)
1625 1625 ;
1626 1626
1627 1627 lock->l_prev->l_next = request;
1628 1628 request->l_prev = lock->l_prev;
1629 1629 lock->l_prev = request;
1630 1630 request->l_next = lock;
1631 1631 flk_set_state(request, FLK_SLEEPING_STATE);
1632 1632 request->l_state |= SLEEPING_LOCK;
1633 1633 }
1634 1634
1635 1635 /*
1636 1636 * Cancelling a sleeping lock implies removing a vertex from the
1637 1637 * dependency graph and therefore we should recompute the dependencies
1638 1638 * of all vertices that have a path to this vertex, w.r.t. all
1639 1639 * vertices reachable from this vertex.
1640 1640 */
1641 1641
1642 1642 void
1643 1643 flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue)
1644 1644 {
1645 1645 graph_t *gp = request->l_graph;
1646 1646 vnode_t *vp = request->l_vnode;
1647 1647 lock_descriptor_t **topology = NULL;
1648 1648 edge_t *ep;
1649 1649 lock_descriptor_t *vertex, *lock;
1650 1650 int nvertex = 0;
1651 1651 int i;
1652 1652 lock_descriptor_t *vertex_stack;
1653 1653
1654 1654 STACK_INIT(vertex_stack);
1655 1655
1656 1656 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1657 1657 /*
1658 1658 	 * count the number of vertex pointers that have to be allocated
1659 1659 * All vertices that are reachable from request.
1660 1660 */
1661 1661
1662 1662 STACK_PUSH(vertex_stack, request, l_stack);
1663 1663
1664 1664 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
1665 1665 STACK_POP(vertex_stack, l_stack);
1666 1666 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
1667 1667 ep = NEXT_ADJ(ep)) {
1668 1668 if (IS_RECOMPUTE(ep->to_vertex))
1669 1669 continue;
1670 1670 ep->to_vertex->l_state |= RECOMPUTE_LOCK;
1671 1671 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
1672 1672 nvertex++;
1673 1673 }
1674 1674 }
1675 1675
1676 1676 /*
1677 1677 * allocate memory for holding the vertex pointers
1678 1678 */
1679 1679
1680 1680 if (nvertex) {
1681 1681 topology = kmem_zalloc(nvertex * sizeof (lock_descriptor_t *),
1682 1682 KM_SLEEP);
1683 1683 }
1684 1684
1685 1685 /*
1686 1686 * one more pass to actually store the vertices in the
1687 1687 * allocated array.
1688 1688 * We first check sleeping locks and then active locks
1689 1689 * so that topology array will be in a topological
1690 1690 * order.
1691 1691 */
1692 1692
1693 1693 nvertex = 0;
1694 1694 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
1695 1695
1696 1696 if (lock) {
1697 1697 do {
1698 1698 if (IS_RECOMPUTE(lock)) {
1699 1699 lock->l_index = nvertex;
1700 1700 topology[nvertex++] = lock;
1701 1701 }
1702 1702 lock->l_color = NO_COLOR;
1703 1703 lock = lock->l_next;
1704 1704 } while (lock->l_vnode == vp);
1705 1705 }
1706 1706
1707 1707 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1708 1708
1709 1709 if (lock) {
1710 1710 do {
1711 1711 if (IS_RECOMPUTE(lock)) {
1712 1712 lock->l_index = nvertex;
1713 1713 topology[nvertex++] = lock;
1714 1714 }
1715 1715 lock->l_color = NO_COLOR;
1716 1716 lock = lock->l_next;
1717 1717 } while (lock->l_vnode == vp);
1718 1718 }
1719 1719
1720 1720 /*
1721 1721 	 * Remove the in- and out-edges of request.
1722 1722 	 * They are freed after updating the proc graph below.
1723 1723 */
1724 1724
1725 1725 for (ep = FIRST_IN(request); ep != HEAD(request); ep = NEXT_IN(ep)) {
1726 1726 ADJ_LIST_REMOVE(ep);
1727 1727 }
1728 1728
1729 1729
1730 1730 if (remove_from_queue)
1731 1731 REMOVE_SLEEP_QUEUE(request);
1732 1732
1733 1733 /* we are ready to recompute */
1734 1734
1735 1735 flk_recompute_dependencies(request, topology, nvertex, 1);
1736 1736
1737 1737 ep = FIRST_ADJ(request);
1738 1738 while (ep != HEAD(request)) {
1739 1739 IN_LIST_REMOVE(ep);
1740 1740 request->l_sedge = NEXT_ADJ(ep);
1741 1741 ADJ_LIST_REMOVE(ep);
1742 1742 flk_update_proc_graph(ep, 1);
1743 1743 flk_free_edge(ep);
1744 1744 ep = request->l_sedge;
1745 1745 }
1746 1746
1747 1747
1748 1748 /*
1749 1749 * unset the RECOMPUTE flag in those vertices
1750 1750 */
1751 1751
1752 1752 for (i = 0; i < nvertex; i++) {
1753 1753 topology[i]->l_state &= ~RECOMPUTE_LOCK;
1754 1754 }
1755 1755
1756 1756 /*
1757 1757 * free the topology
1758 1758 */
1759 1759 if (nvertex)
1760 1760 kmem_free((void *)topology,
1761 1761 (nvertex * sizeof (lock_descriptor_t *)));
1762 1762 /*
1763 1763 	 * Some locks may have become unblocked now.
1764 1764 */
1765 1765
1766 1766 flk_wakeup(request, 0);
1767 1767
1768 1768 /*
1769 1769 * we expect to have a correctly recomputed graph now.
1770 1770 */
1771 1771 flk_set_state(request, FLK_DEAD_STATE);
1772 1772 flk_free_lock(request);
1773 1773 CHECK_SLEEPING_LOCKS(gp);
1774 1774 CHECK_ACTIVE_LOCKS(gp);
1775 1775
1776 1776 }
1777 1777
1778 1778 /*
1779 1779  * Uncoloring the graph simply increments the graph's mark value;
1780 1780  * only when the mark wraps around do we explicitly reset the color
1781 1781  * of every vertex in the graph.
1782 1782 */
1783 1783
1784 1784 static void
1785 1785 flk_graph_uncolor(graph_t *gp)
1786 1786 {
1787 1787 lock_descriptor_t *lock;
1788 1788
1789 1789 if (gp->mark == UINT_MAX) {
1790 1790 gp->mark = 1;
1791 1791 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
1792 1792 lock = lock->l_next)
1793 1793 lock->l_color = 0;
1794 1794
1795 1795 for (lock = SLEEPING_HEAD(gp)->l_next; lock != SLEEPING_HEAD(gp);
1796 1796 lock = lock->l_next)
1797 1797 lock->l_color = 0;
1798 1798 } else {
1799 1799 gp->mark++;
1800 1800 }
1801 1801 }
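
/*
 * A minimal stand-alone sketch of the mark-based coloring idiom assumed by
 * flk_graph_uncolor() above. All names here (ex_graph, ex_uncolor, ...) are
 * illustrative only; in the kernel the COLOR()/COLORED() macros compare a
 * lock's l_color against the graph's mark in the same way.
 */
#include <limits.h>

struct ex_vertex { unsigned int color; };
struct ex_graph { unsigned int mark; struct ex_vertex *v; int nv; };

/* "Uncolor" every vertex in O(1) by bumping the graph's mark ... */
static void
ex_uncolor(struct ex_graph *g)
{
	int i;

	if (g->mark == UINT_MAX) {	/* ... except on wraparound */
		for (i = 0; i < g->nv; i++)
			g->v[i].color = 0;
		g->mark = 1;
	} else {
		g->mark++;
	}
}

/* A vertex is colored iff its color equals the current mark. */
static int
ex_colored(struct ex_graph *g, struct ex_vertex *v)
{
	return (v->color == g->mark);
}

static void
ex_color(struct ex_graph *g, struct ex_vertex *v)
{
	v->color = g->mark;
}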
1802 1802
1803 1803 /*
1804 1804 * Wake up locks that are blocked on the given lock.
1805 1805 */
1806 1806
1807 1807 static void
1808 1808 flk_wakeup(lock_descriptor_t *lock, int adj_list_remove)
1809 1809 {
1810 1810 edge_t *ep;
1811 1811 graph_t *gp = lock->l_graph;
1812 1812 lock_descriptor_t *lck;
1813 1813
1814 1814 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1815 1815 if (NO_DEPENDENTS(lock))
1816 1816 return;
1817 1817 ep = FIRST_IN(lock);
1818 1818 do {
1819 1819 /*
1820 1820 		 * Delete the edge from the adjacency list of the
1821 1821 		 * from-vertex. If that vertex has no more adjacent
1822 1822 		 * edges, wake up its process.
1823 1823 */
1824 1824 lck = ep->from_vertex;
1825 1825 if (adj_list_remove)
1826 1826 ADJ_LIST_REMOVE(ep);
1827 1827 flk_update_proc_graph(ep, 1);
1828 1828 if (NOT_BLOCKED(lck)) {
1829 1829 GRANT_WAKEUP(lck);
1830 1830 }
1831 1831 lock->l_sedge = NEXT_IN(ep);
1832 1832 IN_LIST_REMOVE(ep);
1833 1833 flk_free_edge(ep);
1834 1834 ep = lock->l_sedge;
1835 1835 } while (ep != HEAD(lock));
1836 1836 ASSERT(NO_DEPENDENTS(lock));
1837 1837 }
1838 1838
1839 1839 /*
1840 1840  * Each dependent of request is checked for dependency against the
1841 1841  * locks in topology (so named because the array is, and must be, in
1842 1842  * topological order for this algorithm; if it is not, the inner loop
1843 1843  * below may add more edges than necessary. Topological ordering of
1844 1844  * the vertices means all edges go from left to right, i.e.,
1845 1845  * topology[i] can have an edge to topology[j] only if i < j).
1846 1846  * If lock l1 in the dependent set of request is dependent on
1847 1847  * (blocked by) lock l2 in topology but does not already have a path
1848 1848  * to it, we add an edge in the inner loop below.
1849 1849  *
1850 1850  * We don't want to add an edge between l1 and l2 if a path from l1
1851 1851  * to l2 already exists, so care has to be taken with those vertices
1852 1852  * that have two paths to 'request'. These vertices are referred to
1853 1853  * here as barrier locks.
1854 1854  *
1855 1855  * The barriers (those vertices that originally had two or more paths
1856 1856  * to request) have to be found first; otherwise we may add needless
1857 1857  * edges to vertices in topology, leaving a barrier vertex with both
1858 1858  * an edge to a vertex in topology and a path to it.
1859 1859 */
1860 1860
1861 1861 static void
1862 1862 flk_recompute_dependencies(lock_descriptor_t *request,
1863 1863 lock_descriptor_t **topology,
1864 1864 int nvertex, int update_graph)
1865 1865 {
1866 1866 lock_descriptor_t *vertex, *lock;
1867 1867 graph_t *gp = request->l_graph;
1868 1868 int i, count;
1869 1869 int barrier_found = 0;
1870 1870 edge_t *ep;
1871 1871 lock_descriptor_t *vertex_stack;
1872 1872
1873 1873 STACK_INIT(vertex_stack);
1874 1874
1875 1875 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1876 1876 if (nvertex == 0)
1877 1877 return;
1878 1878 flk_graph_uncolor(request->l_graph);
1879 1879 barrier_found = flk_find_barriers(request);
1880 1880 request->l_state |= RECOMPUTE_DONE;
1881 1881
1882 1882 STACK_PUSH(vertex_stack, request, l_stack);
1883 1883 request->l_sedge = FIRST_IN(request);
1884 1884
1885 1885
1886 1886 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
1887 1887 if (vertex->l_state & RECOMPUTE_DONE) {
1888 1888 count = 0;
1889 1889 goto next_in_edge;
1890 1890 }
1891 1891 if (IS_BARRIER(vertex)) {
1892 1892 /* decrement the barrier count */
1893 1893 if (vertex->l_index) {
1894 1894 vertex->l_index--;
1895 1895 /* this guy will be pushed again anyway ? */
1896 1896 STACK_POP(vertex_stack, l_stack);
1897 1897 if (vertex->l_index == 0) {
1898 1898 /*
1899 1899 * barrier is over we can recompute
1900 1900 * dependencies for this lock in the
1901 1901 * next stack pop
1902 1902 */
1903 1903 vertex->l_state &= ~BARRIER_LOCK;
1904 1904 }
1905 1905 continue;
1906 1906 }
1907 1907 }
1908 1908 vertex->l_state |= RECOMPUTE_DONE;
1909 1909 flk_graph_uncolor(gp);
1910 1910 count = flk_color_reachables(vertex);
1911 1911 for (i = 0; i < nvertex; i++) {
1912 1912 lock = topology[i];
1913 1913 if (COLORED(lock))
1914 1914 continue;
1915 1915 if (BLOCKS(lock, vertex)) {
1916 1916 (void) flk_add_edge(vertex, lock,
1917 1917 NO_CHECK_CYCLE, update_graph);
1918 1918 COLOR(lock);
1919 1919 count++;
1920 1920 count += flk_color_reachables(lock);
1921 1921 }
1922 1922
1923 1923 }
1924 1924
1925 1925 next_in_edge:
1926 1926 if (count == nvertex ||
1927 1927 vertex->l_sedge == HEAD(vertex)) {
1928 1928 /* prune the tree below this */
1929 1929 STACK_POP(vertex_stack, l_stack);
1930 1930 vertex->l_state &= ~RECOMPUTE_DONE;
1931 1931 /* update the barrier locks below this! */
1932 1932 if (vertex->l_sedge != HEAD(vertex) && barrier_found) {
1933 1933 flk_graph_uncolor(gp);
1934 1934 flk_update_barriers(vertex);
1935 1935 }
1936 1936 continue;
1937 1937 }
1938 1938
1939 1939 ep = vertex->l_sedge;
1940 1940 lock = ep->from_vertex;
1941 1941 STACK_PUSH(vertex_stack, lock, l_stack);
1942 1942 lock->l_sedge = FIRST_IN(lock);
1943 1943 vertex->l_sedge = NEXT_IN(ep);
1944 1944 }
1945 1945
1946 1946 }
1947 1947
1948 1948 /*
1949 1949  * Color all not-yet-colored vertices reachable from 'vertex', and return
1950 1950  * the number of them that belong to topology (have RECOMPUTE_LOCK set).
1951 1951 *
1952 1952 * Note: we need to use a different stack_link l_stack1 because this is
1953 1953 * called from flk_recompute_dependencies() that already uses a stack with
1954 1954 * l_stack as stack_link.
1955 1955 */
1956 1956
1957 1957 static int
1958 1958 flk_color_reachables(lock_descriptor_t *vertex)
1959 1959 {
1960 1960 lock_descriptor_t *ver, *lock;
1961 1961 int count;
1962 1962 edge_t *ep;
1963 1963 lock_descriptor_t *vertex_stack;
1964 1964
1965 1965 STACK_INIT(vertex_stack);
1966 1966
1967 1967 STACK_PUSH(vertex_stack, vertex, l_stack1);
1968 1968 count = 0;
1969 1969 while ((ver = STACK_TOP(vertex_stack)) != NULL) {
1970 1970
1971 1971 STACK_POP(vertex_stack, l_stack1);
1972 1972 for (ep = FIRST_ADJ(ver); ep != HEAD(ver);
1973 1973 ep = NEXT_ADJ(ep)) {
1974 1974 lock = ep->to_vertex;
1975 1975 if (COLORED(lock))
1976 1976 continue;
1977 1977 COLOR(lock);
1978 1978 if (IS_RECOMPUTE(lock))
1979 1979 count++;
1980 1980 STACK_PUSH(vertex_stack, lock, l_stack1);
1981 1981 }
1982 1982
1983 1983 }
1984 1984 return (count);
1985 1985 }
1986 1986
1987 1987 /*
1988 1988  * Called from flk_recompute_dependencies(), this routine decrements
1989 1989 * the barrier count of barrier vertices that are reachable from lock.
1990 1990 */
1991 1991
1992 1992 static void
1993 1993 flk_update_barriers(lock_descriptor_t *lock)
1994 1994 {
1995 1995 lock_descriptor_t *vertex, *lck;
1996 1996 edge_t *ep;
1997 1997 lock_descriptor_t *vertex_stack;
1998 1998
1999 1999 STACK_INIT(vertex_stack);
2000 2000
2001 2001 STACK_PUSH(vertex_stack, lock, l_stack1);
2002 2002
2003 2003 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
2004 2004 STACK_POP(vertex_stack, l_stack1);
2005 2005 for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
2006 2006 ep = NEXT_IN(ep)) {
2007 2007 lck = ep->from_vertex;
2008 2008 if (COLORED(lck)) {
2009 2009 if (IS_BARRIER(lck)) {
2010 2010 ASSERT(lck->l_index > 0);
2011 2011 lck->l_index--;
2012 2012 if (lck->l_index == 0)
2013 2013 lck->l_state &= ~BARRIER_LOCK;
2014 2014 }
2015 2015 continue;
2016 2016 }
2017 2017 COLOR(lck);
2018 2018 if (IS_BARRIER(lck)) {
2019 2019 ASSERT(lck->l_index > 0);
2020 2020 lck->l_index--;
2021 2021 if (lck->l_index == 0)
2022 2022 lck->l_state &= ~BARRIER_LOCK;
2023 2023 }
2024 2024 STACK_PUSH(vertex_stack, lck, l_stack1);
2025 2025 }
2026 2026 }
2027 2027 }
2028 2028
2029 2029 /*
2030 2030  * Find all vertices that are reachable from 'lock' more than once,
2031 2031  * mark them as barrier vertices, and increment their barrier count.
2032 2032  * The barrier count is the total number of paths from lock to that
2033 2033  * vertex, minus one (e.g., a vertex with three paths gets count two).
2034 2034 */
2035 2035
2036 2036 static int
2037 2037 flk_find_barriers(lock_descriptor_t *lock)
2038 2038 {
2039 2039 lock_descriptor_t *vertex, *lck;
2040 2040 int found = 0;
2041 2041 edge_t *ep;
2042 2042 lock_descriptor_t *vertex_stack;
2043 2043
2044 2044 STACK_INIT(vertex_stack);
2045 2045
2046 2046 STACK_PUSH(vertex_stack, lock, l_stack1);
2047 2047
2048 2048 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
2049 2049 STACK_POP(vertex_stack, l_stack1);
2050 2050 for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
2051 2051 ep = NEXT_IN(ep)) {
2052 2052 lck = ep->from_vertex;
2053 2053 if (COLORED(lck)) {
2054 2054 /* this is a barrier */
2055 2055 lck->l_state |= BARRIER_LOCK;
2056 2056 /* index will have barrier count */
2057 2057 lck->l_index++;
2058 2058 if (!found)
2059 2059 found = 1;
2060 2060 continue;
2061 2061 }
2062 2062 COLOR(lck);
2063 2063 lck->l_index = 0;
2064 2064 STACK_PUSH(vertex_stack, lck, l_stack1);
2065 2065 }
2066 2066 }
2067 2067 return (found);
2068 2068 }
2069 2069
2070 2070 /*
2071 2071 * Finds the first lock that is mainly responsible for blocking this
2072 2072 * request. If there is no such lock, request->l_flock.l_type is set to
2073 2073 * F_UNLCK. Otherwise, request->l_flock is filled in with the particulars
2074 2074 * of the blocking lock.
2075 2075 *
2076 2076 * Note: It is possible a request is blocked by a sleeping lock because
2077 2077 * of the fairness policy used in flk_process_request() to construct the
2078 2078 * dependencies. (see comments before flk_process_request()).
2079 2079 */
2080 2080
2081 2081 static void
2082 2082 flk_get_first_blocking_lock(lock_descriptor_t *request)
2083 2083 {
2084 2084 graph_t *gp = request->l_graph;
2085 2085 vnode_t *vp = request->l_vnode;
2086 2086 lock_descriptor_t *lock, *blocker;
2087 2087
2088 2088 ASSERT(MUTEX_HELD(&gp->gp_mutex));
2089 2089 blocker = NULL;
2090 2090 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2091 2091
2092 2092 if (lock) {
2093 2093 do {
2094 2094 if (BLOCKS(lock, request)) {
2095 2095 blocker = lock;
2096 2096 break;
2097 2097 }
2098 2098 lock = lock->l_next;
2099 2099 } while (lock->l_vnode == vp);
2100 2100 }
2101 2101
2102 + if (blocker == NULL && request->l_flock.l_type == F_RDLCK) {
2103 + /*
2104 + * No active lock is blocking this request, but if a read
2105 + * lock is requested, it may also get blocked by a waiting
2106 + * writer. So search all sleeping locks and see if there is
2107 + * a writer waiting.
2108 + */
2109 + SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2110 + if (lock) {
2111 + do {
2112 + if (BLOCKS(lock, request)) {
2113 + blocker = lock;
2114 + break;
2115 + }
2116 + lock = lock->l_next;
2117 + } while (lock->l_vnode == vp);
2118 + }
2119 + }
2120 +
2102 2121 if (blocker) {
2103 2122 report_blocker(blocker, request);
2104 2123 } else
2105 2124 request->l_flock.l_type = F_UNLCK;
2106 2125 }
2107 2126
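/*
 * A minimal user-level sketch of the behaviour the block above provides for
 * F_GETLK. The helper name who_blocks_read() and the whole-file range are
 * illustrative only; with the sleeping-lock search in place, a queued writer
 * can be reported as the blocker, not just an active lock.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static void
who_blocks_read(int fd)
{
	struct flock fl;

	fl.l_type = F_RDLCK;		/* would a read lock be granted? */
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;
	fl.l_len = 0;			/* 0 means "to end of file" */

	if (fcntl(fd, F_GETLK, &fl) == -1) {
		perror("F_GETLK");
		return;
	}
	if (fl.l_type == F_UNLCK) {
		(void) printf("read lock would be granted\n");
	} else {
		(void) printf("blocked by a %s lock held by pid %ld\n",
		    fl.l_type == F_WRLCK ? "write" : "read",
		    (long)fl.l_pid);
	}
}
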
2108 2127 /*
2109 2128 * Get the graph_t structure associated with a vnode.
2110 2129 * If 'initialize' is non-zero, and the graph_t structure for this vnode has
2111 2130 * not yet been initialized, then a new element is allocated and returned.
2112 2131 */
2113 2132 graph_t *
2114 2133 flk_get_lock_graph(vnode_t *vp, int initialize)
2115 2134 {
2116 2135 graph_t *gp;
2117 2136 graph_t *gp_alloc = NULL;
2118 2137 int index = HASH_INDEX(vp);
2119 2138
2120 2139 if (initialize == FLK_USE_GRAPH) {
2121 2140 mutex_enter(&flock_lock);
2122 2141 gp = lock_graph[index];
2123 2142 mutex_exit(&flock_lock);
2124 2143 return (gp);
2125 2144 }
2126 2145
2127 2146 ASSERT(initialize == FLK_INIT_GRAPH);
2128 2147
2129 2148 if (lock_graph[index] == NULL) {
2130 2149
2131 2150 gp_alloc = kmem_zalloc(sizeof (graph_t), KM_SLEEP);
2132 2151
2133 2152 /* Initialize the graph */
2134 2153
2135 2154 gp_alloc->active_locks.l_next =
2136 2155 gp_alloc->active_locks.l_prev =
2137 2156 (lock_descriptor_t *)ACTIVE_HEAD(gp_alloc);
2138 2157 gp_alloc->sleeping_locks.l_next =
2139 2158 gp_alloc->sleeping_locks.l_prev =
2140 2159 (lock_descriptor_t *)SLEEPING_HEAD(gp_alloc);
2141 2160 gp_alloc->index = index;
2142 2161 mutex_init(&gp_alloc->gp_mutex, NULL, MUTEX_DEFAULT, NULL);
2143 2162 }
2144 2163
2145 2164 mutex_enter(&flock_lock);
2146 2165
2147 2166 gp = lock_graph[index];
2148 2167
2149 2168 /* Recheck the value within flock_lock */
2150 2169 if (gp == NULL) {
2151 2170 struct flock_globals *fg;
2152 2171
2153 2172 /* We must have previously allocated the graph_t structure */
2154 2173 ASSERT(gp_alloc != NULL);
2155 2174 lock_graph[index] = gp = gp_alloc;
2156 2175 /*
2157 2176 * The lockmgr status is only needed if KLM is loaded.
2158 2177 */
2159 2178 if (flock_zone_key != ZONE_KEY_UNINITIALIZED) {
2160 2179 fg = flk_get_globals();
2161 2180 fg->lockmgr_status[index] = fg->flk_lockmgr_status;
2162 2181 }
2163 2182 }
2164 2183
2165 2184 mutex_exit(&flock_lock);
2166 2185
2167 2186 if ((gp_alloc != NULL) && (gp != gp_alloc)) {
2168 2187 /* There was a race to allocate the graph_t and we lost */
2169 2188 mutex_destroy(&gp_alloc->gp_mutex);
2170 2189 kmem_free(gp_alloc, sizeof (graph_t));
2171 2190 }
2172 2191
2173 2192 return (gp);
2174 2193 }
2175 2194
2176 2195 /*
2177 2196 * PSARC case 1997/292
2178 2197 */
2179 2198 int
2180 2199 cl_flk_has_remote_locks_for_nlmid(vnode_t *vp, int nlmid)
2181 2200 {
2182 2201 lock_descriptor_t *lock;
2183 2202 int result = 0;
2184 2203 graph_t *gp;
2185 2204 int lock_nlmid;
2186 2205
2187 2206 /*
2188 2207 * Check to see if node is booted as a cluster. If not, return.
2189 2208 */
2190 2209 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
2191 2210 return (0);
2192 2211 }
2193 2212
2194 2213 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2195 2214 if (gp == NULL) {
2196 2215 return (0);
2197 2216 }
2198 2217
2199 2218 mutex_enter(&gp->gp_mutex);
2200 2219
2201 2220 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2202 2221
2203 2222 if (lock) {
2204 2223 while (lock->l_vnode == vp) {
2205 2224 /* get NLM id from sysid */
2206 2225 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
2207 2226
2208 2227 /*
2209 2228 * If NLM server request _and_ nlmid of lock matches
2210 2229 * nlmid of argument, then we've found a remote lock.
2211 2230 */
2212 2231 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
2213 2232 result = 1;
2214 2233 goto done;
2215 2234 }
2216 2235 lock = lock->l_next;
2217 2236 }
2218 2237 }
2219 2238
2220 2239 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2221 2240
2222 2241 if (lock) {
2223 2242 while (lock->l_vnode == vp) {
2224 2243 /* get NLM id from sysid */
2225 2244 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
2226 2245
2227 2246 /*
2228 2247 * If NLM server request _and_ nlmid of lock matches
2229 2248 * nlmid of argument, then we've found a remote lock.
2230 2249 */
2231 2250 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
2232 2251 result = 1;
2233 2252 goto done;
2234 2253 }
2235 2254 lock = lock->l_next;
2236 2255 }
2237 2256 }
2238 2257
2239 2258 done:
2240 2259 mutex_exit(&gp->gp_mutex);
2241 2260 return (result);
2242 2261 }
2243 2262
2244 2263 /*
2245 2264 * Determine whether there are any locks for the given vnode with a remote
2246 2265 * sysid. Returns zero if not, non-zero if there are.
2247 2266 *
2248 2267 * Note that the return value from this function is potentially invalid
2249 2268 * once it has been returned. The caller is responsible for providing its
2250 2269 * own synchronization mechanism to ensure that the return value is useful
2251 2270 * (e.g., see nfs_lockcompletion()).
2252 2271 */
2253 2272 int
2254 2273 flk_has_remote_locks(vnode_t *vp)
2255 2274 {
2256 2275 lock_descriptor_t *lock;
2257 2276 int result = 0;
2258 2277 graph_t *gp;
2259 2278
2260 2279 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2261 2280 if (gp == NULL) {
2262 2281 return (0);
2263 2282 }
2264 2283
2265 2284 mutex_enter(&gp->gp_mutex);
2266 2285
2267 2286 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2268 2287
2269 2288 if (lock) {
2270 2289 while (lock->l_vnode == vp) {
2271 2290 if (IS_REMOTE(lock)) {
2272 2291 result = 1;
2273 2292 goto done;
2274 2293 }
2275 2294 lock = lock->l_next;
2276 2295 }
2277 2296 }
2278 2297
2279 2298 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2280 2299
2281 2300 if (lock) {
2282 2301 while (lock->l_vnode == vp) {
2283 2302 if (IS_REMOTE(lock)) {
2284 2303 result = 1;
2285 2304 goto done;
2286 2305 }
2287 2306 lock = lock->l_next;
2288 2307 }
2289 2308 }
2290 2309
2291 2310 done:
2292 2311 mutex_exit(&gp->gp_mutex);
2293 2312 return (result);
2294 2313 }
2295 2314
2296 2315 /*
2297 2316 * Determine whether there are any locks for the given vnode with a remote
2298 2317  * sysid matching the given sysid.
2299 2318 * Used by the new (open source) NFS Lock Manager (NLM)
2300 2319 */
2301 2320 int
2302 2321 flk_has_remote_locks_for_sysid(vnode_t *vp, int sysid)
2303 2322 {
2304 2323 lock_descriptor_t *lock;
2305 2324 int result = 0;
2306 2325 graph_t *gp;
2307 2326
2308 2327 if (sysid == 0)
2309 2328 return (0);
2310 2329
2311 2330 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2312 2331 if (gp == NULL) {
2313 2332 return (0);
2314 2333 }
2315 2334
2316 2335 mutex_enter(&gp->gp_mutex);
2317 2336
2318 2337 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2319 2338
2320 2339 if (lock) {
2321 2340 while (lock->l_vnode == vp) {
2322 2341 if (lock->l_flock.l_sysid == sysid) {
2323 2342 result = 1;
2324 2343 goto done;
2325 2344 }
2326 2345 lock = lock->l_next;
2327 2346 }
2328 2347 }
2329 2348
2330 2349 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2331 2350
2332 2351 if (lock) {
2333 2352 while (lock->l_vnode == vp) {
2334 2353 if (lock->l_flock.l_sysid == sysid) {
2335 2354 result = 1;
2336 2355 goto done;
2337 2356 }
2338 2357 lock = lock->l_next;
2339 2358 }
2340 2359 }
2341 2360
2342 2361 done:
2343 2362 mutex_exit(&gp->gp_mutex);
2344 2363 return (result);
2345 2364 }
2346 2365
2347 2366 /*
2348 2367 * Determine if there are any locks owned by the given sysid.
2349 2368 * Returns zero if not, non-zero if there are. Note that this return code
2350 2369 * could be derived from flk_get_{sleeping,active}_locks, but this routine
2351 2370 * avoids all the memory allocations of those routines.
2352 2371 *
2353 2372 * This routine has the same synchronization issues as
2354 2373 * flk_has_remote_locks.
2355 2374 */
2356 2375
2357 2376 int
2358 2377 flk_sysid_has_locks(int sysid, int lck_type)
2359 2378 {
2360 2379 int has_locks = 0;
2361 2380 lock_descriptor_t *lock;
2362 2381 graph_t *gp;
2363 2382 int i;
2364 2383
2365 2384 for (i = 0; i < HASH_SIZE && !has_locks; i++) {
2366 2385 mutex_enter(&flock_lock);
2367 2386 gp = lock_graph[i];
2368 2387 mutex_exit(&flock_lock);
2369 2388 if (gp == NULL) {
2370 2389 continue;
2371 2390 }
2372 2391
2373 2392 mutex_enter(&gp->gp_mutex);
2374 2393
2375 2394 if (lck_type & FLK_QUERY_ACTIVE) {
2376 2395 for (lock = ACTIVE_HEAD(gp)->l_next;
2377 2396 lock != ACTIVE_HEAD(gp) && !has_locks;
2378 2397 lock = lock->l_next) {
2379 2398 if (lock->l_flock.l_sysid == sysid)
2380 2399 has_locks = 1;
2381 2400 }
2382 2401 }
2383 2402
2384 2403 if (lck_type & FLK_QUERY_SLEEPING) {
2385 2404 for (lock = SLEEPING_HEAD(gp)->l_next;
2386 2405 lock != SLEEPING_HEAD(gp) && !has_locks;
2387 2406 lock = lock->l_next) {
2388 2407 if (lock->l_flock.l_sysid == sysid)
2389 2408 has_locks = 1;
2390 2409 }
2391 2410 }
2392 2411 mutex_exit(&gp->gp_mutex);
2393 2412 }
2394 2413
2395 2414 return (has_locks);
2396 2415 }
2397 2416
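/*
 * A hypothetical caller sketch: before tearing down per-client state, a lock
 * manager could ask whether the client (identified by sysid) still owns or
 * is waiting for any locks. The wrapper and the surrounding teardown logic
 * are assumptions of this sketch, not taken from any real caller.
 */
static int
client_still_has_locks(int sysid)
{
	return (flk_sysid_has_locks(sysid,
	    FLK_QUERY_ACTIVE | FLK_QUERY_SLEEPING));
}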
2398 2417
2399 2418 /*
2400 2419 * PSARC case 1997/292
2401 2420 *
2402 2421  * Requires: "sysid" is a pair [nlmid, sysid]. The lower half is a 16-bit
2403 2422 * quantity, the real sysid generated by the NLM server; the upper half
2404 2423 * identifies the node of the cluster where the NLM server ran.
2405 2424 * This routine is only called by an NLM server running in a cluster.
2406 2425 * Effects: Remove all locks held on behalf of the client identified
2407 2426 * by "sysid."
2408 2427 */
2409 2428 void
2410 2429 cl_flk_remove_locks_by_sysid(int sysid)
2411 2430 {
2412 2431 graph_t *gp;
2413 2432 int i;
2414 2433 lock_descriptor_t *lock, *nlock;
2415 2434
2416 2435 /*
2417 2436 * Check to see if node is booted as a cluster. If not, return.
2418 2437 */
2419 2438 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
2420 2439 return;
2421 2440 }
2422 2441
2423 2442 ASSERT(sysid != 0);
2424 2443 for (i = 0; i < HASH_SIZE; i++) {
2425 2444 mutex_enter(&flock_lock);
2426 2445 gp = lock_graph[i];
2427 2446 mutex_exit(&flock_lock);
2428 2447
2429 2448 if (gp == NULL)
2430 2449 continue;
2431 2450
2432 2451 mutex_enter(&gp->gp_mutex); /* get mutex on lock graph */
2433 2452
2434 2453 /* signal sleeping requests so that they bail out */
2435 2454 lock = SLEEPING_HEAD(gp)->l_next;
2436 2455 while (lock != SLEEPING_HEAD(gp)) {
2437 2456 nlock = lock->l_next;
2438 2457 if (lock->l_flock.l_sysid == sysid) {
2439 2458 INTERRUPT_WAKEUP(lock);
2440 2459 }
2441 2460 lock = nlock;
2442 2461 }
2443 2462
2444 2463 /* delete active locks */
2445 2464 lock = ACTIVE_HEAD(gp)->l_next;
2446 2465 while (lock != ACTIVE_HEAD(gp)) {
2447 2466 nlock = lock->l_next;
2448 2467 if (lock->l_flock.l_sysid == sysid) {
2449 2468 flk_delete_active_lock(lock, 0);
2450 2469 flk_wakeup(lock, 1);
2451 2470 flk_free_lock(lock);
2452 2471 }
2453 2472 lock = nlock;
2454 2473 }
2455 2474 mutex_exit(&gp->gp_mutex); /* release mutex on lock graph */
2456 2475 }
2457 2476 }
2458 2477
2459 2478 /*
2460 2479  * Delete all locks in the system that belong to the sysid of the request.
2461 2480 */
2462 2481
2463 2482 static void
2464 2483 flk_delete_locks_by_sysid(lock_descriptor_t *request)
2465 2484 {
2466 2485 int sysid = request->l_flock.l_sysid;
2467 2486 lock_descriptor_t *lock, *nlock;
2468 2487 graph_t *gp;
2469 2488 int i;
2470 2489
2471 2490 ASSERT(MUTEX_HELD(&request->l_graph->gp_mutex));
2472 2491 ASSERT(sysid != 0);
2473 2492
2474 2493 mutex_exit(&request->l_graph->gp_mutex);
2475 2494
2476 2495 for (i = 0; i < HASH_SIZE; i++) {
2477 2496 mutex_enter(&flock_lock);
2478 2497 gp = lock_graph[i];
2479 2498 mutex_exit(&flock_lock);
2480 2499
2481 2500 if (gp == NULL)
2482 2501 continue;
2483 2502
2484 2503 mutex_enter(&gp->gp_mutex);
2485 2504
2486 2505 /* signal sleeping requests so that they bail out */
2487 2506 lock = SLEEPING_HEAD(gp)->l_next;
2488 2507 while (lock != SLEEPING_HEAD(gp)) {
2489 2508 nlock = lock->l_next;
2490 2509 if (lock->l_flock.l_sysid == sysid) {
2491 2510 INTERRUPT_WAKEUP(lock);
2492 2511 }
2493 2512 lock = nlock;
2494 2513 }
2495 2514
2496 2515 /* delete active locks */
2497 2516 lock = ACTIVE_HEAD(gp)->l_next;
2498 2517 while (lock != ACTIVE_HEAD(gp)) {
2499 2518 nlock = lock->l_next;
2500 2519 if (lock->l_flock.l_sysid == sysid) {
2501 2520 flk_delete_active_lock(lock, 0);
2502 2521 flk_wakeup(lock, 1);
2503 2522 flk_free_lock(lock);
2504 2523 }
2505 2524 lock = nlock;
2506 2525 }
2507 2526 mutex_exit(&gp->gp_mutex);
2508 2527 }
2509 2528
2510 2529 mutex_enter(&request->l_graph->gp_mutex);
2511 2530 }
2512 2531
2513 2532 /*
2514 2533 * Clustering: Deletes PXFS locks
2515 2534 * Effects: Delete all locks on files in the given file system and with the
2516 2535 * given PXFS id.
2517 2536 */
2518 2537 void
2519 2538 cl_flk_delete_pxfs_locks(struct vfs *vfsp, int pxfsid)
2520 2539 {
2521 2540 lock_descriptor_t *lock, *nlock;
2522 2541 graph_t *gp;
2523 2542 int i;
2524 2543
2525 2544 for (i = 0; i < HASH_SIZE; i++) {
2526 2545 mutex_enter(&flock_lock);
2527 2546 gp = lock_graph[i];
2528 2547 mutex_exit(&flock_lock);
2529 2548
2530 2549 if (gp == NULL)
2531 2550 continue;
2532 2551
2533 2552 mutex_enter(&gp->gp_mutex);
2534 2553
2535 2554 /* signal sleeping requests so that they bail out */
2536 2555 lock = SLEEPING_HEAD(gp)->l_next;
2537 2556 while (lock != SLEEPING_HEAD(gp)) {
2538 2557 nlock = lock->l_next;
2539 2558 if (lock->l_vnode->v_vfsp == vfsp) {
2540 2559 ASSERT(IS_PXFS(lock));
2541 2560 if (GETPXFSID(lock->l_flock.l_sysid) ==
2542 2561 pxfsid) {
2543 2562 flk_set_state(lock,
2544 2563 FLK_CANCELLED_STATE);
2545 2564 flk_cancel_sleeping_lock(lock, 1);
2546 2565 }
2547 2566 }
2548 2567 lock = nlock;
2549 2568 }
2550 2569
2551 2570 /* delete active locks */
2552 2571 lock = ACTIVE_HEAD(gp)->l_next;
2553 2572 while (lock != ACTIVE_HEAD(gp)) {
2554 2573 nlock = lock->l_next;
2555 2574 if (lock->l_vnode->v_vfsp == vfsp) {
2556 2575 ASSERT(IS_PXFS(lock));
2557 2576 if (GETPXFSID(lock->l_flock.l_sysid) ==
2558 2577 pxfsid) {
2559 2578 flk_delete_active_lock(lock, 0);
2560 2579 flk_wakeup(lock, 1);
2561 2580 flk_free_lock(lock);
2562 2581 }
2563 2582 }
2564 2583 lock = nlock;
2565 2584 }
2566 2585 mutex_exit(&gp->gp_mutex);
2567 2586 }
2568 2587 }
2569 2588
2570 2589 /*
2571 2590 * Search for a sleeping lock manager lock which matches exactly this lock
2572 2591 * request; if one is found, fake a signal to cancel it.
2573 2592 *
2574 2593 * Return 1 if a matching lock was found, 0 otherwise.
2575 2594 */
2576 2595
2577 2596 static int
2578 2597 flk_canceled(lock_descriptor_t *request)
2579 2598 {
2580 2599 lock_descriptor_t *lock, *nlock;
2581 2600 graph_t *gp = request->l_graph;
2582 2601 vnode_t *vp = request->l_vnode;
2583 2602
2584 2603 ASSERT(MUTEX_HELD(&gp->gp_mutex));
2585 2604 ASSERT(IS_LOCKMGR(request));
2586 2605 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2587 2606
2588 2607 if (lock) {
2589 2608 while (lock->l_vnode == vp) {
2590 2609 nlock = lock->l_next;
2591 2610 if (SAME_OWNER(lock, request) &&
2592 2611 lock->l_start == request->l_start &&
2593 2612 lock->l_end == request->l_end) {
2594 2613 INTERRUPT_WAKEUP(lock);
2595 2614 return (1);
2596 2615 }
2597 2616 lock = nlock;
2598 2617 }
2599 2618 }
2600 2619 return (0);
2601 2620 }
2602 2621
2603 2622 /*
2604 2623  * Remove all locks on the vnode that belong to the given pid and sysid.
2605 2624 */
2606 2625
2607 2626 void
2608 2627 cleanlocks(vnode_t *vp, pid_t pid, int sysid)
2609 2628 {
2610 2629 graph_t *gp;
2611 2630 lock_descriptor_t *lock, *nlock;
2612 2631 lock_descriptor_t *link_stack;
2613 2632
2614 2633 STACK_INIT(link_stack);
2615 2634
2616 2635 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2617 2636
2618 2637 if (gp == NULL)
2619 2638 return;
2620 2639 mutex_enter(&gp->gp_mutex);
2621 2640
2622 2641 CHECK_SLEEPING_LOCKS(gp);
2623 2642 CHECK_ACTIVE_LOCKS(gp);
2624 2643
2625 2644 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2626 2645
2627 2646 if (lock) {
2628 2647 do {
2629 2648 nlock = lock->l_next;
2630 2649 if ((lock->l_flock.l_pid == pid ||
2631 2650 pid == IGN_PID) &&
2632 2651 lock->l_flock.l_sysid == sysid) {
2633 2652 CANCEL_WAKEUP(lock);
2634 2653 }
2635 2654 lock = nlock;
2636 2655 } while (lock->l_vnode == vp);
2637 2656 }
2638 2657
2639 2658 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2640 2659
2641 2660 if (lock) {
2642 2661 do {
2643 2662 nlock = lock->l_next;
2644 2663 if ((lock->l_flock.l_pid == pid ||
2645 2664 pid == IGN_PID) &&
2646 2665 lock->l_flock.l_sysid == sysid) {
2647 2666 flk_delete_active_lock(lock, 0);
2648 2667 STACK_PUSH(link_stack, lock, l_stack);
2649 2668 }
2650 2669 lock = nlock;
2651 2670 } while (lock->l_vnode == vp);
2652 2671 }
2653 2672
2654 2673 while ((lock = STACK_TOP(link_stack)) != NULL) {
2655 2674 STACK_POP(link_stack, l_stack);
2656 2675 flk_wakeup(lock, 1);
2657 2676 flk_free_lock(lock);
2658 2677 }
2659 2678
2660 2679 CHECK_SLEEPING_LOCKS(gp);
2661 2680 CHECK_ACTIVE_LOCKS(gp);
2662 2681 CHECK_OWNER_LOCKS(gp, pid, sysid, vp);
2663 2682 mutex_exit(&gp->gp_mutex);
2664 2683 }
2665 2684
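/*
 * A hypothetical usage sketch for cleanlocks(): a local file system's close
 * path dropping every lock the closing process holds on the vnode. The
 * wrapper name is illustrative; the sysid of 0 for local locks follows the
 * convention used by chklock() below.
 */
static void
myfs_drop_locks_on_close(vnode_t *vp)
{
	cleanlocks(vp, curproc->p_pid, 0);
}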
2666 2685
2667 2686 /*
2668 2687 * Called from 'fs' read and write routines for files that have mandatory
2669 2688 * locking enabled.
2670 2689 */
2671 2690
2672 2691 int
2673 2692 chklock(
2674 2693 struct vnode *vp,
2675 2694 int iomode,
2676 2695 u_offset_t offset,
2677 2696 ssize_t len,
2678 2697 int fmode,
2679 2698 caller_context_t *ct)
2680 2699 {
2681 2700 register int i;
2682 2701 struct flock64 bf;
2683 2702 int error = 0;
2684 2703
2685 2704 bf.l_type = (iomode & FWRITE) ? F_WRLCK : F_RDLCK;
2686 2705 bf.l_whence = 0;
2687 2706 bf.l_start = offset;
2688 2707 bf.l_len = len;
2689 2708 if (ct == NULL) {
2690 2709 bf.l_pid = curproc->p_pid;
2691 2710 bf.l_sysid = 0;
2692 2711 } else {
2693 2712 bf.l_pid = ct->cc_pid;
2694 2713 bf.l_sysid = ct->cc_sysid;
2695 2714 }
2696 2715 i = (fmode & (FNDELAY|FNONBLOCK)) ? INOFLCK : INOFLCK|SLPFLCK;
2697 2716 if ((i = reclock(vp, &bf, i, 0, offset, NULL)) != 0 ||
2698 2717 bf.l_type != F_UNLCK)
2699 2718 error = i ? i : EAGAIN;
2700 2719 return (error);
2701 2720 }
2702 2721
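/*
 * A hypothetical caller sketch for chklock(): a write path asking whether
 * the byte range is covered by someone else's mandatory lock before doing
 * the I/O. Only the chklock() call itself is taken from this file; the
 * wrapper and the decision to check are illustrative. FNDELAY/FNONBLOCK in
 * ioflag make the underlying reclock() call non-blocking.
 */
static int
myfs_write_range(vnode_t *vp, u_offset_t off, ssize_t len, int ioflag,
    caller_context_t *ct)
{
	int error;

	error = chklock(vp, FWRITE, off, len, ioflag, ct);
	if (error != 0)
		return (error);		/* typically EAGAIN */

	/* ... safe to perform the write ... */
	return (0);
}
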
2703 2722 /*
2704 2723 * convoff - converts the given data (start, whence) to the
2705 2724 * given whence.
2706 2725 */
2707 2726 int
2708 2727 convoff(vp, lckdat, whence, offset)
2709 2728 struct vnode *vp;
2710 2729 struct flock64 *lckdat;
2711 2730 int whence;
2712 2731 offset_t offset;
2713 2732 {
2714 2733 int error;
2715 2734 struct vattr vattr;
2716 2735
2717 2736 if ((lckdat->l_whence == 2) || (whence == 2)) {
2718 2737 vattr.va_mask = AT_SIZE;
2719 2738 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
2720 2739 return (error);
2721 2740 }
2722 2741
2723 2742 switch (lckdat->l_whence) {
2724 2743 case 1:
2725 2744 lckdat->l_start += offset;
2726 2745 break;
2727 2746 case 2:
2728 2747 lckdat->l_start += vattr.va_size;
2729 2748 /* FALLTHRU */
2730 2749 case 0:
2731 2750 break;
2732 2751 default:
2733 2752 return (EINVAL);
2734 2753 }
2735 2754
2736 2755 if (lckdat->l_start < 0)
2737 2756 return (EINVAL);
2738 2757
2739 2758 switch (whence) {
2740 2759 case 1:
2741 2760 lckdat->l_start -= offset;
2742 2761 break;
2743 2762 case 2:
2744 2763 lckdat->l_start -= vattr.va_size;
2745 2764 /* FALLTHRU */
2746 2765 case 0:
2747 2766 break;
2748 2767 default:
2749 2768 return (EINVAL);
2750 2769 }
2751 2770
2752 2771 lckdat->l_whence = (short)whence;
2753 2772 return (0);
2754 2773 }
2755 2774
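/*
 * A hypothetical usage sketch for convoff(): normalize a caller-supplied
 * flock64 so that l_start is absolute (whence 0), the form the rest of the
 * lock code works with. "fileoff" stands for the requesting open file's
 * current offset and is an assumption of this sketch.
 */
static int
normalize_lock(vnode_t *vp, struct flock64 *bf, offset_t fileoff)
{
	/* May issue a VOP_GETATTR() if either whence refers to EOF (2). */
	return (convoff(vp, bf, 0, fileoff));
}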
2756 2775
2757 2776 /* proc_graph function definitions */
2758 2777
2759 2778 /*
2760 2779  * Check for a deadlock caused by the new 'lock'. If a deadlock is found,
2761 2780  * the edges of this lock are freed and 1 is returned.
2762 2781 */
2763 2782
2764 2783 static int
2765 2784 flk_check_deadlock(lock_descriptor_t *lock)
2766 2785 {
2767 2786 proc_vertex_t *start_vertex, *pvertex;
2768 2787 proc_vertex_t *dvertex;
2769 2788 proc_edge_t *pep, *ppep;
2770 2789 edge_t *ep, *nep;
2771 2790 proc_vertex_t *process_stack;
2772 2791
2773 2792 STACK_INIT(process_stack);
2774 2793
2775 2794 mutex_enter(&flock_lock);
2776 2795 start_vertex = flk_get_proc_vertex(lock);
2777 2796 ASSERT(start_vertex != NULL);
2778 2797
2779 2798 /* construct the edges from this process to other processes */
2780 2799
2781 2800 ep = FIRST_ADJ(lock);
2782 2801 while (ep != HEAD(lock)) {
2783 2802 proc_vertex_t *adj_proc;
2784 2803
2785 2804 adj_proc = flk_get_proc_vertex(ep->to_vertex);
2786 2805 for (pep = start_vertex->edge; pep != NULL; pep = pep->next) {
2787 2806 if (pep->to_proc == adj_proc) {
2788 2807 ASSERT(pep->refcount);
2789 2808 pep->refcount++;
2790 2809 break;
2791 2810 }
2792 2811 }
2793 2812 if (pep == NULL) {
2794 2813 pep = flk_get_proc_edge();
2795 2814 pep->to_proc = adj_proc;
2796 2815 pep->refcount = 1;
2797 2816 adj_proc->incount++;
2798 2817 pep->next = start_vertex->edge;
2799 2818 start_vertex->edge = pep;
2800 2819 }
2801 2820 ep = NEXT_ADJ(ep);
2802 2821 }
2803 2822
2804 2823 ep = FIRST_IN(lock);
2805 2824
2806 2825 while (ep != HEAD(lock)) {
2807 2826 proc_vertex_t *in_proc;
2808 2827
2809 2828 in_proc = flk_get_proc_vertex(ep->from_vertex);
2810 2829
2811 2830 for (pep = in_proc->edge; pep != NULL; pep = pep->next) {
2812 2831 if (pep->to_proc == start_vertex) {
2813 2832 ASSERT(pep->refcount);
2814 2833 pep->refcount++;
2815 2834 break;
2816 2835 }
2817 2836 }
2818 2837 if (pep == NULL) {
2819 2838 pep = flk_get_proc_edge();
2820 2839 pep->to_proc = start_vertex;
2821 2840 pep->refcount = 1;
2822 2841 start_vertex->incount++;
2823 2842 pep->next = in_proc->edge;
2824 2843 in_proc->edge = pep;
2825 2844 }
2826 2845 ep = NEXT_IN(ep);
2827 2846 }
2828 2847
2829 2848 if (start_vertex->incount == 0) {
2830 2849 mutex_exit(&flock_lock);
2831 2850 return (0);
2832 2851 }
2833 2852
2834 2853 flk_proc_graph_uncolor();
2835 2854
2836 2855 start_vertex->p_sedge = start_vertex->edge;
2837 2856
2838 2857 STACK_PUSH(process_stack, start_vertex, p_stack);
2839 2858
2840 2859 while ((pvertex = STACK_TOP(process_stack)) != NULL) {
2841 2860 for (pep = pvertex->p_sedge; pep != NULL; pep = pep->next) {
2842 2861 dvertex = pep->to_proc;
2843 2862 if (!PROC_ARRIVED(dvertex)) {
2844 2863 STACK_PUSH(process_stack, dvertex, p_stack);
2845 2864 dvertex->p_sedge = dvertex->edge;
2846 2865 PROC_ARRIVE(pvertex);
2847 2866 pvertex->p_sedge = pep->next;
2848 2867 break;
2849 2868 }
2850 2869 if (!PROC_DEPARTED(dvertex))
2851 2870 goto deadlock;
2852 2871 }
2853 2872 if (pep == NULL) {
2854 2873 PROC_DEPART(pvertex);
2855 2874 STACK_POP(process_stack, p_stack);
2856 2875 }
2857 2876 }
2858 2877 mutex_exit(&flock_lock);
2859 2878 return (0);
2860 2879
2861 2880 deadlock:
2862 2881
2863 2882 /* we remove all lock edges and proc edges */
2864 2883
2865 2884 ep = FIRST_ADJ(lock);
2866 2885 while (ep != HEAD(lock)) {
2867 2886 proc_vertex_t *adj_proc;
2868 2887 adj_proc = flk_get_proc_vertex(ep->to_vertex);
2869 2888 nep = NEXT_ADJ(ep);
2870 2889 IN_LIST_REMOVE(ep);
2871 2890 ADJ_LIST_REMOVE(ep);
2872 2891 flk_free_edge(ep);
2873 2892 ppep = start_vertex->edge;
2874 2893 for (pep = start_vertex->edge; pep != NULL; ppep = pep,
2875 2894 pep = ppep->next) {
2876 2895 if (pep->to_proc == adj_proc) {
2877 2896 pep->refcount--;
2878 2897 if (pep->refcount == 0) {
2879 2898 if (pep == ppep) {
2880 2899 start_vertex->edge = pep->next;
2881 2900 } else {
2882 2901 ppep->next = pep->next;
2883 2902 }
2884 2903 adj_proc->incount--;
2885 2904 flk_proc_release(adj_proc);
2886 2905 flk_free_proc_edge(pep);
2887 2906 }
2888 2907 break;
2889 2908 }
2890 2909 }
2891 2910 ep = nep;
2892 2911 }
2893 2912 ep = FIRST_IN(lock);
2894 2913 while (ep != HEAD(lock)) {
2895 2914 proc_vertex_t *in_proc;
2896 2915 in_proc = flk_get_proc_vertex(ep->from_vertex);
2897 2916 nep = NEXT_IN(ep);
2898 2917 IN_LIST_REMOVE(ep);
2899 2918 ADJ_LIST_REMOVE(ep);
2900 2919 flk_free_edge(ep);
2901 2920 ppep = in_proc->edge;
2902 2921 for (pep = in_proc->edge; pep != NULL; ppep = pep,
2903 2922 pep = ppep->next) {
2904 2923 if (pep->to_proc == start_vertex) {
2905 2924 pep->refcount--;
2906 2925 if (pep->refcount == 0) {
2907 2926 if (pep == ppep) {
2908 2927 in_proc->edge = pep->next;
2909 2928 } else {
2910 2929 ppep->next = pep->next;
2911 2930 }
2912 2931 start_vertex->incount--;
2913 2932 flk_proc_release(in_proc);
2914 2933 flk_free_proc_edge(pep);
2915 2934 }
2916 2935 break;
2917 2936 }
2918 2937 }
2919 2938 ep = nep;
2920 2939 }
2921 2940 flk_proc_release(start_vertex);
2922 2941 mutex_exit(&flock_lock);
2923 2942 return (1);
2924 2943 }
2925 2944
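/*
 * A simplified, self-contained sketch of the cycle test performed above:
 * depth-first search with "arrived"/"departed" marks, where meeting a vertex
 * that has arrived but not yet departed means a wait cycle (deadlock). The
 * fixed-size arrays and the recursion are illustrative only; the kernel
 * version is iterative, works on proc_vertex_t, and reuses the graph mark.
 */
#define	EX_NPROC	8

static int ex_waits_on[EX_NPROC][EX_NPROC];	/* [a][b]: a waits on b */
static int ex_arrived[EX_NPROC];
static int ex_departed[EX_NPROC];

static int
ex_has_cycle(int v)
{
	int w;

	ex_arrived[v] = 1;
	for (w = 0; w < EX_NPROC; w++) {
		if (!ex_waits_on[v][w])
			continue;
		if (ex_arrived[w] && !ex_departed[w])
			return (1);		/* back edge: deadlock */
		if (!ex_arrived[w] && ex_has_cycle(w))
			return (1);
	}
	ex_departed[v] = 1;
	return (0);
}
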
2926 2945 /*
2927 2946  * Get a proc vertex. If the lock's pvertex value indexes a matching proc
2928 2947  * vertex in the list, return that; otherwise allocate one, growing the
2929 2948  * list of vertices if necessary.
2930 2949 */
2931 2950
2932 2951 static proc_vertex_t *
2933 2952 flk_get_proc_vertex(lock_descriptor_t *lock)
2934 2953 {
2935 2954 int i;
2936 2955 proc_vertex_t *pv;
2937 2956 proc_vertex_t **palloc;
2938 2957
2939 2958 ASSERT(MUTEX_HELD(&flock_lock));
2940 2959 if (lock->pvertex != -1) {
2941 2960 ASSERT(lock->pvertex >= 0);
2942 2961 pv = pgraph.proc[lock->pvertex];
2943 2962 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
2944 2963 return (pv);
2945 2964 }
2946 2965 }
2947 2966 for (i = 0; i < pgraph.gcount; i++) {
2948 2967 pv = pgraph.proc[i];
2949 2968 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
2950 2969 lock->pvertex = pv->index = i;
2951 2970 return (pv);
2952 2971 }
2953 2972 }
2954 2973 pv = kmem_zalloc(sizeof (struct proc_vertex), KM_SLEEP);
2955 2974 pv->pid = lock->l_flock.l_pid;
2956 2975 pv->sysid = lock->l_flock.l_sysid;
2957 2976 flk_proc_vertex_allocs++;
2958 2977 if (pgraph.free != 0) {
2959 2978 for (i = 0; i < pgraph.gcount; i++) {
2960 2979 if (pgraph.proc[i] == NULL) {
2961 2980 pgraph.proc[i] = pv;
2962 2981 lock->pvertex = pv->index = i;
2963 2982 pgraph.free--;
2964 2983 return (pv);
2965 2984 }
2966 2985 }
2967 2986 }
2968 2987 palloc = kmem_zalloc((pgraph.gcount + PROC_CHUNK) *
2969 2988 sizeof (proc_vertex_t *), KM_SLEEP);
2970 2989
2971 2990 if (pgraph.proc) {
2972 2991 bcopy(pgraph.proc, palloc,
2973 2992 pgraph.gcount * sizeof (proc_vertex_t *));
2974 2993
2975 2994 kmem_free(pgraph.proc,
2976 2995 pgraph.gcount * sizeof (proc_vertex_t *));
2977 2996 }
2978 2997 pgraph.proc = palloc;
2979 2998 pgraph.free += (PROC_CHUNK - 1);
2980 2999 pv->index = lock->pvertex = pgraph.gcount;
2981 3000 pgraph.gcount += PROC_CHUNK;
2982 3001 pgraph.proc[pv->index] = pv;
2983 3002 return (pv);
2984 3003 }
2985 3004
2986 3005 /*
2987 3006 * Allocate a proc edge.
2988 3007 */
2989 3008
2990 3009 static proc_edge_t *
2991 3010 flk_get_proc_edge()
2992 3011 {
2993 3012 proc_edge_t *pep;
2994 3013
2995 3014 pep = kmem_zalloc(sizeof (proc_edge_t), KM_SLEEP);
2996 3015 flk_proc_edge_allocs++;
2997 3016 return (pep);
2998 3017 }
2999 3018
3000 3019 /*
3001 3020 * Free the proc edge. Called whenever its reference count goes to zero.
3002 3021 */
3003 3022
3004 3023 static void
3005 3024 flk_free_proc_edge(proc_edge_t *pep)
3006 3025 {
3007 3026 ASSERT(pep->refcount == 0);
3008 3027 kmem_free((void *)pep, sizeof (proc_edge_t));
3009 3028 flk_proc_edge_frees++;
3010 3029 }
3011 3030
3012 3031 /*
3013 3032  * The graph is explicitly recolored only when the mark value hits its maximum.
3014 3033 */
3015 3034
3016 3035 static void
3017 3036 flk_proc_graph_uncolor()
3018 3037 {
3019 3038 int i;
3020 3039
3021 3040 if (pgraph.mark == UINT_MAX) {
3022 3041 for (i = 0; i < pgraph.gcount; i++)
3023 3042 if (pgraph.proc[i] != NULL) {
3024 3043 pgraph.proc[i]->atime = 0;
3025 3044 pgraph.proc[i]->dtime = 0;
3026 3045 }
3027 3046 pgraph.mark = 1;
3028 3047 } else {
3029 3048 pgraph.mark++;
3030 3049 }
3031 3050 }
3032 3051
3033 3052 /*
3034 3053  * Release the proc vertex iff it has neither in-edges nor out-edges.
3035 3054 */
3036 3055
3037 3056 static void
3038 3057 flk_proc_release(proc_vertex_t *proc)
3039 3058 {
3040 3059 ASSERT(MUTEX_HELD(&flock_lock));
3041 3060 if (proc->edge == NULL && proc->incount == 0) {
3042 3061 pgraph.proc[proc->index] = NULL;
3043 3062 pgraph.free++;
3044 3063 kmem_free(proc, sizeof (proc_vertex_t));
3045 3064 flk_proc_vertex_frees++;
3046 3065 }
3047 3066 }
3048 3067
3049 3068 /*
3050 3069  * Update the process graph to reflect a change in a lock graph.
3051 3070  * Note: this function should be called only after the lock graph has been
3052 3071  * correctly recomputed; otherwise we might miss a deadlock. E.g., in
3053 3072  * flk_relation() we call this function after flk_recompute_dependencies();
3054 3073  * otherwise, if a process tries to lock a vnode hashed into another
3055 3074  * graph, it might sleep forever.
3056 3075 */
3057 3076
3058 3077 static void
3059 3078 flk_update_proc_graph(edge_t *ep, int delete)
3060 3079 {
3061 3080 proc_vertex_t *toproc, *fromproc;
3062 3081 proc_edge_t *pep, *prevpep;
3063 3082
3064 3083 mutex_enter(&flock_lock);
3065 3084 toproc = flk_get_proc_vertex(ep->to_vertex);
3066 3085 fromproc = flk_get_proc_vertex(ep->from_vertex);
3067 3086
3068 3087 if (!delete)
3069 3088 goto add;
3070 3089 pep = prevpep = fromproc->edge;
3071 3090
3072 3091 ASSERT(pep != NULL);
3073 3092 while (pep != NULL) {
3074 3093 if (pep->to_proc == toproc) {
3075 3094 ASSERT(pep->refcount > 0);
3076 3095 pep->refcount--;
3077 3096 if (pep->refcount == 0) {
3078 3097 if (pep == prevpep) {
3079 3098 fromproc->edge = pep->next;
3080 3099 } else {
3081 3100 prevpep->next = pep->next;
3082 3101 }
3083 3102 toproc->incount--;
3084 3103 flk_proc_release(toproc);
3085 3104 flk_free_proc_edge(pep);
3086 3105 }
3087 3106 break;
3088 3107 }
3089 3108 prevpep = pep;
3090 3109 pep = pep->next;
3091 3110 }
3092 3111 flk_proc_release(fromproc);
3093 3112 mutex_exit(&flock_lock);
3094 3113 return;
3095 3114 add:
3096 3115
3097 3116 pep = fromproc->edge;
3098 3117
3099 3118 while (pep != NULL) {
3100 3119 if (pep->to_proc == toproc) {
3101 3120 ASSERT(pep->refcount > 0);
3102 3121 pep->refcount++;
3103 3122 break;
3104 3123 }
3105 3124 pep = pep->next;
3106 3125 }
3107 3126 if (pep == NULL) {
3108 3127 pep = flk_get_proc_edge();
3109 3128 pep->to_proc = toproc;
3110 3129 pep->refcount = 1;
3111 3130 toproc->incount++;
3112 3131 pep->next = fromproc->edge;
3113 3132 fromproc->edge = pep;
3114 3133 }
3115 3134 mutex_exit(&flock_lock);
3116 3135 }
3117 3136
3118 3137 /*
3119 3138 * Set the control status for lock manager requests.
3120 3139 *
3121 3140 */
3122 3141
3123 3142 /*
3124 3143 * PSARC case 1997/292
3125 3144 *
3126 3145 * Requires: "nlmid" must be >= 1 and <= clconf_maximum_nodeid().
3127 3146 * Effects: Set the state of the NLM server identified by "nlmid"
3128 3147 * in the NLM registry to state "nlm_state."
3129 3148 * Raises exception no_such_nlm if "nlmid" doesn't identify a known
3130 3149 * NLM server to this LLM.
3131 3150 * Note that when this routine is called with NLM_SHUTTING_DOWN there
3132 3151  * may be lock requests that have gotten started but not finished. In
3133 3152 * particular, there may be blocking requests that are in the callback code
3134 3153 * before sleeping (so they're not holding the lock for the graph). If
3135 3154 * such a thread reacquires the graph's lock (to go to sleep) after
3136 3155 * NLM state in the NLM registry is set to a non-up value,
3137 3156 * it will notice the status and bail out. If the request gets
3138 3157 * granted before the thread can check the NLM registry, let it
3139 3158 * continue normally. It will get flushed when we are called with NLM_DOWN.
3140 3159 *
3141 3160 * Modifies: nlm_reg_obj (global)
3142 3161 * Arguments:
3143 3162 * nlmid (IN): id uniquely identifying an NLM server
3144 3163 * nlm_state (IN): NLM server state to change "nlmid" to
3145 3164 */
3146 3165 void
3147 3166 cl_flk_set_nlm_status(int nlmid, flk_nlm_status_t nlm_state)
3148 3167 {
3149 3168 /*
3150 3169 * Check to see if node is booted as a cluster. If not, return.
3151 3170 */
3152 3171 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
3153 3172 return;
3154 3173 }
3155 3174
3156 3175 /*
3157 3176 * Check for development/debugging. It is possible to boot a node
3158 3177 * in non-cluster mode, and then run a special script, currently
3159 3178 * available only to developers, to bring up the node as part of a
3160 3179 * cluster. The problem is that running such a script does not
3161 3180 * result in the routine flk_init() being called and hence global array
3162 3181 * nlm_reg_status is NULL. The NLM thinks it's in cluster mode,
3163 3182 * but the LLM needs to do an additional check to see if the global
3164 3183 * array has been created or not. If nlm_reg_status is NULL, then
3165 3184 * return, else continue.
3166 3185 */
3167 3186 if (nlm_reg_status == NULL) {
3168 3187 return;
3169 3188 }
3170 3189
3171 3190 ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
3172 3191 mutex_enter(&nlm_reg_lock);
3173 3192
3174 3193 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, nlmid)) {
3175 3194 /*
3176 3195 * If the NLM server "nlmid" is unknown in the NLM registry,
3177 3196 * add it to the registry in the nlm shutting down state.
3178 3197 */
3179 3198 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
3180 3199 FLK_NLM_SHUTTING_DOWN);
3181 3200 } else {
3182 3201 /*
3183 3202 * Change the state of the NLM server identified by "nlmid"
3184 3203 * in the NLM registry to the argument "nlm_state."
3185 3204 */
3186 3205 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
3187 3206 nlm_state);
3188 3207 }
3189 3208
3190 3209 /*
3191 3210 * The reason we must register the NLM server that is shutting down
3192 3211 * with an LLM that doesn't already know about it (never sent a lock
3193 3212 * request) is to handle correctly a race between shutdown and a new
3194 3213 * lock request. Suppose that a shutdown request from the NLM server
3195 3214 * invokes this routine at the LLM, and a thread is spawned to
3196 3215 * service the request. Now suppose a new lock request is in
3197 3216 * progress and has already passed the first line of defense in
3198 3217  * reclock(), which denies new lock requests from NLM servers
3199 3218 * that are not in the NLM_UP state. After the current routine
3200 3219 * is invoked for both phases of shutdown, the routine will return,
3201 3220 * having done nothing, and the lock request will proceed and
3202 3221 * probably be granted. The problem is that the shutdown was ignored
3203 3222 * by the lock request because there was no record of that NLM server
3204 3223 * shutting down. We will be in the peculiar position of thinking
3205 3224  * that we've shut down the NLM server and all locks at all LLMs have
3206 3225 * been discarded, but in fact there's still one lock held.
3207 3226 * The solution is to record the existence of NLM server and change
3208 3227 * its state immediately to NLM_SHUTTING_DOWN. The lock request in
3209 3228 * progress may proceed because the next phase NLM_DOWN will catch
3210 3229 * this lock and discard it.
3211 3230 */
3212 3231 mutex_exit(&nlm_reg_lock);
3213 3232
3214 3233 switch (nlm_state) {
3215 3234 case FLK_NLM_UP:
3216 3235 /*
3217 3236 * Change the NLM state of all locks still held on behalf of
3218 3237 * the NLM server identified by "nlmid" to NLM_UP.
3219 3238 */
3220 3239 cl_flk_change_nlm_state_all_locks(nlmid, FLK_NLM_UP);
3221 3240 break;
3222 3241
3223 3242 case FLK_NLM_SHUTTING_DOWN:
3224 3243 /*
3225 3244 * Wake up all sleeping locks for the NLM server identified
3226 3245 * by "nlmid." Note that eventually all woken threads will
3227 3246 * have their lock requests cancelled and descriptors
3228 3247 * removed from the sleeping lock list. Note that the NLM
3229 3248 * server state associated with each lock descriptor is
3230 3249 * changed to FLK_NLM_SHUTTING_DOWN.
3231 3250 */
3232 3251 cl_flk_wakeup_sleeping_nlm_locks(nlmid);
3233 3252 break;
3234 3253
3235 3254 case FLK_NLM_DOWN:
3236 3255 /*
3237 3256 * Discard all active, granted locks for this NLM server
3238 3257 * identified by "nlmid."
3239 3258 */
3240 3259 cl_flk_unlock_nlm_granted(nlmid);
3241 3260 break;
3242 3261
3243 3262 default:
3244 3263 panic("cl_set_nlm_status: bad status (%d)", nlm_state);
3245 3264 }
3246 3265 }
3247 3266
3248 3267 /*
3249 3268 * Set the control status for lock manager requests.
3250 3269 *
3251 3270 * Note that when this routine is called with FLK_WAKEUP_SLEEPERS, there
3252 3271  * may be lock requests that have gotten started but not finished. In
3253 3272 * particular, there may be blocking requests that are in the callback code
3254 3273 * before sleeping (so they're not holding the lock for the graph). If
3255 3274 * such a thread reacquires the graph's lock (to go to sleep) after
3256 3275 * flk_lockmgr_status is set to a non-up value, it will notice the status
3257 3276 * and bail out. If the request gets granted before the thread can check
3258 3277 * flk_lockmgr_status, let it continue normally. It will get flushed when
3259 3278 * we are called with FLK_LOCKMGR_DOWN.
3260 3279 */
3261 3280
3262 3281 void
3263 3282 flk_set_lockmgr_status(flk_lockmgr_status_t status)
3264 3283 {
3265 3284 int i;
3266 3285 graph_t *gp;
3267 3286 struct flock_globals *fg;
3268 3287
3269 3288 fg = flk_get_globals();
3270 3289 ASSERT(fg != NULL);
3271 3290
3272 3291 mutex_enter(&flock_lock);
3273 3292 fg->flk_lockmgr_status = status;
3274 3293 mutex_exit(&flock_lock);
3275 3294
3276 3295 /*
3277 3296 * If the lock manager is coming back up, all that's needed is to
3278 3297 * propagate this information to the graphs. If the lock manager
3279 3298 * is going down, additional action is required, and each graph's
3280 3299 * copy of the state is updated atomically with this other action.
3281 3300 */
3282 3301 switch (status) {
3283 3302 case FLK_LOCKMGR_UP:
3284 3303 for (i = 0; i < HASH_SIZE; i++) {
3285 3304 mutex_enter(&flock_lock);
3286 3305 gp = lock_graph[i];
3287 3306 mutex_exit(&flock_lock);
3288 3307 if (gp == NULL)
3289 3308 continue;
3290 3309 mutex_enter(&gp->gp_mutex);
3291 3310 fg->lockmgr_status[i] = status;
3292 3311 mutex_exit(&gp->gp_mutex);
3293 3312 }
3294 3313 break;
3295 3314 case FLK_WAKEUP_SLEEPERS:
3296 3315 wakeup_sleeping_lockmgr_locks(fg);
3297 3316 break;
3298 3317 case FLK_LOCKMGR_DOWN:
3299 3318 unlock_lockmgr_granted(fg);
3300 3319 break;
3301 3320 default:
3302 3321 panic("flk_set_lockmgr_status: bad status (%d)", status);
3303 3322 break;
3304 3323 }
3305 3324 }
3306 3325
3307 3326 /*
3308 3327 * This routine returns all the locks that are active or sleeping and are
3309 3328 * associated with a particular set of identifiers. If lock_state != 0, then
3310 3329 * only locks that match the lock_state are returned. If lock_state == 0, then
3311 3330 * all locks are returned. If pid == NOPID, the pid is ignored. If
3312 3331 * use_sysid is FALSE, then the sysid is ignored. If vp is NULL, then the
3313 3332 * vnode pointer is ignored.
3314 3333 *
3315 3334 * A list containing the vnode pointer and an flock structure
3316 3335 * describing the lock is returned. Each element in the list is
3317 3336 * dynamically allocated and must be freed by the caller. The
3318 3337 * last item in the list is denoted by a NULL value in the ll_next
3319 3338 * field.
3320 3339 *
3321 3340 * The vnode pointers returned are held. The caller is responsible
3322 3341 * for releasing these. Note that the returned list is only a snapshot of
3323 3342 * the current lock information, and that it is a snapshot of a moving
3324 3343 * target (only one graph is locked at a time).
3325 3344 */
3326 3345
3327 3346 locklist_t *
3328 3347 get_lock_list(int list_type, int lock_state, int sysid, boolean_t use_sysid,
3329 3348 pid_t pid, const vnode_t *vp, zoneid_t zoneid)
3330 3349 {
3331 3350 lock_descriptor_t *lock;
3332 3351 lock_descriptor_t *graph_head;
3333 3352 locklist_t listhead;
3334 3353 locklist_t *llheadp;
3335 3354 locklist_t *llp;
3336 3355 locklist_t *lltp;
3337 3356 graph_t *gp;
3338 3357 int i;
3339 3358 int first_index; /* graph index */
3340 3359 int num_indexes; /* graph index */
3341 3360
3342 3361 ASSERT((list_type == FLK_ACTIVE_STATE) ||
3343 3362 (list_type == FLK_SLEEPING_STATE));
3344 3363
3345 3364 /*
3346 3365 * Get a pointer to something to use as a list head while building
3347 3366 * the rest of the list.
3348 3367 */
3349 3368 llheadp = &listhead;
3350 3369 lltp = llheadp;
3351 3370 llheadp->ll_next = (locklist_t *)NULL;
3352 3371
3353 3372 /* Figure out which graphs we want to look at. */
3354 3373 if (vp == NULL) {
3355 3374 first_index = 0;
3356 3375 num_indexes = HASH_SIZE;
3357 3376 } else {
3358 3377 first_index = HASH_INDEX(vp);
3359 3378 num_indexes = 1;
3360 3379 }
3361 3380
3362 3381 for (i = first_index; i < first_index + num_indexes; i++) {
3363 3382 mutex_enter(&flock_lock);
3364 3383 gp = lock_graph[i];
3365 3384 mutex_exit(&flock_lock);
3366 3385 if (gp == NULL) {
3367 3386 continue;
3368 3387 }
3369 3388
3370 3389 mutex_enter(&gp->gp_mutex);
3371 3390 graph_head = (list_type == FLK_ACTIVE_STATE) ?
3372 3391 ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp);
3373 3392 for (lock = graph_head->l_next;
3374 3393 lock != graph_head;
3375 3394 lock = lock->l_next) {
3376 3395 if (use_sysid && lock->l_flock.l_sysid != sysid)
3377 3396 continue;
3378 3397 if (pid != NOPID && lock->l_flock.l_pid != pid)
3379 3398 continue;
3380 3399 if (vp != NULL && lock->l_vnode != vp)
3381 3400 continue;
3382 3401 if (lock_state && !(lock_state & lock->l_state))
3383 3402 continue;
3384 3403 if (zoneid != lock->l_zoneid && zoneid != ALL_ZONES)
3385 3404 continue;
3386 3405 /*
3387 3406 * A matching lock was found. Allocate
3388 3407 * space for a new locklist entry and fill
3389 3408 * it in.
3390 3409 */
3391 3410 llp = kmem_alloc(sizeof (locklist_t), KM_SLEEP);
3392 3411 lltp->ll_next = llp;
3393 3412 VN_HOLD(lock->l_vnode);
3394 3413 llp->ll_vp = lock->l_vnode;
3395 3414 create_flock(lock, &(llp->ll_flock));
3396 3415 llp->ll_next = (locklist_t *)NULL;
3397 3416 lltp = llp;
3398 3417 }
3399 3418 mutex_exit(&gp->gp_mutex);
3400 3419 }
3401 3420
3402 3421 llp = llheadp->ll_next;
3403 3422 return (llp);
3404 3423 }
3405 3424
3406 3425 /*
3407 3426 * These two functions are simply interfaces to get_lock_list. They return
3408 3427 * a list of sleeping or active locks for the given sysid and pid. See
3409 3428 * get_lock_list for details.
3410 3429 *
3411 3430 * In either case we don't particularly care to specify the zone of interest;
3412 3431 * the sysid-space is global across zones, so the sysid will map to exactly one
3413 3432 * zone, and we'll return information for that zone.
3414 3433 */
3415 3434
3416 3435 locklist_t *
3417 3436 flk_get_sleeping_locks(int sysid, pid_t pid)
3418 3437 {
3419 3438 return (get_lock_list(FLK_SLEEPING_STATE, 0, sysid, B_TRUE, pid, NULL,
3420 3439 ALL_ZONES));
3421 3440 }
3422 3441
3423 3442 locklist_t *
3424 3443 flk_get_active_locks(int sysid, pid_t pid)
3425 3444 {
3426 3445 return (get_lock_list(FLK_ACTIVE_STATE, 0, sysid, B_TRUE, pid, NULL,
3427 3446 ALL_ZONES));
3428 3447 }
3429 3448
3430 3449 /*
3431 3450 * Another interface to get_lock_list. This one returns all the active
3432 3451 * locks for a given vnode. Again, see get_lock_list for details.
3433 3452 *
3434 3453 * We don't need to specify which zone's locks we're interested in. The matter
3435 3454 * would only be interesting if the vnode belonged to NFS, and NFS vnodes can't
3436 3455 * be used by multiple zones, so the list of locks will all be from the right
3437 3456 * zone.
3438 3457 */
3439 3458
3440 3459 locklist_t *
3441 3460 flk_active_locks_for_vp(const vnode_t *vp)
3442 3461 {
3443 3462 return (get_lock_list(FLK_ACTIVE_STATE, 0, 0, B_FALSE, NOPID, vp,
3444 3463 ALL_ZONES));
3445 3464 }
3446 3465
3447 3466 /*
3448 3467 * Another interface to get_lock_list. This one returns all the active
3449 3468 * nbmand locks for a given vnode. Again, see get_lock_list for details.
3450 3469 *
3451 3470 * See the comment for flk_active_locks_for_vp() for why we don't care to
3452 3471 * specify the particular zone of interest.
3453 3472 */
3454 3473 locklist_t *
3455 3474 flk_active_nbmand_locks_for_vp(const vnode_t *vp)
3456 3475 {
3457 3476 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
3458 3477 NOPID, vp, ALL_ZONES));
3459 3478 }
3460 3479
3461 3480 /*
3462 3481 * Another interface to get_lock_list. This one returns all the active
3463 3482 * nbmand locks for a given pid. Again, see get_lock_list for details.
3464 3483 *
3465 3484 * The zone doesn't need to be specified here; the locks held by a
3466 3485 * particular process will either be local (ie, non-NFS) or from the zone
3467 3486 * the process is executing in. This is because other parts of the system
3468 3487 * ensure that an NFS vnode can't be used in a zone other than that in
3469 3488 * which it was opened.
3470 3489 */
3471 3490 locklist_t *
3472 3491 flk_active_nbmand_locks(pid_t pid)
3473 3492 {
3474 3493 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
3475 3494 pid, NULL, ALL_ZONES));
3476 3495 }
3477 3496
3478 3497 /*
3479 3498 * Free up all entries in the locklist.
3480 3499 */
3481 3500 void
3482 3501 flk_free_locklist(locklist_t *llp)
3483 3502 {
3484 3503 locklist_t *next_llp;
3485 3504
3486 3505 while (llp) {
3487 3506 next_llp = llp->ll_next;
3488 3507 VN_RELE(llp->ll_vp);
3489 3508 kmem_free(llp, sizeof (*llp));
3490 3509 llp = next_llp;
3491 3510 }
3492 3511 }
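
For reference, a consumer of these list interfaces walks the returned chain through ll_next and then hands the whole list back to flk_free_locklist(), which releases the VN_HOLD taken on each ll_vp. A minimal sketch, assuming only the functions and locklist_t fields shown above (dump_active_locks() and the cmn_err() reporting are hypothetical, purely for illustration):

static void
dump_active_locks(vnode_t *vp)
{
	locklist_t *head, *llp;

	head = flk_active_locks_for_vp(vp);
	for (llp = head; llp != NULL; llp = llp->ll_next) {
		cmn_err(CE_CONT, "type %d start %lld len %lld pid %d\n",
		    llp->ll_flock.l_type,
		    (longlong_t)llp->ll_flock.l_start,
		    (longlong_t)llp->ll_flock.l_len,
		    (int)llp->ll_flock.l_pid);
	}
	flk_free_locklist(head);	/* drops the holds taken by get_lock_list() */
}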
3493 3512
3494 3513 static void
3495 3514 cl_flk_change_nlm_state_all_locks(int nlmid, flk_nlm_status_t nlm_state)
3496 3515 {
3497 3516 /*
3498 3517 * For each graph "lg" in the hash table lock_graph do
3499 3518 * a. Get the list of sleeping locks
3500 3519 * b. For each lock descriptor in the list do
3501 3520 * i. If the requested lock is an NLM server request AND
3502 3521 * the nlmid is the same as the routine argument then
3503 3522 * change the lock descriptor's state field to
3504 3523 * "nlm_state."
3505 3524 * c. Get the list of active locks
3506 3525 * d. For each lock descriptor in the list do
3507 3526 * i. If the requested lock is an NLM server request AND
3508 3527 * the nlmid is the same as the routine argument then
3509 3528 * change the lock descriptor's state field to
3510 3529 * "nlm_state."
3511 3530 */
3512 3531
3513 3532 int i;
3514 3533 graph_t *gp; /* lock graph */
3515 3534 lock_descriptor_t *lock; /* lock */
3516 3535 lock_descriptor_t *nlock = NULL; /* next lock */
3517 3536 int lock_nlmid;
3518 3537
3519 3538 for (i = 0; i < HASH_SIZE; i++) {
3520 3539 mutex_enter(&flock_lock);
3521 3540 gp = lock_graph[i];
3522 3541 mutex_exit(&flock_lock);
3523 3542 if (gp == NULL) {
3524 3543 continue;
3525 3544 }
3526 3545
3527 3546 /* Get list of sleeping locks in current lock graph. */
3528 3547 mutex_enter(&gp->gp_mutex);
3529 3548 for (lock = SLEEPING_HEAD(gp)->l_next;
3530 3549 lock != SLEEPING_HEAD(gp);
3531 3550 lock = nlock) {
3532 3551 nlock = lock->l_next;
3533 3552 /* get NLM id */
3534 3553 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
3535 3554
3536 3555 /*
3537 3556 * If NLM server request AND nlmid of lock matches
3538 3557 * nlmid of argument, then set the NLM state of the
3539 3558 * lock to "nlm_state."
3540 3559 */
3541 3560 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
3542 3561 SET_NLM_STATE(lock, nlm_state);
3543 3562 }
3544 3563 }
3545 3564
3546 3565 /* Get list of active locks in current lock graph. */
3547 3566 for (lock = ACTIVE_HEAD(gp)->l_next;
3548 3567 lock != ACTIVE_HEAD(gp);
3549 3568 lock = nlock) {
3550 3569 nlock = lock->l_next;
3551 3570 /* get NLM id */
3552 3571 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
3553 3572
3554 3573 /*
3555 3574 * If NLM server request AND nlmid of lock matches
3556 3575 * nlmid of argument, then set the NLM state of the
3557 3576 * lock to "nlm_state."
3558 3577 */
3559 3578 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
3560 3579 ASSERT(IS_ACTIVE(lock));
3561 3580 SET_NLM_STATE(lock, nlm_state);
3562 3581 }
3563 3582 }
3564 3583 mutex_exit(&gp->gp_mutex);
3565 3584 }
3566 3585 }
3567 3586
3568 3587 /*
3569 3588 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid().
3570 3589 * Effects: Find all sleeping lock manager requests _only_ for the NLM server
3571 3590 * identified by "nlmid." Poke those lock requests.
3572 3591 */
3573 3592 static void
3574 3593 cl_flk_wakeup_sleeping_nlm_locks(int nlmid)
3575 3594 {
3576 3595 lock_descriptor_t *lock;
3577 3596 lock_descriptor_t *nlock = NULL; /* next lock */
3578 3597 int i;
3579 3598 graph_t *gp;
3580 3599 int lock_nlmid;
3581 3600
3582 3601 for (i = 0; i < HASH_SIZE; i++) {
3583 3602 mutex_enter(&flock_lock);
3584 3603 gp = lock_graph[i];
3585 3604 mutex_exit(&flock_lock);
3586 3605 if (gp == NULL) {
3587 3606 continue;
3588 3607 }
3589 3608
3590 3609 mutex_enter(&gp->gp_mutex);
3591 3610 for (lock = SLEEPING_HEAD(gp)->l_next;
3592 3611 lock != SLEEPING_HEAD(gp);
3593 3612 lock = nlock) {
3594 3613 nlock = lock->l_next;
3595 3614 /*
3596 3615 * If NLM server request _and_ nlmid of lock matches
3597 3616 * nlmid of argument, then set the NLM state of the
3598 3617 * lock to NLM_SHUTTING_DOWN, and wake up sleeping
3599 3618 * request.
3600 3619 */
3601 3620 if (IS_LOCKMGR(lock)) {
3602 3621 /* get NLM id */
3603 3622 lock_nlmid =
3604 3623 GETNLMID(lock->l_flock.l_sysid);
3605 3624 if (nlmid == lock_nlmid) {
3606 3625 SET_NLM_STATE(lock,
3607 3626 FLK_NLM_SHUTTING_DOWN);
3608 3627 INTERRUPT_WAKEUP(lock);
3609 3628 }
3610 3629 }
3611 3630 }
3612 3631 mutex_exit(&gp->gp_mutex);
3613 3632 }
3614 3633 }
3615 3634
3616 3635 /*
3617 3636 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid()
3618 3637 * Effects: Find all active (granted) lock manager locks _only_ for the
3619 3638 * NLM server identified by "nlmid" and release them.
3620 3639 */
3621 3640 static void
3622 3641 cl_flk_unlock_nlm_granted(int nlmid)
3623 3642 {
3624 3643 lock_descriptor_t *lock;
3625 3644 lock_descriptor_t *nlock = NULL; /* next lock */
3626 3645 int i;
3627 3646 graph_t *gp;
3628 3647 int lock_nlmid;
3629 3648
3630 3649 for (i = 0; i < HASH_SIZE; i++) {
3631 3650 mutex_enter(&flock_lock);
3632 3651 gp = lock_graph[i];
3633 3652 mutex_exit(&flock_lock);
3634 3653 if (gp == NULL) {
3635 3654 continue;
3636 3655 }
3637 3656
3638 3657 mutex_enter(&gp->gp_mutex);
3639 3658 for (lock = ACTIVE_HEAD(gp)->l_next;
3640 3659 lock != ACTIVE_HEAD(gp);
3641 3660 lock = nlock) {
3642 3661 nlock = lock->l_next;
3643 3662 ASSERT(IS_ACTIVE(lock));
3644 3663
3645 3664 /*
3646 3665 * If it's an NLM server request _and_ nlmid of
3647 3666 * the lock matches nlmid of argument, then
 3648  3667 			 * remove the active lock from the list, wake up
 3649  3668 			 * blocked threads, and free the storage for the lock.
 3650  3669 			 * Note that there's no need to set the NLM state
 3651  3670 			 * of this lock to NLM_DOWN because the lock will
3652 3671 * be deleted anyway and its storage freed.
3653 3672 */
3654 3673 if (IS_LOCKMGR(lock)) {
3655 3674 /* get NLM id */
3656 3675 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
3657 3676 if (nlmid == lock_nlmid) {
3658 3677 flk_delete_active_lock(lock, 0);
3659 3678 flk_wakeup(lock, 1);
3660 3679 flk_free_lock(lock);
3661 3680 }
3662 3681 }
3663 3682 }
3664 3683 mutex_exit(&gp->gp_mutex);
3665 3684 }
3666 3685 }
3667 3686
3668 3687 /*
3669 3688 * Find all sleeping lock manager requests and poke them.
3670 3689 */
3671 3690 static void
3672 3691 wakeup_sleeping_lockmgr_locks(struct flock_globals *fg)
3673 3692 {
3674 3693 lock_descriptor_t *lock;
3675 3694 lock_descriptor_t *nlock = NULL; /* next lock */
3676 3695 int i;
3677 3696 graph_t *gp;
3678 3697 zoneid_t zoneid = getzoneid();
3679 3698
3680 3699 for (i = 0; i < HASH_SIZE; i++) {
3681 3700 mutex_enter(&flock_lock);
3682 3701 gp = lock_graph[i];
3683 3702 mutex_exit(&flock_lock);
3684 3703 if (gp == NULL) {
3685 3704 continue;
3686 3705 }
3687 3706
3688 3707 mutex_enter(&gp->gp_mutex);
3689 3708 fg->lockmgr_status[i] = FLK_WAKEUP_SLEEPERS;
3690 3709 for (lock = SLEEPING_HEAD(gp)->l_next;
3691 3710 lock != SLEEPING_HEAD(gp);
3692 3711 lock = nlock) {
3693 3712 nlock = lock->l_next;
3694 3713 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
3695 3714 INTERRUPT_WAKEUP(lock);
3696 3715 }
3697 3716 }
3698 3717 mutex_exit(&gp->gp_mutex);
3699 3718 }
3700 3719 }
3701 3720
3702 3721
3703 3722 /*
3704 3723 * Find all active (granted) lock manager locks and release them.
3705 3724 */
3706 3725 static void
3707 3726 unlock_lockmgr_granted(struct flock_globals *fg)
3708 3727 {
3709 3728 lock_descriptor_t *lock;
3710 3729 lock_descriptor_t *nlock = NULL; /* next lock */
3711 3730 int i;
3712 3731 graph_t *gp;
3713 3732 zoneid_t zoneid = getzoneid();
3714 3733
3715 3734 for (i = 0; i < HASH_SIZE; i++) {
3716 3735 mutex_enter(&flock_lock);
3717 3736 gp = lock_graph[i];
3718 3737 mutex_exit(&flock_lock);
3719 3738 if (gp == NULL) {
3720 3739 continue;
3721 3740 }
3722 3741
3723 3742 mutex_enter(&gp->gp_mutex);
3724 3743 fg->lockmgr_status[i] = FLK_LOCKMGR_DOWN;
3725 3744 for (lock = ACTIVE_HEAD(gp)->l_next;
3726 3745 lock != ACTIVE_HEAD(gp);
3727 3746 lock = nlock) {
3728 3747 nlock = lock->l_next;
3729 3748 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
3730 3749 ASSERT(IS_ACTIVE(lock));
3731 3750 flk_delete_active_lock(lock, 0);
3732 3751 flk_wakeup(lock, 1);
3733 3752 flk_free_lock(lock);
3734 3753 }
3735 3754 }
3736 3755 mutex_exit(&gp->gp_mutex);
3737 3756 }
3738 3757 }
3739 3758
3740 3759
3741 3760 /*
3742 3761 * Wait until a lock is granted, cancelled, or interrupted.
3743 3762 */
3744 3763
3745 3764 static void
3746 3765 wait_for_lock(lock_descriptor_t *request)
3747 3766 {
3748 3767 graph_t *gp = request->l_graph;
3749 3768
3750 3769 ASSERT(MUTEX_HELD(&gp->gp_mutex));
3751 3770
3752 3771 while (!(IS_GRANTED(request)) && !(IS_CANCELLED(request)) &&
3753 3772 !(IS_INTERRUPTED(request))) {
3754 3773 if (!cv_wait_sig(&request->l_cv, &gp->gp_mutex)) {
3755 3774 flk_set_state(request, FLK_INTERRUPTED_STATE);
3756 3775 request->l_state |= INTERRUPTED_LOCK;
3757 3776 }
3758 3777 }
3759 3778 }
3760 3779
3761 3780 /*
3762 3781 * Create an flock structure from the existing lock information
3763 3782 *
3764 3783 * This routine is used to create flock structures for the lock manager
 3765  3784  * to use in a reclaim request. Since the lock originated on this
 3766  3785  * host, it must conform to UNIX semantics, so no checking is
3767 3786 * done to make sure it falls within the lower half of the 32-bit range.
3768 3787 */
3769 3788
3770 3789 static void
3771 3790 create_flock(lock_descriptor_t *lp, flock64_t *flp)
3772 3791 {
3773 3792 ASSERT(lp->l_end == MAX_U_OFFSET_T || lp->l_end <= MAXEND);
3774 3793 ASSERT(lp->l_end >= lp->l_start);
3775 3794
3776 3795 flp->l_type = lp->l_type;
3777 3796 flp->l_whence = 0;
3778 3797 flp->l_start = lp->l_start;
3779 3798 flp->l_len = (lp->l_end == MAX_U_OFFSET_T) ? 0 :
3780 3799 (lp->l_end - lp->l_start + 1);
3781 3800 flp->l_sysid = lp->l_flock.l_sysid;
3782 3801 flp->l_pid = lp->l_flock.l_pid;
3783 3802 }
3784 3803
3785 3804 /*
3786 3805 * Convert flock_t data describing a lock range into unsigned long starting
3787 3806 * and ending points, which are put into lock_request. Returns 0 or an
3788 3807 * errno value.
3789 3808 * Large Files: max is passed by the caller and we return EOVERFLOW
3790 3809 * as defined by LFS API.
3791 3810 */
3792 3811
3793 3812 int
3794 3813 flk_convert_lock_data(vnode_t *vp, flock64_t *flp,
3795 3814 u_offset_t *start, u_offset_t *end, offset_t offset)
3796 3815 {
3797 3816 struct vattr vattr;
3798 3817 int error;
3799 3818
3800 3819 /*
3801 3820 * Determine the starting point of the request
3802 3821 */
3803 3822 switch (flp->l_whence) {
3804 3823 case 0: /* SEEK_SET */
3805 3824 *start = (u_offset_t)flp->l_start;
3806 3825 break;
3807 3826 case 1: /* SEEK_CUR */
3808 3827 *start = (u_offset_t)(flp->l_start + offset);
3809 3828 break;
3810 3829 case 2: /* SEEK_END */
3811 3830 vattr.va_mask = AT_SIZE;
3812 3831 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
3813 3832 return (error);
3814 3833 *start = (u_offset_t)(flp->l_start + vattr.va_size);
3815 3834 break;
3816 3835 default:
3817 3836 return (EINVAL);
3818 3837 }
3819 3838
3820 3839 /*
3821 3840 * Determine the range covered by the request.
3822 3841 */
3823 3842 if (flp->l_len == 0)
3824 3843 *end = MAX_U_OFFSET_T;
3825 3844 else if ((offset_t)flp->l_len > 0) {
3826 3845 *end = (u_offset_t)(*start + (flp->l_len - 1));
3827 3846 } else {
3828 3847 /*
 3829  3848 		 * Negative length; why do we even allow this?
3830 3849 * Because this allows easy specification of
3831 3850 * the last n bytes of the file.
3832 3851 */
3833 3852 *end = *start;
3834 3853 *start += (u_offset_t)flp->l_len;
3835 3854 (*start)++;
3836 3855 }
3837 3856 return (0);
3838 3857 }
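
To make the negative-length branch concrete, here is a hedged sketch (convert_example() and its values are hypothetical): with l_whence of SEEK_CUR, l_start of 0, l_len of -10 and a current offset of 100, the code above first resolves *start to 100, then sets *end = 100 and *start = 100 - 10 + 1 = 91, so the request covers the inclusive byte range [91, 100].

static void
convert_example(vnode_t *vp)
{
	flock64_t fl;
	u_offset_t start, end;

	bzero(&fl, sizeof (fl));
	fl.l_type = F_RDLCK;
	fl.l_whence = 1;	/* SEEK_CUR */
	fl.l_start = 0;
	fl.l_len = -10;		/* the 10 bytes ending at the current offset */

	/* With a current offset of 100 this yields start == 91, end == 100. */
	if (flk_convert_lock_data(vp, &fl, &start, &end, (offset_t)100) == 0)
		ASSERT(start == 91 && end == 100);
}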
3839 3858
3840 3859 /*
3841  3860  * Check the validity of lock data. This can be used by the NFS
3842 3861 * frlock routines to check data before contacting the server. The
3843 3862 * server must support semantics that aren't as restrictive as
3844 3863 * the UNIX API, so the NFS client is required to check.
3845 3864 * The maximum is now passed in by the caller.
3846 3865 */
3847 3866
3848 3867 int
3849 3868 flk_check_lock_data(u_offset_t start, u_offset_t end, offset_t max)
3850 3869 {
3851 3870 /*
3852 3871 * The end (length) for local locking should never be greater
3853 3872 * than MAXEND. However, the representation for
3854 3873 * the entire file is MAX_U_OFFSET_T.
3855 3874 */
3856 3875 if ((start > max) ||
3857 3876 ((end > max) && (end != MAX_U_OFFSET_T))) {
3858 3877 return (EINVAL);
3859 3878 }
3860 3879 if (start > end) {
3861 3880 return (EINVAL);
3862 3881 }
3863 3882 return (0);
3864 3883 }
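
A hedged illustration of what these checks accept and reject (check_lock_data_example() is hypothetical; MAXEND is used as the caller-supplied maximum because that is what local callers in this file use):

static void
check_lock_data_example(void)
{
	/* A whole-file lock is allowed even though MAX_U_OFFSET_T > MAXEND. */
	ASSERT(flk_check_lock_data(0, MAX_U_OFFSET_T, MAXEND) == 0);
	/* An end past the maximum that isn't "whole file" is rejected... */
	ASSERT(flk_check_lock_data(0, (u_offset_t)MAXEND + 1, MAXEND) == EINVAL);
	/* ...as is a start past the maximum, or a start beyond the end. */
	ASSERT(flk_check_lock_data((u_offset_t)MAXEND + 1, MAX_U_OFFSET_T,
	    MAXEND) == EINVAL);
	ASSERT(flk_check_lock_data(10, 5, MAXEND) == EINVAL);
}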
3865 3884
3866 3885 /*
3867 3886 * Fill in request->l_flock with information about the lock blocking the
3868 3887 * request. The complexity here is that lock manager requests are allowed
3869 3888 * to see into the upper part of the 32-bit address range, whereas local
3870 3889 * requests are only allowed to see signed values.
3871 3890 *
3872 3891 * What should be done when "blocker" is a lock manager lock that uses the
3873 3892 * upper portion of the 32-bit range, but "request" is local? Since the
3874 3893 * request has already been determined to have been blocked by the blocker,
3875 3894 * at least some portion of "blocker" must be in the range of the request,
3876 3895 * or the request extends to the end of file. For the first case, the
3877 3896 * portion in the lower range is returned with the indication that it goes
3878 3897 * "to EOF." For the second case, the last byte of the lower range is
3879 3898 * returned with the indication that it goes "to EOF."
3880 3899 */
3881 3900
3882 3901 static void
3883 3902 report_blocker(lock_descriptor_t *blocker, lock_descriptor_t *request)
3884 3903 {
3885 3904 flock64_t *flrp; /* l_flock portion of request */
3886 3905
3887 3906 ASSERT(blocker != NULL);
3888 3907
3889 3908 flrp = &request->l_flock;
3890 3909 flrp->l_whence = 0;
3891 3910 flrp->l_type = blocker->l_type;
3892 3911 flrp->l_pid = blocker->l_flock.l_pid;
3893 3912 flrp->l_sysid = blocker->l_flock.l_sysid;
3894 3913
3895 3914 if (IS_LOCKMGR(request)) {
3896 3915 flrp->l_start = blocker->l_start;
3897 3916 if (blocker->l_end == MAX_U_OFFSET_T)
3898 3917 flrp->l_len = 0;
3899 3918 else
3900 3919 flrp->l_len = blocker->l_end - blocker->l_start + 1;
3901 3920 } else {
3902 3921 if (blocker->l_start > MAXEND) {
3903 3922 flrp->l_start = MAXEND;
3904 3923 flrp->l_len = 0;
3905 3924 } else {
3906 3925 flrp->l_start = blocker->l_start;
3907 3926 if (blocker->l_end == MAX_U_OFFSET_T)
3908 3927 flrp->l_len = 0;
3909 3928 else
3910 3929 flrp->l_len = blocker->l_end -
3911 3930 blocker->l_start + 1;
3912 3931 }
3913 3932 }
3914 3933 }
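
As a concrete reading of the rules above (illustrative numbers only): if a lock-manager blocker covers [MAXEND - 99, MAX_U_OFFSET_T] and the blocked request is local, the request's l_flock is filled in with l_start = MAXEND - 99 and l_len = 0, i.e. "to EOF"; if the blocker lies entirely above MAXEND, the report collapses to l_start = MAXEND, l_len = 0. A lock-manager requester, by contrast, always sees the blocker's true l_start, with l_len = 0 only when the blocker extends to MAX_U_OFFSET_T.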
3915 3934
3916 3935 /*
3917 3936 * PSARC case 1997/292
3918 3937 */
3919 3938 /*
3920 3939 * This is the public routine exported by flock.h.
3921 3940 */
3922 3941 void
3923 3942 cl_flk_change_nlm_state_to_unknown(int nlmid)
3924 3943 {
3925 3944 /*
3926 3945 * Check to see if node is booted as a cluster. If not, return.
3927 3946 */
3928 3947 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
3929 3948 return;
3930 3949 }
3931 3950
3932 3951 /*
3933 3952 * See comment in cl_flk_set_nlm_status().
3934 3953 */
3935 3954 if (nlm_reg_status == NULL) {
3936 3955 return;
3937 3956 }
3938 3957
3939 3958 /*
3940 3959 * protect NLM registry state with a mutex.
3941 3960 */
3942 3961 ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
3943 3962 mutex_enter(&nlm_reg_lock);
3944 3963 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, FLK_NLM_UNKNOWN);
3945 3964 mutex_exit(&nlm_reg_lock);
3946 3965 }
3947 3966
3948 3967 /*
3949 3968 * Return non-zero if the given I/O request conflicts with an active NBMAND
3950 3969 * lock.
3951 3970 * If svmand is non-zero, it means look at all active locks, not just NBMAND
3952 3971 * locks.
3953 3972 */
3954 3973
3955 3974 int
3956 3975 nbl_lock_conflict(vnode_t *vp, nbl_op_t op, u_offset_t offset,
3957 3976 ssize_t length, int svmand, caller_context_t *ct)
3958 3977 {
3959 3978 int conflict = 0;
3960 3979 graph_t *gp;
3961 3980 lock_descriptor_t *lock;
3962 3981 pid_t pid;
3963 3982 int sysid;
3964 3983
3965 3984 if (ct == NULL) {
3966 3985 pid = curproc->p_pid;
3967 3986 sysid = 0;
3968 3987 } else {
3969 3988 pid = ct->cc_pid;
3970 3989 sysid = ct->cc_sysid;
3971 3990 }
3972 3991
3973 3992 mutex_enter(&flock_lock);
3974 3993 gp = lock_graph[HASH_INDEX(vp)];
3975 3994 mutex_exit(&flock_lock);
3976 3995 if (gp == NULL)
3977 3996 return (0);
3978 3997
3979 3998 mutex_enter(&gp->gp_mutex);
3980 3999 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
3981 4000
3982 4001 for (; lock && lock->l_vnode == vp; lock = lock->l_next) {
3983 4002 if ((svmand || (lock->l_state & NBMAND_LOCK)) &&
3984 4003 (lock->l_flock.l_sysid != sysid ||
3985 4004 lock->l_flock.l_pid != pid) &&
3986 4005 lock_blocks_io(op, offset, length,
3987 4006 lock->l_type, lock->l_start, lock->l_end)) {
3988 4007 conflict = 1;
3989 4008 break;
3990 4009 }
3991 4010 }
3992 4011 mutex_exit(&gp->gp_mutex);
3993 4012
3994 4013 return (conflict);
3995 4014 }
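
A hedged sketch of a caller using this check before doing local I/O (write_would_conflict() is hypothetical; passing ct == NULL makes nbl_lock_conflict() attribute the I/O to the current process, and svmand == 0 assumes System V mandatory locking is not in effect, something real callers determine from the file's mode):

static int
write_would_conflict(vnode_t *vp, u_offset_t off, ssize_t len)
{
	/* Non-zero means an NBMAND lock held by someone else blocks the write. */
	return (nbl_lock_conflict(vp, NBL_WRITE, off, len, 0, NULL));
}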
3996 4015
3997 4016 /*
3998 4017 * Return non-zero if the given I/O request conflicts with the given lock.
3999 4018 */
4000 4019
4001 4020 static int
4002 4021 lock_blocks_io(nbl_op_t op, u_offset_t offset, ssize_t length,
4003 4022 int lock_type, u_offset_t lock_start, u_offset_t lock_end)
4004 4023 {
4005 4024 ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE);
4006 4025 ASSERT(lock_type == F_RDLCK || lock_type == F_WRLCK);
4007 4026
4008 4027 if (op == NBL_READ && lock_type == F_RDLCK)
4009 4028 return (0);
4010 4029
4011 4030 if (offset <= lock_start && lock_start < offset + length)
4012 4031 return (1);
4013 4032 if (lock_start <= offset && offset <= lock_end)
4014 4033 return (1);
4015 4034
4016 4035 return (0);
4017 4036 }
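
To make the range test concrete: a write of length 10 at offset 150 against a lock covering [100, 199] conflicts through the second check (lock_start <= offset <= lock_end), a write of length 20 at offset 90 conflicts through the first (offset <= lock_start < offset + length), and a read against an F_RDLCK lock never conflicts regardless of overlap.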
4018 4037
4019 4038 #ifdef DEBUG
4020 4039 static void
4021 4040 check_active_locks(graph_t *gp)
4022 4041 {
4023 4042 lock_descriptor_t *lock, *lock1;
4024 4043 edge_t *ep;
4025 4044
4026 4045 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
4027 4046 lock = lock->l_next) {
4028 4047 ASSERT(IS_ACTIVE(lock));
4029 4048 ASSERT(NOT_BLOCKED(lock));
4030 4049 ASSERT(!IS_BARRIER(lock));
4031 4050
4032 4051 ep = FIRST_IN(lock);
4033 4052
4034 4053 while (ep != HEAD(lock)) {
4035 4054 ASSERT(IS_SLEEPING(ep->from_vertex));
4036 4055 ASSERT(!NOT_BLOCKED(ep->from_vertex));
4037 4056 ep = NEXT_IN(ep);
4038 4057 }
4039 4058
4040 4059 for (lock1 = lock->l_next; lock1 != ACTIVE_HEAD(gp);
4041 4060 lock1 = lock1->l_next) {
4042 4061 if (lock1->l_vnode == lock->l_vnode) {
4043 4062 if (BLOCKS(lock1, lock)) {
4044 4063 cmn_err(CE_PANIC,
4045 4064 "active lock %p blocks %p",
4046 4065 (void *)lock1, (void *)lock);
4047 4066 } else if (BLOCKS(lock, lock1)) {
4048 4067 cmn_err(CE_PANIC,
4049 4068 "active lock %p blocks %p",
4050 4069 (void *)lock, (void *)lock1);
4051 4070 }
4052 4071 }
4053 4072 }
4054 4073 }
4055 4074 }
4056 4075
4057 4076 /*
4058  4077  * Effect: This function checks to see if the transition from 'old_state' to
4059 4078 * 'new_state' is a valid one. It returns 0 if the transition is valid
4060 4079 * and 1 if it is not.
4061 4080 * For a map of valid transitions, see sys/flock_impl.h
4062 4081 */
4063 4082 static int
4064 4083 check_lock_transition(int old_state, int new_state)
4065 4084 {
4066 4085 switch (old_state) {
4067 4086 case FLK_INITIAL_STATE:
4068 4087 if ((new_state == FLK_START_STATE) ||
4069 4088 (new_state == FLK_SLEEPING_STATE) ||
4070 4089 (new_state == FLK_ACTIVE_STATE) ||
4071 4090 (new_state == FLK_DEAD_STATE)) {
4072 4091 return (0);
4073 4092 } else {
4074 4093 return (1);
4075 4094 }
4076 4095 case FLK_START_STATE:
4077 4096 if ((new_state == FLK_ACTIVE_STATE) ||
4078 4097 (new_state == FLK_DEAD_STATE)) {
4079 4098 return (0);
4080 4099 } else {
4081 4100 return (1);
4082 4101 }
4083 4102 case FLK_ACTIVE_STATE:
4084 4103 if (new_state == FLK_DEAD_STATE) {
4085 4104 return (0);
4086 4105 } else {
4087 4106 return (1);
4088 4107 }
4089 4108 case FLK_SLEEPING_STATE:
4090 4109 if ((new_state == FLK_GRANTED_STATE) ||
4091 4110 (new_state == FLK_INTERRUPTED_STATE) ||
4092 4111 (new_state == FLK_CANCELLED_STATE)) {
4093 4112 return (0);
4094 4113 } else {
4095 4114 return (1);
4096 4115 }
4097 4116 case FLK_GRANTED_STATE:
4098 4117 if ((new_state == FLK_START_STATE) ||
4099 4118 (new_state == FLK_INTERRUPTED_STATE) ||
4100 4119 (new_state == FLK_CANCELLED_STATE)) {
4101 4120 return (0);
4102 4121 } else {
4103 4122 return (1);
4104 4123 }
4105 4124 case FLK_CANCELLED_STATE:
4106 4125 if ((new_state == FLK_INTERRUPTED_STATE) ||
4107 4126 (new_state == FLK_DEAD_STATE)) {
4108 4127 return (0);
4109 4128 } else {
4110 4129 return (1);
4111 4130 }
4112 4131 case FLK_INTERRUPTED_STATE:
4113 4132 if (new_state == FLK_DEAD_STATE) {
4114 4133 return (0);
4115 4134 } else {
4116 4135 return (1);
4117 4136 }
4118 4137 case FLK_DEAD_STATE:
4119 4138 /* May be set more than once */
4120 4139 if (new_state == FLK_DEAD_STATE) {
4121 4140 return (0);
4122 4141 } else {
4123 4142 return (1);
4124 4143 }
4125 4144 default:
4126 4145 return (1);
4127 4146 }
4128 4147 }
4129 4148
4130 4149 static void
4131 4150 check_sleeping_locks(graph_t *gp)
4132 4151 {
4133 4152 lock_descriptor_t *lock1, *lock2;
4134 4153 edge_t *ep;
4135 4154 for (lock1 = SLEEPING_HEAD(gp)->l_next; lock1 != SLEEPING_HEAD(gp);
4136 4155 lock1 = lock1->l_next) {
4137 4156 ASSERT(!IS_BARRIER(lock1));
4138 4157 for (lock2 = lock1->l_next; lock2 != SLEEPING_HEAD(gp);
4139 4158 lock2 = lock2->l_next) {
4140 4159 if (lock1->l_vnode == lock2->l_vnode) {
4141 4160 if (BLOCKS(lock2, lock1)) {
4142 4161 ASSERT(!IS_GRANTED(lock1));
4143 4162 ASSERT(!NOT_BLOCKED(lock1));
4144 4163 path(lock1, lock2);
4145 4164 }
4146 4165 }
4147 4166 }
4148 4167
4149 4168 for (lock2 = ACTIVE_HEAD(gp)->l_next; lock2 != ACTIVE_HEAD(gp);
4150 4169 lock2 = lock2->l_next) {
4151 4170 ASSERT(!IS_BARRIER(lock1));
4152 4171 if (lock1->l_vnode == lock2->l_vnode) {
4153 4172 if (BLOCKS(lock2, lock1)) {
4154 4173 ASSERT(!IS_GRANTED(lock1));
4155 4174 ASSERT(!NOT_BLOCKED(lock1));
4156 4175 path(lock1, lock2);
4157 4176 }
4158 4177 }
4159 4178 }
4160 4179 ep = FIRST_ADJ(lock1);
4161 4180 while (ep != HEAD(lock1)) {
4162 4181 ASSERT(BLOCKS(ep->to_vertex, lock1));
4163 4182 ep = NEXT_ADJ(ep);
4164 4183 }
4165 4184 }
4166 4185 }
4167 4186
4168 4187 static int
4169 4188 level_two_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2, int no_path)
4170 4189 {
4171 4190 edge_t *ep;
4172 4191 lock_descriptor_t *vertex;
4173 4192 lock_descriptor_t *vertex_stack;
4174 4193
4175 4194 STACK_INIT(vertex_stack);
4176 4195
4177 4196 flk_graph_uncolor(lock1->l_graph);
4178 4197 ep = FIRST_ADJ(lock1);
4179 4198 ASSERT(ep != HEAD(lock1));
4180 4199 while (ep != HEAD(lock1)) {
4181 4200 if (no_path)
4182 4201 ASSERT(ep->to_vertex != lock2);
4183 4202 STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
4184 4203 COLOR(ep->to_vertex);
4185 4204 ep = NEXT_ADJ(ep);
4186 4205 }
4187 4206
4188 4207 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
4189 4208 STACK_POP(vertex_stack, l_dstack);
4190 4209 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
4191 4210 ep = NEXT_ADJ(ep)) {
4192 4211 if (COLORED(ep->to_vertex))
4193 4212 continue;
4194 4213 COLOR(ep->to_vertex);
4195 4214 if (ep->to_vertex == lock2)
4196 4215 return (1);
4197 4216
4198 4217 STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
4199 4218 }
4200 4219 }
4201 4220 return (0);
4202 4221 }
4203 4222
4204 4223 static void
4205 4224 check_owner_locks(graph_t *gp, pid_t pid, int sysid, vnode_t *vp)
4206 4225 {
4207 4226 lock_descriptor_t *lock;
4208 4227
4209 4228 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
4210 4229
4211 4230 if (lock) {
4212 4231 while (lock != ACTIVE_HEAD(gp) && (lock->l_vnode == vp)) {
4213 4232 if (lock->l_flock.l_pid == pid &&
4214 4233 lock->l_flock.l_sysid == sysid)
4215 4234 cmn_err(CE_PANIC,
4216 4235 "owner pid %d's lock %p in active queue",
4217 4236 pid, (void *)lock);
4218 4237 lock = lock->l_next;
4219 4238 }
4220 4239 }
4221 4240 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
4222 4241
4223 4242 if (lock) {
4224 4243 while (lock != SLEEPING_HEAD(gp) && (lock->l_vnode == vp)) {
4225 4244 if (lock->l_flock.l_pid == pid &&
4226 4245 lock->l_flock.l_sysid == sysid)
4227 4246 cmn_err(CE_PANIC,
4228 4247 "owner pid %d's lock %p in sleep queue",
4229 4248 pid, (void *)lock);
4230 4249 lock = lock->l_next;
4231 4250 }
4232 4251 }
4233 4252 }
4234 4253
4235 4254 static int
4236 4255 level_one_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4237 4256 {
4238 4257 edge_t *ep = FIRST_ADJ(lock1);
4239 4258
4240 4259 while (ep != HEAD(lock1)) {
4241 4260 if (ep->to_vertex == lock2)
4242 4261 return (1);
4243 4262 else
4244 4263 ep = NEXT_ADJ(ep);
4245 4264 }
4246 4265 return (0);
4247 4266 }
4248 4267
4249 4268 static int
4250 4269 no_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4251 4270 {
4252 4271 return (!level_two_path(lock1, lock2, 1));
4253 4272 }
4254 4273
4255 4274 static void
4256 4275 path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4257 4276 {
4258 4277 if (level_one_path(lock1, lock2)) {
4259 4278 if (level_two_path(lock1, lock2, 0) != 0) {
4260 4279 cmn_err(CE_WARN,
4261 4280 "one edge one path from lock1 %p lock2 %p",
4262 4281 (void *)lock1, (void *)lock2);
4263 4282 }
4264 4283 } else if (no_path(lock1, lock2)) {
4265 4284 cmn_err(CE_PANIC,
4266 4285 "No path from lock1 %p to lock2 %p",
4267 4286 (void *)lock1, (void *)lock2);
4268 4287 }
4269 4288 }
4270 4289 #endif /* DEBUG */