7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
28 */
29
30 #include <sys/param.h>
31 #include <sys/thread.h>
32 #include <sys/cmn_err.h>
33 #include <sys/debug.h>
34 #include <sys/cpuvar.h>
35 #include <sys/sobject.h>
36 #include <sys/turnstile.h>
37 #include <sys/rwlock.h>
38 #include <sys/rwlock_impl.h>
39 #include <sys/atomic.h>
40 #include <sys/lockstat.h>
41
42 /*
43 * Big Theory Statement for readers/writer locking primitives.
44 *
45 * An rwlock provides exclusive access to a single thread ("writer") or
46 * concurrent access to multiple threads ("readers"). See rwlock(9F)
47 * for a full description of the interfaces and programming model.
252 static int
253 rw_locked(rwlock_impl_t *lp, krw_t rw)
254 {
255 uintptr_t old = lp->rw_wwwh;
256
257 if (rw == RW_READER || rw == RW_READER_STARVEWRITER)
258 return ((old & RW_LOCKED) && !(old & RW_WRITE_LOCKED));
259
260 if (rw == RW_WRITER)
261 return ((old & RW_OWNER) == (uintptr_t)curthread);
262
263 return (0);
264 }
265
/*
 * Optional hooks for backing off after a failed compare-and-swap on the
 * lock word; both start out NULL.  rw_tryenter() tests only rw_lock_delay
 * for NULL and then calls both, feeding the value returned by
 * rw_lock_backoff() to rw_lock_delay(), so the two are presumably installed
 * as a pair (TODO: confirm against whatever code sets them).
 */
266 uint_t (*rw_lock_backoff)(uint_t) = NULL;
267 void (*rw_lock_delay)(uint_t) = NULL;
268
269 /*
270 * Full-service implementation of rw_enter() to handle all the hard cases.
271 * Called from the assembly version if anything complicated is going on.
272 * The only semantic difference between calling rw_enter() and calling
273 * rw_enter_sleep() directly is that we assume the caller has already done
274 * a THREAD_KPRI_REQUEST() in the RW_READER cases.
275 */
276 void
277 rw_enter_sleep(rwlock_impl_t *lp, krw_t rw)
278 {
279 uintptr_t old, new, lock_value, lock_busy, lock_wait;
280 hrtime_t sleep_time;
281 turnstile_t *ts;
282 uint_t backoff = 0;
283 int loop_count = 0;
284
285 if (rw == RW_READER) {
286 lock_value = RW_READ_LOCK;
287 lock_busy = RW_WRITE_CLAIMED;
288 lock_wait = RW_HAS_WAITERS;
289 } else if (rw == RW_READER_STARVEWRITER) {
290 lock_value = RW_READ_LOCK;
291 lock_busy = RW_WRITE_LOCKED;
292 lock_wait = RW_HAS_WAITERS;
293 } else {
294 lock_value = RW_WRITE_LOCK(curthread);
325 return;
326 }
327
328 ts = turnstile_lookup(lp);
329
330 do {
331 if (((old = lp->rw_wwwh) & lock_busy) == 0)
332 break;
333 new = old | lock_wait;
334 } while (old != new && casip(&lp->rw_wwwh, old, new) != old);
335
336 if ((old & lock_busy) == 0) {
337 /*
338 * The lock appears free now; try the dance again
339 */
340 turnstile_exit(lp);
341 continue;
342 }
343
344 /*
345 * We really are going to block. Bump the stats, and drop
346 * kpri if we're a reader.
347 */
348 ASSERT(lp->rw_wwwh & lock_wait);
349 ASSERT(lp->rw_wwwh & RW_LOCKED);
350
351 sleep_time = -gethrtime();
352 if (rw != RW_WRITER) {
353 THREAD_KPRI_RELEASE();
354 CPU_STATS_ADDQ(CPU, sys, rw_rdfails, 1);
355 (void) turnstile_block(ts, TS_READER_Q, lp,
356 &rw_sobj_ops, NULL, NULL);
357 } else {
358 CPU_STATS_ADDQ(CPU, sys, rw_wrfails, 1);
359 (void) turnstile_block(ts, TS_WRITER_Q, lp,
360 &rw_sobj_ops, NULL, NULL);
361 }
362 sleep_time += gethrtime();
363
364 LOCKSTAT_RECORD4(LS_RW_ENTER_BLOCK, lp, sleep_time, rw,
365 (old & RW_WRITE_LOCKED) ? 1 : 0,
366 old >> RW_HOLD_COUNT_SHIFT);
367
368 /*
369 * We wake up holding the lock (and having kpri if we're
370 * a reader) via direct handoff from the previous owner.
371 */
372 break;
373 }
374
375 ASSERT(rw_locked(lp, rw));
376
377 membar_enter();
378
379 LOCKSTAT_RECORD(LS_RW_ENTER_ACQUIRE, lp, rw);
380 }
381
382 /*
383 * Return the number of readers to wake, or zero if we should wake a writer.
384 * Called only by exiting/downgrading writers (readers don't wake readers).
385 */
386 static int
387 rw_readers_to_wake(turnstile_t *ts)
388 {
389 kthread_t *next_writer = ts->ts_sleepq[TS_WRITER_Q].sq_first;
390 kthread_t *next_reader = ts->ts_sleepq[TS_READER_Q].sq_first;
391 pri_t wpri = (next_writer != NULL) ? DISP_PRIO(next_writer) : -1;
392 int count = 0;
393
394 while (next_reader != NULL) {
395 if (DISP_PRIO(next_reader) < wpri)
396 break;
397 next_reader->t_kpri_req++;
398 next_reader = next_reader->t_link;
399 count++;
400 }
401 return (count);
402 }
403
404 /*
405 * Full-service implementation of rw_exit() to handle all the hard cases.
406 * Called from the assembly version if anything complicated is going on.
407 * There is no semantic difference between calling rw_exit() and calling
408 * rw_exit_wakeup() directly.
409 */
410 void
411 rw_exit_wakeup(rwlock_impl_t *lp)
412 {
413 turnstile_t *ts;
414 uintptr_t old, new, lock_value;
415 kthread_t *next_writer;
416 int nreaders;
417 uint_t backoff = 0;
506 * since the lock is read-locked there would be no
507 * reason for other readers to have blocked unless
508 * the RW_WRITE_WANTED bit was set. In case (B),
509 * since there are waiters but no waiting readers,
510 * they must all be waiting writers.
511 */
512 ASSERT(lp->rw_wwwh & RW_WRITE_WANTED);
513 new = RW_WRITE_LOCK(next_writer);
514 if (ts->ts_waiters > 1)
515 new |= RW_HAS_WAITERS;
516 if (next_writer->t_link)
517 new |= RW_WRITE_WANTED;
518 lp->rw_wwwh = new;
519 membar_enter();
520 turnstile_wakeup(ts, TS_WRITER_Q, 1, next_writer);
521 }
522 break;
523 }
524
525 if (lock_value == RW_READ_LOCK) {
526 THREAD_KPRI_RELEASE();
527 LOCKSTAT_RECORD(LS_RW_EXIT_RELEASE, lp, RW_READER);
528 } else {
529 LOCKSTAT_RECORD(LS_RW_EXIT_RELEASE, lp, RW_WRITER);
530 }
531 }
532
533 int
534 rw_tryenter(krwlock_t *rwlp, krw_t rw)
535 {
536 rwlock_impl_t *lp = (rwlock_impl_t *)rwlp;
537 uintptr_t old;
538
539 if (rw != RW_WRITER) {
540 uint_t backoff = 0;
541 int loop_count = 0;
542 THREAD_KPRI_REQUEST();
543 for (;;) {
544 if ((old = lp->rw_wwwh) & (rw == RW_READER ?
545 RW_WRITE_CLAIMED : RW_WRITE_LOCKED)) {
546 THREAD_KPRI_RELEASE();
547 return (0);
548 }
549 if (casip(&lp->rw_wwwh, old, old + RW_READ_LOCK) == old)
550 break;
551 if (rw_lock_delay != NULL) {
552 backoff = rw_lock_backoff(backoff);
553 rw_lock_delay(backoff);
554 if (++loop_count == ncpus_online) {
555 backoff = 0;
556 loop_count = 0;
557 }
558 }
559 }
560 LOCKSTAT_RECORD(LS_RW_TRYENTER_ACQUIRE, lp, rw);
561 } else {
562 if (casip(&lp->rw_wwwh, 0, RW_WRITE_LOCK(curthread)) != 0)
563 return (0);
564 LOCKSTAT_RECORD(LS_RW_TRYENTER_ACQUIRE, lp, rw);
565 }
566 ASSERT(rw_locked(lp, rw));
567 membar_enter();
568 return (1);
569 }
570
571 void
572 rw_downgrade(krwlock_t *rwlp)
573 {
574 rwlock_impl_t *lp = (rwlock_impl_t *)rwlp;
575
576 THREAD_KPRI_REQUEST();
577 membar_exit();
578
579 if ((lp->rw_wwwh & RW_OWNER) != (uintptr_t)curthread) {
580 rw_panic("rw_downgrade: not owner", lp);
581 return;
582 }
583
584 if (atomic_add_ip_nv(&lp->rw_wwwh,
585 RW_READ_LOCK - RW_WRITE_LOCK(curthread)) & RW_HAS_WAITERS) {
586 turnstile_t *ts = turnstile_lookup(lp);
587 int nreaders = rw_readers_to_wake(ts);
588 if (nreaders > 0) {
589 uintptr_t delta = nreaders * RW_READ_LOCK;
590 if (ts->ts_waiters == nreaders)
591 delta -= RW_HAS_WAITERS;
592 atomic_add_ip(&lp->rw_wwwh, delta);
593 }
594 turnstile_wakeup(ts, TS_READER_Q, nreaders, NULL);
595 }
596 ASSERT(rw_locked(lp, RW_READER));
597 LOCKSTAT_RECORD0(LS_RW_DOWNGRADE_DOWNGRADE, lp);
598 }
599
600 int
601 rw_tryupgrade(krwlock_t *rwlp)
602 {
603 rwlock_impl_t *lp = (rwlock_impl_t *)rwlp;
604 uintptr_t old, new;
605
606 ASSERT(rw_locked(lp, RW_READER));
607
608 do {
609 if (((old = lp->rw_wwwh) & ~RW_HAS_WAITERS) != RW_READ_LOCK)
610 return (0);
611 new = old + RW_WRITE_LOCK(curthread) - RW_READ_LOCK;
612 } while (casip(&lp->rw_wwwh, old, new) != old);
613
614 membar_enter();
615 THREAD_KPRI_RELEASE();
616 LOCKSTAT_RECORD0(LS_RW_TRYUPGRADE_UPGRADE, lp);
617 ASSERT(rw_locked(lp, RW_WRITER));
618 return (1);
619 }
620
621 int
622 rw_read_held(krwlock_t *rwlp)
623 {
624 uintptr_t tmp;
625
626 return (_RW_READ_HELD(rwlp, tmp));
627 }
628
629 int
630 rw_write_held(krwlock_t *rwlp)
631 {
632 return (_RW_WRITE_HELD(rwlp));
633 }
634
635 int
|
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright 2019 Joyent, Inc.
28 */
29
30 #include <sys/param.h>
31 #include <sys/thread.h>
32 #include <sys/cmn_err.h>
33 #include <sys/debug.h>
34 #include <sys/cpuvar.h>
35 #include <sys/sobject.h>
36 #include <sys/turnstile.h>
37 #include <sys/rwlock.h>
38 #include <sys/rwlock_impl.h>
39 #include <sys/atomic.h>
40 #include <sys/lockstat.h>
41
42 /*
43 * Big Theory Statement for readers/writer locking primitives.
44 *
45 * An rwlock provides exclusive access to a single thread ("writer") or
46 * concurrent access to multiple threads ("readers"). See rwlock(9F)
47 * for a full description of the interfaces and programming model.
252 static int
253 rw_locked(rwlock_impl_t *lp, krw_t rw)
254 {
255 uintptr_t old = lp->rw_wwwh;
256
257 if (rw == RW_READER || rw == RW_READER_STARVEWRITER)
258 return ((old & RW_LOCKED) && !(old & RW_WRITE_LOCKED));
259
260 if (rw == RW_WRITER)
261 return ((old & RW_OWNER) == (uintptr_t)curthread);
262
263 return (0);
264 }
265
/*
 * Optional hooks for backing off after a failed compare-and-swap on the
 * lock word; both start out NULL.  rw_tryenter() tests only rw_lock_delay
 * for NULL and then calls both, feeding the value returned by
 * rw_lock_backoff() to rw_lock_delay(), so the two are presumably installed
 * as a pair (TODO: confirm against whatever code sets them).
 */
266 uint_t (*rw_lock_backoff)(uint_t) = NULL;
267 void (*rw_lock_delay)(uint_t) = NULL;
268
269 /*
270 * Full-service implementation of rw_enter() to handle all the hard cases.
271 * Called from the assembly version if anything complicated is going on.
272 */
273 void
274 rw_enter_sleep(rwlock_impl_t *lp, krw_t rw)
275 {
276 uintptr_t old, new, lock_value, lock_busy, lock_wait;
277 hrtime_t sleep_time;
278 turnstile_t *ts;
279 uint_t backoff = 0;
280 int loop_count = 0;
281
282 if (rw == RW_READER) {
283 lock_value = RW_READ_LOCK;
284 lock_busy = RW_WRITE_CLAIMED;
285 lock_wait = RW_HAS_WAITERS;
286 } else if (rw == RW_READER_STARVEWRITER) {
287 lock_value = RW_READ_LOCK;
288 lock_busy = RW_WRITE_LOCKED;
289 lock_wait = RW_HAS_WAITERS;
290 } else {
291 lock_value = RW_WRITE_LOCK(curthread);
322 return;
323 }
324
325 ts = turnstile_lookup(lp);
326
327 do {
328 if (((old = lp->rw_wwwh) & lock_busy) == 0)
329 break;
330 new = old | lock_wait;
331 } while (old != new && casip(&lp->rw_wwwh, old, new) != old);
332
333 if ((old & lock_busy) == 0) {
334 /*
335 * The lock appears free now; try the dance again
336 */
337 turnstile_exit(lp);
338 continue;
339 }
340
341 /*
342 * We really are going to block, so bump the stats.
343 */
344 ASSERT(lp->rw_wwwh & lock_wait);
345 ASSERT(lp->rw_wwwh & RW_LOCKED);
346
347 sleep_time = -gethrtime();
348 if (rw != RW_WRITER) {
349 CPU_STATS_ADDQ(CPU, sys, rw_rdfails, 1);
350 (void) turnstile_block(ts, TS_READER_Q, lp,
351 &rw_sobj_ops, NULL, NULL);
352 } else {
353 CPU_STATS_ADDQ(CPU, sys, rw_wrfails, 1);
354 (void) turnstile_block(ts, TS_WRITER_Q, lp,
355 &rw_sobj_ops, NULL, NULL);
356 }
357 sleep_time += gethrtime();
358
359 LOCKSTAT_RECORD4(LS_RW_ENTER_BLOCK, lp, sleep_time, rw,
360 (old & RW_WRITE_LOCKED) ? 1 : 0,
361 old >> RW_HOLD_COUNT_SHIFT);
362
363 /*
364 * We wake up holding the lock via direct handoff from the
365 * previous owner.
366 */
367 break;
368 }
369
370 ASSERT(rw_locked(lp, rw));
371
372 membar_enter();
373
374 LOCKSTAT_RECORD(LS_RW_ENTER_ACQUIRE, lp, rw);
375 }
376
377 /*
378 * Return the number of readers to wake, or zero if we should wake a writer.
379 * Called only by exiting/downgrading writers (readers don't wake readers).
380 */
381 static int
382 rw_readers_to_wake(turnstile_t *ts)
383 {
384 kthread_t *next_writer = ts->ts_sleepq[TS_WRITER_Q].sq_first;
385 kthread_t *next_reader = ts->ts_sleepq[TS_READER_Q].sq_first;
386 pri_t wpri = (next_writer != NULL) ? DISP_PRIO(next_writer) : -1;
387 int count = 0;
388
389 while (next_reader != NULL) {
390 if (DISP_PRIO(next_reader) < wpri)
391 break;
392 next_reader = next_reader->t_link;
393 count++;
394 }
395 return (count);
396 }
397
398 /*
399 * Full-service implementation of rw_exit() to handle all the hard cases.
400 * Called from the assembly version if anything complicated is going on.
401 * There is no semantic difference between calling rw_exit() and calling
402 * rw_exit_wakeup() directly.
403 */
404 void
405 rw_exit_wakeup(rwlock_impl_t *lp)
406 {
407 turnstile_t *ts;
408 uintptr_t old, new, lock_value;
409 kthread_t *next_writer;
410 int nreaders;
411 uint_t backoff = 0;
500 * since the lock is read-locked there would be no
501 * reason for other readers to have blocked unless
502 * the RW_WRITE_WANTED bit was set. In case (B),
503 * since there are waiters but no waiting readers,
504 * they must all be waiting writers.
505 */
506 ASSERT(lp->rw_wwwh & RW_WRITE_WANTED);
507 new = RW_WRITE_LOCK(next_writer);
508 if (ts->ts_waiters > 1)
509 new |= RW_HAS_WAITERS;
510 if (next_writer->t_link)
511 new |= RW_WRITE_WANTED;
512 lp->rw_wwwh = new;
513 membar_enter();
514 turnstile_wakeup(ts, TS_WRITER_Q, 1, next_writer);
515 }
516 break;
517 }
518
519 if (lock_value == RW_READ_LOCK) {
520 LOCKSTAT_RECORD(LS_RW_EXIT_RELEASE, lp, RW_READER);
521 } else {
522 LOCKSTAT_RECORD(LS_RW_EXIT_RELEASE, lp, RW_WRITER);
523 }
524 }
525
526 int
527 rw_tryenter(krwlock_t *rwlp, krw_t rw)
528 {
529 rwlock_impl_t *lp = (rwlock_impl_t *)rwlp;
530 uintptr_t old;
531
532 if (rw != RW_WRITER) {
533 uint_t backoff = 0;
534 int loop_count = 0;
535 for (;;) {
536 if ((old = lp->rw_wwwh) & (rw == RW_READER ?
537 RW_WRITE_CLAIMED : RW_WRITE_LOCKED)) {
538 return (0);
539 }
540 if (casip(&lp->rw_wwwh, old, old + RW_READ_LOCK) == old)
541 break;
542 if (rw_lock_delay != NULL) {
543 backoff = rw_lock_backoff(backoff);
544 rw_lock_delay(backoff);
545 if (++loop_count == ncpus_online) {
546 backoff = 0;
547 loop_count = 0;
548 }
549 }
550 }
551 LOCKSTAT_RECORD(LS_RW_TRYENTER_ACQUIRE, lp, rw);
552 } else {
553 if (casip(&lp->rw_wwwh, 0, RW_WRITE_LOCK(curthread)) != 0)
554 return (0);
555 LOCKSTAT_RECORD(LS_RW_TRYENTER_ACQUIRE, lp, rw);
556 }
557 ASSERT(rw_locked(lp, rw));
558 membar_enter();
559 return (1);
560 }
561
562 void
563 rw_downgrade(krwlock_t *rwlp)
564 {
565 rwlock_impl_t *lp = (rwlock_impl_t *)rwlp;
566
567 membar_exit();
568
569 if ((lp->rw_wwwh & RW_OWNER) != (uintptr_t)curthread) {
570 rw_panic("rw_downgrade: not owner", lp);
571 return;
572 }
573
574 if (atomic_add_ip_nv(&lp->rw_wwwh,
575 RW_READ_LOCK - RW_WRITE_LOCK(curthread)) & RW_HAS_WAITERS) {
576 turnstile_t *ts = turnstile_lookup(lp);
577 int nreaders = rw_readers_to_wake(ts);
578 if (nreaders > 0) {
579 uintptr_t delta = nreaders * RW_READ_LOCK;
580 if (ts->ts_waiters == nreaders)
581 delta -= RW_HAS_WAITERS;
582 atomic_add_ip(&lp->rw_wwwh, delta);
583 }
584 turnstile_wakeup(ts, TS_READER_Q, nreaders, NULL);
585 }
586 ASSERT(rw_locked(lp, RW_READER));
587 LOCKSTAT_RECORD0(LS_RW_DOWNGRADE_DOWNGRADE, lp);
588 }
589
590 int
591 rw_tryupgrade(krwlock_t *rwlp)
592 {
593 rwlock_impl_t *lp = (rwlock_impl_t *)rwlp;
594 uintptr_t old, new;
595
596 ASSERT(rw_locked(lp, RW_READER));
597
598 do {
599 if (((old = lp->rw_wwwh) & ~RW_HAS_WAITERS) != RW_READ_LOCK)
600 return (0);
601 new = old + RW_WRITE_LOCK(curthread) - RW_READ_LOCK;
602 } while (casip(&lp->rw_wwwh, old, new) != old);
603
604 membar_enter();
605 LOCKSTAT_RECORD0(LS_RW_TRYUPGRADE_UPGRADE, lp);
606 ASSERT(rw_locked(lp, RW_WRITER));
607 return (1);
608 }
609
610 int
611 rw_read_held(krwlock_t *rwlp)
612 {
613 uintptr_t tmp;
614
615 return (_RW_READ_HELD(rwlp, tmp));
616 }
617
618 int
619 rw_write_held(krwlock_t *rwlp)
620 {
621 return (_RW_WRITE_HELD(rwlp));
622 }
623
624 int
|