Print this page
OS-7753 THREAD_KPRI_RELEASE does nothing of the sort
Reviewed by: Bryan Cantrill <bryan@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>


   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
  28  */
  29 
  30 #include <sys/param.h>
  31 #include <sys/thread.h>
  32 #include <sys/cmn_err.h>
  33 #include <sys/debug.h>
  34 #include <sys/cpuvar.h>
  35 #include <sys/sobject.h>
  36 #include <sys/turnstile.h>
  37 #include <sys/rwlock.h>
  38 #include <sys/rwlock_impl.h>
  39 #include <sys/atomic.h>
  40 #include <sys/lockstat.h>
  41 
  42 /*
  43  * Big Theory Statement for readers/writer locking primitives.
  44  *
  45  * An rwlock provides exclusive access to a single thread ("writer") or
  46  * concurrent access to multiple threads ("readers").  See rwlock(9F)
  47  * for a full description of the interfaces and programming model.


 252 static int
 253 rw_locked(rwlock_impl_t *lp, krw_t rw)
 254 {
             /*
              * Sanity predicate used by the ASSERT()s in this file: report
              * whether the lock word is consistent with the lock being held
              * in mode 'rw'.  Returns non-zero if so, zero otherwise
              * (including for an unrecognized 'rw').
              *
              * The lock word is sampled exactly once, so every test below is
              * made against the same snapshot.
              */
 255         uintptr_t old = lp->rw_wwwh;
 256 
             /*
              * Reader modes: the lock must be held (RW_LOCKED bits set) and
              * not write-locked.  This only shows that *some* reader holds the
              * lock; the word does not identify which threads hold it.
              */
 257         if (rw == RW_READER || rw == RW_READER_STARVEWRITER)
 258                 return ((old & RW_LOCKED) && !(old & RW_WRITE_LOCKED));
 259 
             /*
              * Writer mode: the owner field of the word must be curthread.
              */
 260         if (rw == RW_WRITER)
 261                 return ((old & RW_OWNER) == (uintptr_t)curthread);
 262 
 263         return (0);
 264 }
 265 
 266 uint_t (*rw_lock_backoff)(uint_t) = NULL;
 267 void (*rw_lock_delay)(uint_t) = NULL;
 268 
 269 /*
 270  * Full-service implementation of rw_enter() to handle all the hard cases.
 271  * Called from the assembly version if anything complicated is going on.
 272  * The only semantic difference between calling rw_enter() and calling
 273  * rw_enter_sleep() directly is that we assume the caller has already done
 274  * a THREAD_KPRI_REQUEST() in the RW_READER cases.
 275  */
 276 void
 277 rw_enter_sleep(rwlock_impl_t *lp, krw_t rw)
 278 {
 279         uintptr_t old, new, lock_value, lock_busy, lock_wait;
 280         hrtime_t sleep_time;
 281         turnstile_t *ts;
 282         uint_t  backoff = 0;
 283         int loop_count = 0;
 284 
 285         if (rw == RW_READER) {
 286                 lock_value = RW_READ_LOCK;
 287                 lock_busy = RW_WRITE_CLAIMED;
 288                 lock_wait = RW_HAS_WAITERS;
 289         } else if (rw == RW_READER_STARVEWRITER) {
 290                 lock_value = RW_READ_LOCK;
 291                 lock_busy = RW_WRITE_LOCKED;
 292                 lock_wait = RW_HAS_WAITERS;
 293         } else {
 294                 lock_value = RW_WRITE_LOCK(curthread);


 325                         return;
 326                 }
 327 
 328                 ts = turnstile_lookup(lp);
 329 
 330                 do {
 331                         if (((old = lp->rw_wwwh) & lock_busy) == 0)
 332                                 break;
 333                         new = old | lock_wait;
 334                 } while (old != new && casip(&lp->rw_wwwh, old, new) != old);
 335 
 336                 if ((old & lock_busy) == 0) {
 337                         /*
 338                          * The lock appears free now; try the dance again
 339                          */
 340                         turnstile_exit(lp);
 341                         continue;
 342                 }
 343 
 344                 /*
 345                  * We really are going to block.  Bump the stats, and drop
 346                  * kpri if we're a reader.
 347                  */
 348                 ASSERT(lp->rw_wwwh & lock_wait);
 349                 ASSERT(lp->rw_wwwh & RW_LOCKED);
 350 
 351                 sleep_time = -gethrtime();
 352                 if (rw != RW_WRITER) {
 353                         THREAD_KPRI_RELEASE();
 354                         CPU_STATS_ADDQ(CPU, sys, rw_rdfails, 1);
 355                         (void) turnstile_block(ts, TS_READER_Q, lp,
 356                             &rw_sobj_ops, NULL, NULL);
 357                 } else {
 358                         CPU_STATS_ADDQ(CPU, sys, rw_wrfails, 1);
 359                         (void) turnstile_block(ts, TS_WRITER_Q, lp,
 360                             &rw_sobj_ops, NULL, NULL);
 361                 }
 362                 sleep_time += gethrtime();
 363 
 364                 LOCKSTAT_RECORD4(LS_RW_ENTER_BLOCK, lp, sleep_time, rw,
 365                     (old & RW_WRITE_LOCKED) ? 1 : 0,
 366                     old >> RW_HOLD_COUNT_SHIFT);
 367 
 368                 /*
 369                  * We wake up holding the lock (and having kpri if we're
 370                  * a reader) via direct handoff from the previous owner.
 371                  */
 372                 break;
 373         }
 374 
 375         ASSERT(rw_locked(lp, rw));
 376 
 377         membar_enter();
 378 
 379         LOCKSTAT_RECORD(LS_RW_ENTER_ACQUIRE, lp, rw);
 380 }
 381 
 382 /*
 383  * Return the number of readers to wake, or zero if we should wake a writer.
 384  * Called only by exiting/downgrading writers (readers don't wake readers).
 385  */
 386 static int
 387 rw_readers_to_wake(turnstile_t *ts)
 388 {
             /*
              * Walk the reader sleep queue from its head and count the
              * leading run of readers whose dispatch priority is not lower
              * than that of the first waiting writer.
              */
 389         kthread_t *next_writer = ts->ts_sleepq[TS_WRITER_Q].sq_first;
 390         kthread_t *next_reader = ts->ts_sleepq[TS_READER_Q].sq_first;
             /*
              * With no writer queued, wpri is -1.
              * NOTE(review): presumably DISP_PRIO() is never negative, so
              * every queued reader is then counted -- confirm.
              */
 391         pri_t wpri = (next_writer != NULL) ? DISP_PRIO(next_writer) : -1;
 392         int count = 0;
 393 
 394         while (next_reader != NULL) {
                     /* Stop at the first reader the writer outranks. */
 395                 if (DISP_PRIO(next_reader) < wpri)
 396                         break;
                     /*
                      * Bump the kpri request count of each reader that will
                      * be woken.  NOTE(review): presumably this is the kpri
                      * that rw_enter_sleep()'s wake-up comment says a reader
                      * receives via direct handoff -- confirm.
                      */
 397                 next_reader->t_kpri_req++;
 398                 next_reader = next_reader->t_link;
 399                 count++;
 400         }
 401         return (count);
 402 }
 403 
 404 /*
 405  * Full-service implementation of rw_exit() to handle all the hard cases.
 406  * Called from the assembly version if anything complicated is going on.
 407  * There is no semantic difference between calling rw_exit() and calling
 408  * rw_exit_wakeup() directly.
 409  */
 410 void
 411 rw_exit_wakeup(rwlock_impl_t *lp)
 412 {
 413         turnstile_t *ts;
 414         uintptr_t old, new, lock_value;
 415         kthread_t *next_writer;
 416         int nreaders;
 417         uint_t  backoff = 0;


 506                          * since the lock is read-locked there would be no
 507                          * reason for other readers to have blocked unless
 508                          * the RW_WRITE_WANTED bit was set.  In case (B),
 509                          * since there are waiters but no waiting readers,
 510                          * they must all be waiting writers.
 511                          */
 512                         ASSERT(lp->rw_wwwh & RW_WRITE_WANTED);
 513                         new = RW_WRITE_LOCK(next_writer);
 514                         if (ts->ts_waiters > 1)
 515                                 new |= RW_HAS_WAITERS;
 516                         if (next_writer->t_link)
 517                                 new |= RW_WRITE_WANTED;
 518                         lp->rw_wwwh = new;
 519                         membar_enter();
 520                         turnstile_wakeup(ts, TS_WRITER_Q, 1, next_writer);
 521                 }
 522                 break;
 523         }
 524 
 525         if (lock_value == RW_READ_LOCK) {
 526                 THREAD_KPRI_RELEASE();
 527                 LOCKSTAT_RECORD(LS_RW_EXIT_RELEASE, lp, RW_READER);
 528         } else {
 529                 LOCKSTAT_RECORD(LS_RW_EXIT_RELEASE, lp, RW_WRITER);
 530         }
 531 }
 532 
 533 int
 534 rw_tryenter(krwlock_t *rwlp, krw_t rw)
 535 {
             /*
              * Attempt to take the lock in mode 'rw' without blocking.
              * Returns 1 with the lock held, or 0 if it could not be taken.
              */
 536         rwlock_impl_t *lp = (rwlock_impl_t *)rwlp;
 537         uintptr_t old;
 538 
 539         if (rw != RW_WRITER) {
 540                 uint_t backoff = 0;
 541                 int loop_count = 0;
                     /*
                      * Readers request kpri before trying; the request is
                      * dropped again below if the attempt fails.
                      */
 542                 THREAD_KPRI_REQUEST();
 543                 for (;;) {
                             /*
                              * The "busy" mask matches rw_enter_sleep():
                              * RW_READER backs off for RW_WRITE_CLAIMED,
                              * any other reader mode only for
                              * RW_WRITE_LOCKED.
                              */
 544                         if ((old = lp->rw_wwwh) & (rw == RW_READER ?
 545                             RW_WRITE_CLAIMED : RW_WRITE_LOCKED)) {
 546                                 THREAD_KPRI_RELEASE();
 547                                 return (0);
 548                         }
                             /* Add one read hold to the sampled word. */
 549                         if (casip(&lp->rw_wwwh, old, old + RW_READ_LOCK) == old)
 550                                 break;
                             /*
                              * The CAS lost a race with another update.  If
                              * the delay hook is installed, back off before
                              * retrying; the backoff restarts from zero once
                              * loop_count reaches ncpus_online.
                              * NOTE(review): rw_lock_backoff is called
                              * without its own NULL check -- assumes both
                              * hooks are always installed together.
                              */
 551                         if (rw_lock_delay != NULL) {
 552                                 backoff = rw_lock_backoff(backoff);
 553                                 rw_lock_delay(backoff);
 554                                 if (++loop_count == ncpus_online) {
 555                                         backoff = 0;
 556                                         loop_count = 0;
 557                                 }
 558                         }
 559                 }
 560                 LOCKSTAT_RECORD(LS_RW_TRYENTER_ACQUIRE, lp, rw);
 561         } else {
                     /*
                      * Writers make a single attempt, which succeeds only if
                      * the whole lock word is zero (no holder and no flag
                      * bits set).
                      */
 562                 if (casip(&lp->rw_wwwh, 0, RW_WRITE_LOCK(curthread)) != 0)
 563                         return (0);
 564                 LOCKSTAT_RECORD(LS_RW_TRYENTER_ACQUIRE, lp, rw);
 565         }
 566         ASSERT(rw_locked(lp, rw));
 567         membar_enter();
 568         return (1);
 569 }
 570 
 571 void
 572 rw_downgrade(krwlock_t *rwlp)
 573 {
             /*
              * Convert the write hold that curthread has on the lock into a
              * read hold, waking waiting readers that may now share the lock.
              * Calls rw_panic() if curthread is not the lock's owner.
              */
 574         rwlock_impl_t *lp = (rwlock_impl_t *)rwlp;
 575 
             /* We are about to become a reader, so request kpri first. */
 576         THREAD_KPRI_REQUEST();
 577         membar_exit();
 578 
 579         if ((lp->rw_wwwh & RW_OWNER) != (uintptr_t)curthread) {
 580                 rw_panic("rw_downgrade: not owner", lp);
                     /* NOTE(review): presumably rw_panic() does not return. */
 581                 return;
 582         }
 583 
             /*
              * A single atomic add replaces our write-lock encoding with one
              * read hold; the returned new value tells us whether anyone is
              * waiting.
              */
 584         if (atomic_add_ip_nv(&lp->rw_wwwh,
 585             RW_READ_LOCK - RW_WRITE_LOCK(curthread)) & RW_HAS_WAITERS) {
 586                 turnstile_t *ts = turnstile_lookup(lp);
 587                 int nreaders = rw_readers_to_wake(ts);
 588                 if (nreaders > 0) {
                             /*
                              * Take one read hold on behalf of each reader
                              * being woken.  If those readers account for
                              * all of ts_waiters, clear RW_HAS_WAITERS in
                              * the same update (the bit is known to be set,
                              * so subtracting it clears it).
                              */
 589                         uintptr_t delta = nreaders * RW_READ_LOCK;
 590                         if (ts->ts_waiters == nreaders)
 591                                 delta -= RW_HAS_WAITERS;
 592                         atomic_add_ip(&lp->rw_wwwh, delta);
 593                 }
                     /*
                      * Called even when nreaders is zero.
                      * NOTE(review): presumably this is what releases the
                      * turnstile obtained by turnstile_lookup() -- confirm.
                      */
 594                 turnstile_wakeup(ts, TS_READER_Q, nreaders, NULL);
 595         }
 596         ASSERT(rw_locked(lp, RW_READER));
 597         LOCKSTAT_RECORD0(LS_RW_DOWNGRADE_DOWNGRADE, lp);
 598 }
 599 
 600 int
 601 rw_tryupgrade(krwlock_t *rwlp)
 602 {
             /*
              * Attempt to turn a read hold into a write hold without
              * blocking.  Returns 1 on success; returns 0, leaving the lock
              * word untouched, otherwise.
              */
 603         rwlock_impl_t *lp = (rwlock_impl_t *)rwlp;
 604         uintptr_t old, new;
 605 
 606         ASSERT(rw_locked(lp, RW_READER));
 607 
 608         do {
                     /*
                      * Ignoring RW_HAS_WAITERS, the word must be exactly one
                      * read hold; any additional hold or other flag bit makes
                      * the upgrade fail.
                      */
 609                 if (((old = lp->rw_wwwh) & ~RW_HAS_WAITERS) != RW_READ_LOCK)
 610                         return (0);
                     /*
                      * Swap the single read hold for our write-lock encoding;
                      * doing it arithmetically carries RW_HAS_WAITERS over
                      * unchanged.
                      */
 611                 new = old + RW_WRITE_LOCK(curthread) - RW_READ_LOCK;
 612         } while (casip(&lp->rw_wwwh, old, new) != old);
 613 
 614         membar_enter();
             /* No longer a reader, so drop the reader's kpri request. */
 615         THREAD_KPRI_RELEASE();
 616         LOCKSTAT_RECORD0(LS_RW_TRYUPGRADE_UPGRADE, lp);
 617         ASSERT(rw_locked(lp, RW_WRITER));
 618         return (1);
 619 }
 620 
 621 int
 622 rw_read_held(krwlock_t *rwlp)
 623 {
             /*
              * Function form of the _RW_READ_HELD() macro: returns whatever
              * the macro evaluates to for 'rwlp'.  'tmp' exists only because
              * the macro takes a variable as its second argument (presumably
              * as scratch space for the lock word -- the macro is defined
              * outside this file).
              */
 624         uintptr_t tmp;
 625 
 626         return (_RW_READ_HELD(rwlp, tmp));
 627 }
 628 
 629 int
 630 rw_write_held(krwlock_t *rwlp)
 631 {
             /*
              * Function form of the _RW_WRITE_HELD() macro: returns whatever
              * the macro evaluates to for 'rwlp' (the macro is defined
              * outside this file).
              */
 632         return (_RW_WRITE_HELD(rwlp));
 633 }
 634 
 635 int




   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright 2019 Joyent, Inc.
  28  */
  29 
  30 #include <sys/param.h>
  31 #include <sys/thread.h>
  32 #include <sys/cmn_err.h>
  33 #include <sys/debug.h>
  34 #include <sys/cpuvar.h>
  35 #include <sys/sobject.h>
  36 #include <sys/turnstile.h>
  37 #include <sys/rwlock.h>
  38 #include <sys/rwlock_impl.h>
  39 #include <sys/atomic.h>
  40 #include <sys/lockstat.h>
  41 
  42 /*
  43  * Big Theory Statement for readers/writer locking primitives.
  44  *
  45  * An rwlock provides exclusive access to a single thread ("writer") or
  46  * concurrent access to multiple threads ("readers").  See rwlock(9F)
  47  * for a full description of the interfaces and programming model.


 252 static int
 253 rw_locked(rwlock_impl_t *lp, krw_t rw)
 254 {
             /*
              * Sanity predicate used by the ASSERT()s in this file: report
              * whether the lock word is consistent with the lock being held
              * in mode 'rw'.  Returns non-zero if so, zero otherwise
              * (including for an unrecognized 'rw').
              *
              * The lock word is sampled exactly once, so every test below is
              * made against the same snapshot.
              */
 255         uintptr_t old = lp->rw_wwwh;
 256 
             /*
              * Reader modes: the lock must be held (RW_LOCKED bits set) and
              * not write-locked.  This only shows that *some* reader holds the
              * lock; the word does not identify which threads hold it.
              */
 257         if (rw == RW_READER || rw == RW_READER_STARVEWRITER)
 258                 return ((old & RW_LOCKED) && !(old & RW_WRITE_LOCKED));
 259 
             /*
              * Writer mode: the owner field of the word must be curthread.
              */
 260         if (rw == RW_WRITER)
 261                 return ((old & RW_OWNER) == (uintptr_t)curthread);
 262 
 263         return (0);
 264 }
 265 
 266 uint_t (*rw_lock_backoff)(uint_t) = NULL;
 267 void (*rw_lock_delay)(uint_t) = NULL;
 268 
 269 /*
 270  * Full-service implementation of rw_enter() to handle all the hard cases.
 271  * Called from the assembly version if anything complicated is going on.



 272  */
 273 void
 274 rw_enter_sleep(rwlock_impl_t *lp, krw_t rw)
 275 {
 276         uintptr_t old, new, lock_value, lock_busy, lock_wait;
 277         hrtime_t sleep_time;
 278         turnstile_t *ts;
 279         uint_t  backoff = 0;
 280         int loop_count = 0;
 281 
 282         if (rw == RW_READER) {
 283                 lock_value = RW_READ_LOCK;
 284                 lock_busy = RW_WRITE_CLAIMED;
 285                 lock_wait = RW_HAS_WAITERS;
 286         } else if (rw == RW_READER_STARVEWRITER) {
 287                 lock_value = RW_READ_LOCK;
 288                 lock_busy = RW_WRITE_LOCKED;
 289                 lock_wait = RW_HAS_WAITERS;
 290         } else {
 291                 lock_value = RW_WRITE_LOCK(curthread);


 322                         return;
 323                 }
 324 
 325                 ts = turnstile_lookup(lp);
 326 
 327                 do {
 328                         if (((old = lp->rw_wwwh) & lock_busy) == 0)
 329                                 break;
 330                         new = old | lock_wait;
 331                 } while (old != new && casip(&lp->rw_wwwh, old, new) != old);
 332 
 333                 if ((old & lock_busy) == 0) {
 334                         /*
 335                          * The lock appears free now; try the dance again
 336                          */
 337                         turnstile_exit(lp);
 338                         continue;
 339                 }
 340 
 341                 /*
 342                  * We really are going to block, so bump the stats.

 343                  */
 344                 ASSERT(lp->rw_wwwh & lock_wait);
 345                 ASSERT(lp->rw_wwwh & RW_LOCKED);
 346 
 347                 sleep_time = -gethrtime();
 348                 if (rw != RW_WRITER) {

 349                         CPU_STATS_ADDQ(CPU, sys, rw_rdfails, 1);
 350                         (void) turnstile_block(ts, TS_READER_Q, lp,
 351                             &rw_sobj_ops, NULL, NULL);
 352                 } else {
 353                         CPU_STATS_ADDQ(CPU, sys, rw_wrfails, 1);
 354                         (void) turnstile_block(ts, TS_WRITER_Q, lp,
 355                             &rw_sobj_ops, NULL, NULL);
 356                 }
 357                 sleep_time += gethrtime();
 358 
 359                 LOCKSTAT_RECORD4(LS_RW_ENTER_BLOCK, lp, sleep_time, rw,
 360                     (old & RW_WRITE_LOCKED) ? 1 : 0,
 361                     old >> RW_HOLD_COUNT_SHIFT);
 362 
 363                 /*
 364                  * We wake up holding the lock via direct handoff from the
 365                  * previous owner.
 366                  */
 367                 break;
 368         }
 369 
 370         ASSERT(rw_locked(lp, rw));
 371 
 372         membar_enter();
 373 
 374         LOCKSTAT_RECORD(LS_RW_ENTER_ACQUIRE, lp, rw);
 375 }
 376 
 377 /*
 378  * Return the number of readers to wake, or zero if we should wake a writer.
 379  * Called only by exiting/downgrading writers (readers don't wake readers).
 380  */
 381 static int
 382 rw_readers_to_wake(turnstile_t *ts)
 383 {
             /*
              * Walk the reader sleep queue from its head and count the
              * leading run of readers whose dispatch priority is not lower
              * than that of the first waiting writer.  Nothing is modified;
              * the queues are only inspected.
              */
 384         kthread_t *next_writer = ts->ts_sleepq[TS_WRITER_Q].sq_first;
 385         kthread_t *next_reader = ts->ts_sleepq[TS_READER_Q].sq_first;
             /*
              * With no writer queued, wpri is -1.
              * NOTE(review): presumably DISP_PRIO() is never negative, so
              * every queued reader is then counted -- confirm.
              */
 386         pri_t wpri = (next_writer != NULL) ? DISP_PRIO(next_writer) : -1;
 387         int count = 0;
 388 
 389         while (next_reader != NULL) {
                     /* Stop at the first reader the writer outranks. */
 390                 if (DISP_PRIO(next_reader) < wpri)
 391                         break;

 392                 next_reader = next_reader->t_link;
 393                 count++;
 394         }
 395         return (count);
 396 }
 397 
 398 /*
 399  * Full-service implementation of rw_exit() to handle all the hard cases.
 400  * Called from the assembly version if anything complicated is going on.
 401  * There is no semantic difference between calling rw_exit() and calling
 402  * rw_exit_wakeup() directly.
 403  */
 404 void
 405 rw_exit_wakeup(rwlock_impl_t *lp)
 406 {
 407         turnstile_t *ts;
 408         uintptr_t old, new, lock_value;
 409         kthread_t *next_writer;
 410         int nreaders;
 411         uint_t  backoff = 0;


 500                          * since the lock is read-locked there would be no
 501                          * reason for other readers to have blocked unless
 502                          * the RW_WRITE_WANTED bit was set.  In case (B),
 503                          * since there are waiters but no waiting readers,
 504                          * they must all be waiting writers.
 505                          */
 506                         ASSERT(lp->rw_wwwh & RW_WRITE_WANTED);
 507                         new = RW_WRITE_LOCK(next_writer);
 508                         if (ts->ts_waiters > 1)
 509                                 new |= RW_HAS_WAITERS;
 510                         if (next_writer->t_link)
 511                                 new |= RW_WRITE_WANTED;
 512                         lp->rw_wwwh = new;
 513                         membar_enter();
 514                         turnstile_wakeup(ts, TS_WRITER_Q, 1, next_writer);
 515                 }
 516                 break;
 517         }
 518 
 519         if (lock_value == RW_READ_LOCK) {

 520                 LOCKSTAT_RECORD(LS_RW_EXIT_RELEASE, lp, RW_READER);
 521         } else {
 522                 LOCKSTAT_RECORD(LS_RW_EXIT_RELEASE, lp, RW_WRITER);
 523         }
 524 }
 525 
 526 int
 527 rw_tryenter(krwlock_t *rwlp, krw_t rw)
 528 {
             /*
              * Attempt to take the lock in mode 'rw' without blocking.
              * Returns 1 with the lock held, or 0 if it could not be taken.
              */
 529         rwlock_impl_t *lp = (rwlock_impl_t *)rwlp;
 530         uintptr_t old;
 531 
 532         if (rw != RW_WRITER) {
 533                 uint_t backoff = 0;
 534                 int loop_count = 0;

 535                 for (;;) {
                             /*
                              * The "busy" mask matches rw_enter_sleep():
                              * RW_READER backs off for RW_WRITE_CLAIMED,
                              * any other reader mode only for
                              * RW_WRITE_LOCKED.
                              */
 536                         if ((old = lp->rw_wwwh) & (rw == RW_READER ?
 537                             RW_WRITE_CLAIMED : RW_WRITE_LOCKED)) {

 538                                 return (0);
 539                         }
                             /* Add one read hold to the sampled word. */
 540                         if (casip(&lp->rw_wwwh, old, old + RW_READ_LOCK) == old)
 541                                 break;
                             /*
                              * The CAS lost a race with another update.  If
                              * the delay hook is installed, back off before
                              * retrying; the backoff restarts from zero once
                              * loop_count reaches ncpus_online.
                              * NOTE(review): rw_lock_backoff is called
                              * without its own NULL check -- assumes both
                              * hooks are always installed together.
                              */
 542                         if (rw_lock_delay != NULL) {
 543                                 backoff = rw_lock_backoff(backoff);
 544                                 rw_lock_delay(backoff);
 545                                 if (++loop_count == ncpus_online) {
 546                                         backoff = 0;
 547                                         loop_count = 0;
 548                                 }
 549                         }
 550                 }
 551                 LOCKSTAT_RECORD(LS_RW_TRYENTER_ACQUIRE, lp, rw);
 552         } else {
                     /*
                      * Writers make a single attempt, which succeeds only if
                      * the whole lock word is zero (no holder and no flag
                      * bits set).
                      */
 553                 if (casip(&lp->rw_wwwh, 0, RW_WRITE_LOCK(curthread)) != 0)
 554                         return (0);
 555                 LOCKSTAT_RECORD(LS_RW_TRYENTER_ACQUIRE, lp, rw);
 556         }
 557         ASSERT(rw_locked(lp, rw));
 558         membar_enter();
 559         return (1);
 560 }
 561 
 562 void
 563 rw_downgrade(krwlock_t *rwlp)
 564 {
             /*
              * Convert the write hold that curthread has on the lock into a
              * read hold, waking waiting readers that may now share the lock.
              * Calls rw_panic() if curthread is not the lock's owner.
              */
 565         rwlock_impl_t *lp = (rwlock_impl_t *)rwlp;
 566 

 567         membar_exit();
 568 
 569         if ((lp->rw_wwwh & RW_OWNER) != (uintptr_t)curthread) {
 570                 rw_panic("rw_downgrade: not owner", lp);
                     /* NOTE(review): presumably rw_panic() does not return. */
 571                 return;
 572         }
 573 
             /*
              * A single atomic add replaces our write-lock encoding with one
              * read hold; the returned new value tells us whether anyone is
              * waiting.
              */
 574         if (atomic_add_ip_nv(&lp->rw_wwwh,
 575             RW_READ_LOCK - RW_WRITE_LOCK(curthread)) & RW_HAS_WAITERS) {
 576                 turnstile_t *ts = turnstile_lookup(lp);
 577                 int nreaders = rw_readers_to_wake(ts);
 578                 if (nreaders > 0) {
                             /*
                              * Take one read hold on behalf of each reader
                              * being woken.  If those readers account for
                              * all of ts_waiters, clear RW_HAS_WAITERS in
                              * the same update (the bit is known to be set,
                              * so subtracting it clears it).
                              */
 579                         uintptr_t delta = nreaders * RW_READ_LOCK;
 580                         if (ts->ts_waiters == nreaders)
 581                                 delta -= RW_HAS_WAITERS;
 582                         atomic_add_ip(&lp->rw_wwwh, delta);
 583                 }
                     /*
                      * Called even when nreaders is zero.
                      * NOTE(review): presumably this is what releases the
                      * turnstile obtained by turnstile_lookup() -- confirm.
                      */
 584                 turnstile_wakeup(ts, TS_READER_Q, nreaders, NULL);
 585         }
 586         ASSERT(rw_locked(lp, RW_READER));
 587         LOCKSTAT_RECORD0(LS_RW_DOWNGRADE_DOWNGRADE, lp);
 588 }
 589 
 590 int
 591 rw_tryupgrade(krwlock_t *rwlp)
 592 {
             /*
              * Attempt to turn a read hold into a write hold without
              * blocking.  Returns 1 on success; returns 0, leaving the lock
              * word untouched, otherwise.
              */
 593         rwlock_impl_t *lp = (rwlock_impl_t *)rwlp;
 594         uintptr_t old, new;
 595 
 596         ASSERT(rw_locked(lp, RW_READER));
 597 
 598         do {
                     /*
                      * Ignoring RW_HAS_WAITERS, the word must be exactly one
                      * read hold; any additional hold or other flag bit makes
                      * the upgrade fail.
                      */
 599                 if (((old = lp->rw_wwwh) & ~RW_HAS_WAITERS) != RW_READ_LOCK)
 600                         return (0);
                     /*
                      * Swap the single read hold for our write-lock encoding;
                      * doing it arithmetically carries RW_HAS_WAITERS over
                      * unchanged.
                      */
 601                 new = old + RW_WRITE_LOCK(curthread) - RW_READ_LOCK;
 602         } while (casip(&lp->rw_wwwh, old, new) != old);
 603 
 604         membar_enter();

 605         LOCKSTAT_RECORD0(LS_RW_TRYUPGRADE_UPGRADE, lp);
 606         ASSERT(rw_locked(lp, RW_WRITER));
 607         return (1);
 608 }
 609 
 610 int
 611 rw_read_held(krwlock_t *rwlp)
 612 {
             /*
              * Function form of the _RW_READ_HELD() macro: returns whatever
              * the macro evaluates to for 'rwlp'.  'tmp' exists only because
              * the macro takes a variable as its second argument (presumably
              * as scratch space for the lock word -- the macro is defined
              * outside this file).
              */
 613         uintptr_t tmp;
 614 
 615         return (_RW_READ_HELD(rwlp, tmp));
 616 }
 617 
 618 int
 619 rw_write_held(krwlock_t *rwlp)
 620 {
             /*
              * Function form of the _RW_WRITE_HELD() macro: returns whatever
              * the macro evaluates to for 'rwlp' (the macro is defined
              * outside this file).
              */
 621         return (_RW_WRITE_HELD(rwlp));
 622 }
 623 
 624 int