/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2019 Joyent, Inc.
 */

#include "assym.h"

#include <sys/mutex_impl.h>
#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/rwlock_impl.h>
#include <sys/lockstat.h>

/*
 * lock_try(lp), ulock_try(lp)
 *      - returns non-zero on success.
 *      - doesn't block interrupts so don't use this to spin on a lock.
 *
 * ulock_try() is for a lock in the user address space.
 */
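
/*
 * Roughly equivalent C for the lock_try() fast path (a sketch for
 * illustration only; the xchg8() helper stands in for the xchgb below and
 * is an assumption, not an existing interface):
 *
 *      int
 *      lock_try(lock_t *lp)
 *      {
 *              return (xchg8((uint8_t *)lp, 0xff) == 0);
 *      }
 *
 * The xchgb leaves the previous lock byte in %dl; xoring it into %al
 * (preloaded with 0xff) yields non-zero exactly when the lock was free.
 */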

        .globl  kernelbase

        ENTRY(lock_try)
        movb    $-1, %dl
        movzbq  %dl, %rax
        xchgb   %dl, (%rdi)
        xorb    %dl, %al
.lock_try_lockstat_patch_point:
        ret
        testb   %al, %al
        jnz     0f
        ret
0:
        movq    %gs:CPU_THREAD, %rdx    /* rdx = thread addr */
        movq    %rdi, %rsi              /* rsi = lock addr */
        movl    $LS_LOCK_TRY_ACQUIRE, %edi /* edi = event */
        jmp     lockstat_wrapper
        SET_SIZE(lock_try)

        ENTRY(lock_spin_try)
        movb    $-1, %dl
        movzbq  %dl, %rax
        xchgb   %dl, (%rdi)
        xorb    %dl, %al
        ret
        SET_SIZE(lock_spin_try)

        ENTRY(ulock_try)
#ifdef DEBUG
        movq    kernelbase(%rip), %rax
        cmpq    %rax, %rdi              /* test uaddr < kernelbase */
        jb      ulock_pass              /*      uaddr < kernelbase, proceed */

        movq    %rdi, %r12              /* preserve lock ptr for debugging */
        leaq    .ulock_panic_msg(%rip), %rdi
        pushq   %rbp                    /* align stack properly */
        movq    %rsp, %rbp
        xorl    %eax, %eax              /* clear for varargs */
        call    panic

#endif /* DEBUG */

ulock_pass:
        movl    $1, %eax
        xchgb   %al, (%rdi)
        xorb    $1, %al
        ret
        SET_SIZE(ulock_try)

#ifdef DEBUG
        .data
.ulock_panic_msg:
        .string "ulock_try: Argument is above kernelbase"
        .text
#endif  /* DEBUG */

/*
 * lock_clear(lp)
 *      - unlock lock without changing interrupt priority level.
 */

        ENTRY(lock_clear)
        movb    $0, (%rdi)
.lock_clear_lockstat_patch_point:
        ret
        movq    %rdi, %rsi                      /* rsi = lock addr */
        movq    %gs:CPU_THREAD, %rdx            /* rdx = thread addr */
        movl    $LS_LOCK_CLEAR_RELEASE, %edi    /* edi = event */
        jmp     lockstat_wrapper
        SET_SIZE(lock_clear)

        ENTRY(ulock_clear)
#ifdef DEBUG
        movq    kernelbase(%rip), %rcx
        cmpq    %rcx, %rdi              /* test uaddr < kernelbase */
        jb      ulock_clr               /*      uaddr < kernelbase, proceed */

        leaq    .ulock_clear_msg(%rip), %rdi
        pushq   %rbp                    /* align stack properly */
        movq    %rsp, %rbp
        xorl    %eax, %eax              /* clear for varargs */
        call    panic
#endif

ulock_clr:
        movb    $0, (%rdi)
        ret
        SET_SIZE(ulock_clear)

#ifdef DEBUG
        .data
.ulock_clear_msg:
        .string "ulock_clear: Argument is above kernelbase"
        .text
#endif  /* DEBUG */


/*
 * lock_set_spl(lock_t *lp, int new_pil, u_short *old_pil)
 * Grabs lp, sets pil to new_pil, stores old pil in *old_pil.
 */
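
/*
 * A minimal C sketch of the fast path below (illustration only; xchg8()
 * is the same assumed helper as above, and the lock_set_spl_spin()
 * argument order mirrors the register setup at .lss_miss):
 *
 *      void
 *      lock_set_spl(lock_t *lp, int new_pil, ushort_t *old_pil)
 *      {
 *              int pil = splr(new_pil);
 *
 *              if (xchg8((uint8_t *)lp, 0xff) == 0) {
 *                      *old_pil = (ushort_t)pil;
 *                      return;
 *              }
 *              lock_set_spl_spin(lp, new_pil, old_pil, pil);
 *      }
 *
 * The priority level is raised before the lock is attempted, and the
 * pre-raise pil is only published through *old_pil on success.
 */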

        ENTRY(lock_set_spl)
        pushq   %rbp
        movq    %rsp, %rbp
        subq    $32, %rsp
        movl    %esi, 8(%rsp)           /* save priority level */
        movq    %rdx, 16(%rsp)          /* save old pil ptr */
        movq    %rdi, 24(%rsp)          /* save lock pointer */
        movl    %esi, %edi              /* pass priority level */
        call    splr                    /* raise priority level */
        movq    24(%rsp), %rdi          /* rdi = lock addr */
        movb    $-1, %dl
        xchgb   %dl, (%rdi)             /* try to set lock */
        testb   %dl, %dl                /* did we get the lock? ... */
        jnz     .lss_miss               /* ... no, go to C for the hard case */
        movq    16(%rsp), %rdx          /* rdx = old pil addr */
        movw    %ax, (%rdx)             /* store old pil */
        leave
.lock_set_spl_lockstat_patch_point:
        ret
        movq    %rdi, %rsi              /* rsi = lock addr */
        movq    %gs:CPU_THREAD, %rdx    /* rdx = thread addr */
        movl    $LS_LOCK_SET_SPL_ACQUIRE, %edi
        jmp     lockstat_wrapper
.lss_miss:
        movl    8(%rsp), %esi           /* new_pil */
        movq    16(%rsp), %rdx          /* old_pil_addr */
        movl    %eax, %ecx              /* original pil */
        leave                           /* unwind stack */
        jmp     lock_set_spl_spin
        SET_SIZE(lock_set_spl)

/*
 * void
 * lock_init(lp)
 */

        ENTRY(lock_init)
        movb    $0, (%rdi)
        ret
        SET_SIZE(lock_init)

/*
 * void
 * lock_set(lp)
 */

        ENTRY(lock_set)
        movb    $-1, %dl
        xchgb   %dl, (%rdi)             /* try to set lock */
        testb   %dl, %dl                /* did we get it? */
        jnz     lock_set_spin           /* no, go to C for the hard case */
.lock_set_lockstat_patch_point:
        ret
        movq    %rdi, %rsi              /* rsi = lock addr */
        movq    %gs:CPU_THREAD, %rdx    /* rdx = thread addr */
        movl    $LS_LOCK_SET_ACQUIRE, %edi
        jmp     lockstat_wrapper
        SET_SIZE(lock_set)

/*
 * lock_clear_splx(lp, s)
 */

        ENTRY(lock_clear_splx)
        movb    $0, (%rdi)              /* clear lock */
.lock_clear_splx_lockstat_patch_point:
        jmp     0f
0:
        movl    %esi, %edi              /* arg for splx */
        jmp     splx                    /* let splx do its thing */
.lock_clear_splx_lockstat:
        pushq   %rbp                    /* align stack properly */
        movq    %rsp, %rbp
        subq    $16, %rsp               /* space to save args across splx */
        movq    %rdi, 8(%rsp)           /* save lock ptr across splx call */
        movl    %esi, %edi              /* arg for splx */
        call    splx                    /* lower the priority */
        movq    8(%rsp), %rsi           /* rsi = lock ptr */
        leave                           /* unwind stack */
        movq    %gs:CPU_THREAD, %rdx    /* rdx = thread addr */
        movl    $LS_LOCK_CLEAR_SPLX_RELEASE, %edi
        jmp     lockstat_wrapper
        SET_SIZE(lock_clear_splx)

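/*
 * The lockstat patch point for lock_clear_splx() is not a ret but the
 * one-byte displacement of the "jmp 0f" above: PATCH_POINT below is the
 * address of that displacement byte, and PATCH_VAL is the displacement
 * that redirects the jump to .lock_clear_splx_lockstat instead.
 */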
#if defined(__GNUC_AS__)
#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL      \
        (.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2)

#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT    \
        (.lock_clear_splx_lockstat_patch_point + 1)
#else
#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL      \
        [.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2]

#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT    \
        [.lock_clear_splx_lockstat_patch_point + 1]
#endif

/*
 * mutex_enter() and mutex_exit().
 *
 * These routines handle the simple cases of mutex_enter() (adaptive
 * lock, not held) and mutex_exit() (adaptive lock, held, no waiters).
 * If anything complicated is going on we punt to mutex_vector_enter().
 *
 * mutex_tryenter() is similar to mutex_enter() but returns zero if
 * the lock cannot be acquired, nonzero on success.
 *
 * If mutex_exit() gets preempted in the window between checking waiters
 * and clearing the lock, we can miss wakeups.  Disabling preemption
 * in the mutex code is prohibitively expensive, so instead we detect
 * mutex preemption by examining the trapped PC in the interrupt path.
 * If we interrupt a thread in mutex_exit() that has not yet cleared
 * the lock, cmnint() resets its PC back to the beginning of
 * mutex_exit() so it will check again for waiters when it resumes.
 *
 * The lockstat code below is activated when the lockstat driver
 * calls lockstat_hot_patch() to hot-patch the kernel mutex code.
 * Note that we don't need to test lockstat_event_mask here -- we won't
 * patch this code in unless we're gathering ADAPTIVE_HOLD lockstats.
 */
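
/*
 * Roughly equivalent C for the two fast paths (a sketch for illustration;
 * the m_owner field name and the use of atomic_cas_ptr() are assumptions
 * standing in for the cmpxchgq on the first word of the mutex):
 *
 *      void
 *      mutex_enter(kmutex_t *lp)
 *      {
 *              if (atomic_cas_ptr(&lp->m_owner, NULL, curthread) != NULL)
 *                      mutex_vector_enter(lp);
 *      }
 *
 *      void
 *      mutex_exit(kmutex_t *lp)
 *      {
 *              if (lp->m_owner != curthread)
 *                      mutex_vector_exit(lp);
 *              else
 *                      lp->m_owner = NULL;
 *      }
 *
 * The region between the owner comparison and the clearing store in
 * mutex_exit() is the critical window described above.
 */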

        ENTRY_NP(mutex_enter)
        movq    %gs:CPU_THREAD, %rdx            /* rdx = thread ptr */
        xorl    %eax, %eax                      /* rax = 0 (unheld adaptive) */
        lock
        cmpxchgq %rdx, (%rdi)
        jnz     mutex_vector_enter
.mutex_enter_lockstat_patch_point:
#if defined(OPTERON_WORKAROUND_6323525)
.mutex_enter_6323525_patch_point:
        ret                                     /* nop space for lfence */
        nop
        nop
.mutex_enter_lockstat_6323525_patch_point:      /* new patch point if lfence */
        nop
#else   /* OPTERON_WORKAROUND_6323525 */
        ret
#endif  /* OPTERON_WORKAROUND_6323525 */
        movq    %rdi, %rsi
        movl    $LS_MUTEX_ENTER_ACQUIRE, %edi
/*
 * expects %rdx=thread, %rsi=lock, %edi=lockstat event
 */
        ALTENTRY(lockstat_wrapper)
        incb    T_LOCKSTAT(%rdx)                /* curthread->t_lockstat++ */
        leaq    lockstat_probemap(%rip), %rax
        movl    (%rax, %rdi, DTRACE_IDSIZE), %eax
        testl   %eax, %eax                      /* check for non-zero probe */
        jz      1f
        pushq   %rbp                            /* align stack properly */
        movq    %rsp, %rbp
        movl    %eax, %edi
        movq    lockstat_probe, %rax
        INDIRECT_CALL_REG(rax)
        leave                                   /* unwind stack */
1:
        movq    %gs:CPU_THREAD, %rdx            /* reload thread ptr */
        decb    T_LOCKSTAT(%rdx)                /* curthread->t_lockstat-- */
        movl    $1, %eax                        /* return success if tryenter */
        ret
        SET_SIZE(lockstat_wrapper)
        SET_SIZE(mutex_enter)

/*
 * expects %rcx=thread, %rdx=arg, %rsi=lock, %edi=lockstat event
 */
        ENTRY(lockstat_wrapper_arg)
        incb    T_LOCKSTAT(%rcx)                /* curthread->t_lockstat++ */
        leaq    lockstat_probemap(%rip), %rax
        movl    (%rax, %rdi, DTRACE_IDSIZE), %eax
        testl   %eax, %eax                      /* check for non-zero probe */
        jz      1f
        pushq   %rbp                            /* align stack properly */
        movq    %rsp, %rbp
        movl    %eax, %edi
        movq    lockstat_probe, %rax
        INDIRECT_CALL_REG(rax)
        leave                                   /* unwind stack */
1:
        movq    %gs:CPU_THREAD, %rdx            /* reload thread ptr */
        decb    T_LOCKSTAT(%rdx)                /* curthread->t_lockstat-- */
        movl    $1, %eax                        /* return success if tryenter */
        ret
        SET_SIZE(lockstat_wrapper_arg)


        ENTRY(mutex_tryenter)
        movq    %gs:CPU_THREAD, %rdx            /* rdx = thread ptr */
        xorl    %eax, %eax                      /* rax = 0 (unheld adaptive) */
        lock
        cmpxchgq %rdx, (%rdi)
        jnz     mutex_vector_tryenter
        not     %eax                            /* return success (nonzero) */
#if defined(OPTERON_WORKAROUND_6323525)
.mutex_tryenter_lockstat_patch_point:
.mutex_tryenter_6323525_patch_point:
        ret                                     /* nop space for lfence */
        nop
        nop
.mutex_tryenter_lockstat_6323525_patch_point:   /* new patch point if lfence */
        nop
#else   /* OPTERON_WORKAROUND_6323525 */
.mutex_tryenter_lockstat_patch_point:
        ret
#endif  /* OPTERON_WORKAROUND_6323525 */
        movq    %rdi, %rsi
        movl    $LS_MUTEX_ENTER_ACQUIRE, %edi
        jmp     lockstat_wrapper
        SET_SIZE(mutex_tryenter)

        ENTRY(mutex_adaptive_tryenter)
        movq    %gs:CPU_THREAD, %rdx            /* rdx = thread ptr */
        xorl    %eax, %eax                      /* rax = 0 (unheld adaptive) */
        lock
        cmpxchgq %rdx, (%rdi)
        jnz     0f
        not     %eax                            /* return success (nonzero) */
#if defined(OPTERON_WORKAROUND_6323525)
.mutex_atryenter_6323525_patch_point:
        ret                                     /* nop space for lfence */
        nop
        nop
        nop
#else   /* OPTERON_WORKAROUND_6323525 */
        ret
#endif  /* OPTERON_WORKAROUND_6323525 */
0:
        xorl    %eax, %eax                      /* return failure */
        ret
        SET_SIZE(mutex_adaptive_tryenter)

        .globl  mutex_owner_running_critical_start

        ENTRY(mutex_owner_running)
mutex_owner_running_critical_start:
        movq    (%rdi), %r11            /* get owner field */
        andq    $MUTEX_THREAD, %r11     /* remove waiters bit */
        cmpq    $0, %r11                /* if free, skip */
        je      1f                      /* go return 0 */
        movq    T_CPU(%r11), %r8        /* get owner->t_cpu */
        movq    CPU_THREAD(%r8), %r9    /* get t_cpu->cpu_thread */
.mutex_owner_running_critical_end:
        cmpq    %r11, %r9       /* owner == running thread? */
        je      2f              /* yes, go return cpu */
1:
        xorq    %rax, %rax      /* return 0 */
        ret
2:
        movq    %r8, %rax               /* return cpu */
        ret
        SET_SIZE(mutex_owner_running)

        .globl  mutex_owner_running_critical_size
        .type   mutex_owner_running_critical_size, @object
        .align  CPTRSIZE
mutex_owner_running_critical_size:
        .quad   .mutex_owner_running_critical_end - mutex_owner_running_critical_start
        SET_SIZE(mutex_owner_running_critical_size)

        .globl  mutex_exit_critical_start

        ENTRY(mutex_exit)
mutex_exit_critical_start:              /* If interrupted, restart here */
        movq    %gs:CPU_THREAD, %rdx
        cmpq    %rdx, (%rdi)
        jne     mutex_vector_exit               /* wrong type or wrong owner */
        movq    $0, (%rdi)                      /* clear owner AND lock */
.mutex_exit_critical_end:
.mutex_exit_lockstat_patch_point:
        ret
        movq    %rdi, %rsi
        movl    $LS_MUTEX_EXIT_RELEASE, %edi
        jmp     lockstat_wrapper
        SET_SIZE(mutex_exit)

        .globl  mutex_exit_critical_size
        .type   mutex_exit_critical_size, @object
        .align  CPTRSIZE
mutex_exit_critical_size:
        .quad   .mutex_exit_critical_end - mutex_exit_critical_start
        SET_SIZE(mutex_exit_critical_size)

/*
 * rw_enter() and rw_exit().
 *
 * These routines handle the simple cases of rw_enter (write-locking an unheld
 * lock or read-locking a lock that's neither write-locked nor write-wanted)
 * and rw_exit (no waiters or not the last reader).  If anything complicated
 * is going on we punt to rw_enter_sleep() and rw_exit_wakeup(), respectively.
 */
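
/*
 * Roughly equivalent C for the rw_enter() fast paths (a sketch for
 * illustration; cas_word() is an assumed compare-and-swap helper standing
 * in for the cmpxchgq, and rw_wwwh is the single word that encodes owner,
 * hold count and flag bits):
 *
 *      void
 *      rw_enter(krwlock_t *rwlp, krw_t rw)
 *      {
 *              rwlock_impl_t *lp = (rwlock_impl_t *)rwlp;
 *              uintptr_t old;
 *
 *              if (rw == RW_WRITER) {
 *                      old = cas_word(&lp->rw_wwwh, 0,
 *                          (uintptr_t)curthread | RW_WRITE_LOCKED);
 *                      if (old != 0)
 *                              rw_enter_sleep(rwlp, rw);
 *                      return;
 *              }
 *              old = lp->rw_wwwh;
 *              if ((old & (RW_WRITE_LOCKED | RW_WRITE_WANTED)) != 0 ||
 *                  cas_word(&lp->rw_wwwh, old, old + RW_READ_LOCK) != old)
 *                      rw_enter_sleep(rwlp, rw);
 *      }
 *
 * A failed compare-and-swap is not retried here; both slow paths simply
 * fall through to rw_enter_sleep(), which re-examines the lock.
 */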

        ENTRY(rw_enter)
        cmpl    $RW_WRITER, %esi
        je      .rw_write_enter
        movq    (%rdi), %rax                    /* rax = old rw_wwwh value */
        testl   $RW_WRITE_LOCKED|RW_WRITE_WANTED, %eax
        jnz     rw_enter_sleep
        leaq    RW_READ_LOCK(%rax), %rdx        /* rdx = new rw_wwwh value */
        lock
        cmpxchgq %rdx, (%rdi)                   /* try to grab read lock */
        jnz     rw_enter_sleep
.rw_read_enter_lockstat_patch_point:
        ret
        movq    %gs:CPU_THREAD, %rcx            /* rcx = thread ptr */
        movq    %rdi, %rsi                      /* rsi = lock ptr */
        movl    $LS_RW_ENTER_ACQUIRE, %edi
        movl    $RW_READER, %edx
        jmp     lockstat_wrapper_arg
.rw_write_enter:
        movq    %gs:CPU_THREAD, %rdx
        orq     $RW_WRITE_LOCKED, %rdx          /* rdx = write-locked value */
        xorl    %eax, %eax                      /* rax = unheld value */
        lock
        cmpxchgq %rdx, (%rdi)                   /* try to grab write lock */
        jnz     rw_enter_sleep

#if defined(OPTERON_WORKAROUND_6323525)
.rw_write_enter_lockstat_patch_point:
.rw_write_enter_6323525_patch_point:
        ret
        nop
        nop
.rw_write_enter_lockstat_6323525_patch_point:
        nop
#else   /* OPTERON_WORKAROUND_6323525 */
.rw_write_enter_lockstat_patch_point:
        ret
#endif  /* OPTERON_WORKAROUND_6323525 */

        movq    %gs:CPU_THREAD, %rcx            /* rcx = thread ptr */
        movq    %rdi, %rsi                      /* rsi = lock ptr */
        movl    $LS_RW_ENTER_ACQUIRE, %edi
        movl    $RW_WRITER, %edx
        jmp     lockstat_wrapper_arg
        SET_SIZE(rw_enter)

        ENTRY(rw_exit)
        movq    (%rdi), %rax                    /* rax = old rw_wwwh value */
        cmpl    $RW_READ_LOCK, %eax             /* single-reader, no waiters? */
        jne     .rw_not_single_reader
        xorl    %edx, %edx                      /* rdx = new value (unheld) */
.rw_read_exit:
        lock
        cmpxchgq %rdx, (%rdi)                   /* try to drop read lock */
        jnz     rw_exit_wakeup
.rw_read_exit_lockstat_patch_point:
        ret
        movq    %gs:CPU_THREAD, %rcx            /* rcx = thread ptr */
        movq    %rdi, %rsi                      /* rsi = lock ptr */
        movl    $LS_RW_EXIT_RELEASE, %edi
        movl    $RW_READER, %edx
        jmp     lockstat_wrapper_arg
.rw_not_single_reader:
        testl   $RW_WRITE_LOCKED, %eax  /* write-locked? */
        jnz     .rw_write_exit
        leaq    -RW_READ_LOCK(%rax), %rdx       /* rdx = new value */
        cmpl    $RW_READ_LOCK, %edx
        jge     .rw_read_exit           /* not last reader, safe to drop */
        jmp     rw_exit_wakeup                  /* last reader with waiters */
.rw_write_exit:
        movq    %gs:CPU_THREAD, %rax            /* rax = thread ptr */
        xorl    %edx, %edx                      /* rdx = new value (unheld) */
        orq     $RW_WRITE_LOCKED, %rax          /* rax = write-locked value */
        lock
        cmpxchgq %rdx, (%rdi)                   /* try to drop write lock */
        jnz     rw_exit_wakeup
.rw_write_exit_lockstat_patch_point:
        ret
        movq    %gs:CPU_THREAD, %rcx            /* rcx = thread ptr */
        movq    %rdi, %rsi                      /* rsi = lock ptr */
        movl    $LS_RW_EXIT_RELEASE, %edi
        movl    $RW_WRITER, %edx
        jmp     lockstat_wrapper_arg
        SET_SIZE(rw_exit)

#if defined(OPTERON_WORKAROUND_6323525)

/*
 * If it is necessary to patch the lock enter routines with the lfence
 * workaround, workaround_6323525_patched is set to a non-zero value so that
 * the lockstat_hot_patch routine can patch to the new location of the 'ret'
 * instruction.
 */
        DGDEF3(workaround_6323525_patched, 4, 4)
        .long   0

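/*
 * HOT_MUTEX_PATCH(srcaddr, dstaddr, size) copies 'size' bytes from srcaddr
 * to dstaddr one byte at a time via hot_patch_kernel_text(), starting with
 * the highest-addressed byte and working backwards.
 */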
#define HOT_MUTEX_PATCH(srcaddr, dstaddr, size) \
        movq    $size, %rbx;                    \
        movq    $dstaddr, %r13;                 \
        addq    %rbx, %r13;                     \
        movq    $srcaddr, %r12;                 \
        addq    %rbx, %r12;                     \
0:                                              \
        decq    %r13;                           \
        decq    %r12;                           \
        movzbl  (%r12), %esi;                   \
        movq    $1, %rdx;                       \
        movq    %r13, %rdi;                     \
        call    hot_patch_kernel_text;          \
        decq    %rbx;                           \
        testq   %rbx, %rbx;                     \
        jg      0b;

/*
 * patch_workaround_6323525: provide workaround for 6323525
 *
 * The workaround is to place a fencing instruction (lfence) between the
 * mutex operation and the subsequent read-modify-write instruction.
 *
 * This routine hot patches the lfence instruction on top of the space
 * reserved by nops in the lock enter routines.
 */
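
/*
 * Byte-level view of one patch site (encodings for illustration: ret is
 * 0xc3, nop is 0x90, lfence is 0x0f 0xae 0xe8):
 *
 *      before: c3 90 90 90             (ret, nop, nop, nop)
 *      after:  0f ae e8 c3             (lfence, ret)
 *
 * Because HOT_MUTEX_PATCH() writes the highest-addressed byte first, the
 * leading ret (0xc3) is the last byte replaced, so the routine being
 * patched stays safe to call at every intermediate stage.
 */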
        ENTRY_NP(patch_workaround_6323525)
        pushq   %rbp
        movq    %rsp, %rbp
        pushq   %r12
        pushq   %r13
        pushq   %rbx

        /*
         * Tell lockstat_hot_patch() to use the alternate lockstat workaround
         * 6323525 patch points (points past the lfence instruction to the
         * new ret) when workaround_6323525_patched is set.
         */
        movl    $1, workaround_6323525_patched

        /*
         * Patch ret/nop/nop/nop to lfence/ret at the end of the lock enter
         * routines. The 4 bytes are patched in reverse order so that
         * the existing ret is overwritten last. This provides lock enter
         * sanity during the intermediate patching stages.
         */
        HOT_MUTEX_PATCH(_lfence_insn, .mutex_enter_6323525_patch_point, 4)
        HOT_MUTEX_PATCH(_lfence_insn, .mutex_tryenter_6323525_patch_point, 4)
        HOT_MUTEX_PATCH(_lfence_insn, .mutex_atryenter_6323525_patch_point, 4)
        HOT_MUTEX_PATCH(_lfence_insn, .rw_write_enter_6323525_patch_point, 4)

        popq    %rbx
        popq    %r13
        popq    %r12
        movq    %rbp, %rsp
        popq    %rbp
        ret
_lfence_insn:
        lfence
        ret
        SET_SIZE(patch_workaround_6323525)


#endif  /* OPTERON_WORKAROUND_6323525 */


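/*
 * HOT_PATCH(addr, event, active_instr, normal_instr, len): if the lockstat
 * probe for 'event' is enabled (its lockstat_probemap[] entry is non-zero),
 * patch 'active_instr' in at 'addr'; otherwise restore 'normal_instr'.
 */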
#define HOT_PATCH(addr, event, active_instr, normal_instr, len) \
        movq    $normal_instr, %rsi;            \
        movq    $active_instr, %rdi;            \
        leaq    lockstat_probemap(%rip), %rax;  \
        movl    _MUL(event, DTRACE_IDSIZE)(%rax), %eax; \
        testl   %eax, %eax;                     \
        jz      9f;                             \
        movq    %rdi, %rsi;                     \
9:                                              \
        movq    $len, %rdx;                     \
        movq    $addr, %rdi;                    \
        call    hot_patch_kernel_text

        ENTRY(lockstat_hot_patch)
        pushq   %rbp                    /* align stack properly */
        movq    %rsp, %rbp

#if defined(OPTERON_WORKAROUND_6323525)
        cmpl    $0, workaround_6323525_patched
        je      1f
        HOT_PATCH(.mutex_enter_lockstat_6323525_patch_point,
                LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.mutex_tryenter_lockstat_6323525_patch_point,
                LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.rw_write_enter_lockstat_6323525_patch_point,
                LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        jmp     2f
1:
        HOT_PATCH(.mutex_enter_lockstat_patch_point,
                LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.mutex_tryenter_lockstat_patch_point,
                LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.rw_write_enter_lockstat_patch_point,
                LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
2:
#else   /* OPTERON_WORKAROUND_6323525 */
        HOT_PATCH(.mutex_enter_lockstat_patch_point,
                LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.mutex_tryenter_lockstat_patch_point,
                LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.rw_write_enter_lockstat_patch_point,
                LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
#endif  /* !OPTERON_WORKAROUND_6323525 */
        HOT_PATCH(.mutex_exit_lockstat_patch_point,
                LS_MUTEX_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.rw_read_enter_lockstat_patch_point,
                LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.rw_write_exit_lockstat_patch_point,
                LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.rw_read_exit_lockstat_patch_point,
                LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.lock_set_lockstat_patch_point,
                LS_LOCK_SET_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.lock_try_lockstat_patch_point,
                LS_LOCK_TRY_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.lock_clear_lockstat_patch_point,
                LS_LOCK_CLEAR_RELEASE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.lock_set_spl_lockstat_patch_point,
                LS_LOCK_SET_SPL_ACQUIRE, NOP_INSTR, RET_INSTR, 1)

        HOT_PATCH(LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT,
                LS_LOCK_CLEAR_SPLX_RELEASE,
                LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL, 0, 1);
        leave                   /* unwind stack */
        ret
        SET_SIZE(lockstat_hot_patch)

        ENTRY(membar_enter)
        ALTENTRY(membar_exit)
        ALTENTRY(membar_sync)
        mfence                  /* lighter weight than lock; xorq $0,(%rsp) */
        ret
        SET_SIZE(membar_sync)
        SET_SIZE(membar_exit)
        SET_SIZE(membar_enter)

        ENTRY(membar_producer)
        sfence
        ret
        SET_SIZE(membar_producer)

        ENTRY(membar_consumer)
        lfence
        ret
        SET_SIZE(membar_consumer)

/*
 * thread_onproc()
 * Set thread in onproc state for the specified CPU.
 * Also set the thread lock pointer to the CPU's onproc lock.
 * Since the new lock isn't held, the store ordering is important.
 * If not done in assembler, the compiler could reorder the stores.
 */
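
/*
 * A minimal C sketch of the ordering requirement (illustration only; the
 * field and symbol names correspond to the T_STATE, T_LOCKP and
 * CPU_THREAD_LOCK offsets used below):
 *
 *      void
 *      thread_onproc(kthread_t *t, cpu_t *cp)
 *      {
 *              t->t_state = TS_ONPROC;
 *              t->t_lockp = &cp->cpu_thread_lock;
 *      }
 *
 * A C compiler would be free to reorder these two stores; keeping the
 * routine in assembly guarantees the state is written before the thread
 * lock pointer is switched to the (unheld) onproc lock.
 */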

        ENTRY(thread_onproc)
        addq    $CPU_THREAD_LOCK, %rsi  /* pointer to disp_lock while running */
        movl    $ONPROC_THREAD, T_STATE(%rdi)   /* set state to TS_ONPROC */
        movq    %rsi, T_LOCKP(%rdi)     /* store new lock pointer */
        ret
        SET_SIZE(thread_onproc)

/*
 * mutex_delay_default(void)
 * Spins for a few hundred processor cycles and returns to the caller.
 */

        ENTRY(mutex_delay_default)
        movq    $92,%r11
0:      decq    %r11
        jg      0b
        ret
        SET_SIZE(mutex_delay_default)
