/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2019 Joyent, Inc.
 */

#include "assym.h"

#include <sys/mutex_impl.h>
#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/rwlock_impl.h>
#include <sys/lockstat.h>

/*
 * lock_try(lp), ulock_try(lp)
 *      - returns non-zero on success.
 *      - doesn't block interrupts so don't use this to spin on a lock.
 *
 * ulock_try() is for a lock in the user address space.
 */
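
/*
 * Roughly, the fast path below behaves like the following C sketch.  This is
 * an illustrative approximation only (the lock is treated as a single byte,
 * and the helper shown is not the kernel's actual implementation):
 *
 *      int
 *      lock_try_sketch(volatile uint8_t *lp)
 *      {
 *              uint8_t old = atomic_swap_8(lp, 0xff);    (the xchgb below)
 *              return (old == 0);          nonzero iff the lock was free
 *      }
 *
 * The xchgb instruction with a memory operand is implicitly locked, so no
 * explicit lock prefix is required.
 */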

        .globl  kernelbase

        ENTRY(lock_try)
        movb    $-1, %dl
        movzbq  %dl, %rax
        xchgb   %dl, (%rdi)
        xorb    %dl, %al
.lock_try_lockstat_patch_point:
        ret
        testb   %al, %al
        jnz     0f
        ret
0:
        movq    %gs:CPU_THREAD, %rdx    /* rdx = thread addr */
        movq    %rdi, %rsi              /* rsi = lock addr */
        movl    $LS_LOCK_TRY_ACQUIRE, %edi /* edi = event */
        jmp     lockstat_wrapper
        SET_SIZE(lock_try)

        ENTRY(lock_spin_try)
        movb    $-1, %dl
        movzbq  %dl, %rax
        xchgb   %dl, (%rdi)
        xorb    %dl, %al
        ret
        SET_SIZE(lock_spin_try)

        ENTRY(ulock_try)
#ifdef DEBUG
        movq    kernelbase(%rip), %rax
        cmpq    %rax, %rdi              /* test uaddr < kernelbase */
        jb      ulock_pass              /*      uaddr < kernelbase, proceed */

        movq    %rdi, %r12              /* preserve lock ptr for debugging */
        leaq    .ulock_panic_msg(%rip), %rdi
        pushq   %rbp                    /* align stack properly */
        movq    %rsp, %rbp
        xorl    %eax, %eax              /* clear for varargs */
        call    panic

#endif /* DEBUG */

ulock_pass:
        movl    $1, %eax
        xchgb   %al, (%rdi)
        xorb    $1, %al
        ret
        SET_SIZE(ulock_try)

#ifdef DEBUG
        .data
.ulock_panic_msg:
        .string "ulock_try: Argument is above kernelbase"
        .text
#endif  /* DEBUG */

/*
 * lock_clear(lp)
 *      - unlock lock without changing interrupt priority level.
 */
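
/*
 * On x86, ordinary stores are not reordered with earlier loads or stores
 * (TSO), so the plain movb $0 below is sufficient to release the lock; no
 * fence or lock-prefixed instruction is needed on the unlock path.
 */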

        ENTRY(lock_clear)
        movb    $0, (%rdi)
.lock_clear_lockstat_patch_point:
        ret
        movq    %rdi, %rsi                      /* rsi = lock addr */
        movq    %gs:CPU_THREAD, %rdx            /* rdx = thread addr */
        movl    $LS_LOCK_CLEAR_RELEASE, %edi    /* edi = event */
        jmp     lockstat_wrapper
        SET_SIZE(lock_clear)

        ENTRY(ulock_clear)
#ifdef DEBUG
        movq    kernelbase(%rip), %rcx
        cmpq    %rcx, %rdi              /* test uaddr < kernelbase */
        jb      ulock_clr               /*       uaddr < kernelbase, proceed */

        leaq    .ulock_clear_msg(%rip), %rdi
        pushq   %rbp                    /* align stack properly */
        movq    %rsp, %rbp
        xorl    %eax, %eax              /* clear for varargs */
        call    panic
#endif

ulock_clr:
        movb    $0, (%rdi)
        ret
        SET_SIZE(ulock_clear)

#ifdef DEBUG
        .data
.ulock_clear_msg:
        .string "ulock_clear: Argument is above kernelbase"
        .text
#endif  /* DEBUG */


/*
 * lock_set_spl(lock_t *lp, int new_pil, u_short *old_pil)
 * Raises the interrupt priority to new_pil, grabs lp, and stores the
 * previous priority in *old_pil.
 */
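
/*
 * The fast path below raises the PIL with splr() and then attempts an
 * atomic byte exchange on the lock.  If the lock is already held, the
 * original arguments and the pre-splr() priority are handed to the C
 * routine lock_set_spl_spin() (see .lss_miss below), which handles the
 * contended case.
 */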

        ENTRY(lock_set_spl)
        pushq   %rbp
        movq    %rsp, %rbp
        subq    $32, %rsp
        movl    %esi, 8(%rsp)           /* save priority level */
        movq    %rdx, 16(%rsp)          /* save old pil ptr */
        movq    %rdi, 24(%rsp)          /* save lock pointer */
        movl    %esi, %edi              /* pass priority level */
        call    splr                    /* raise priority level */
        movq    24(%rsp), %rdi          /* rdi = lock addr */
        movb    $-1, %dl
        xchgb   %dl, (%rdi)             /* try to set lock */
        testb   %dl, %dl                /* did we get the lock? ... */
        jnz     .lss_miss               /* ... no, go to C for the hard case */
        movq    16(%rsp), %rdx          /* rdx = old pil addr */
        movw    %ax, (%rdx)             /* store old pil */
        leave
.lock_set_spl_lockstat_patch_point:
        ret
        movq    %rdi, %rsi              /* rsi = lock addr */
        movq    %gs:CPU_THREAD, %rdx    /* rdx = thread addr */
        movl    $LS_LOCK_SET_SPL_ACQUIRE, %edi
        jmp     lockstat_wrapper
.lss_miss:
        movl    8(%rsp), %esi           /* new_pil */
        movq    16(%rsp), %rdx          /* old_pil_addr */
        movl    %eax, %ecx              /* original pil */
        leave                           /* unwind stack */
        jmp     lock_set_spl_spin
        SET_SIZE(lock_set_spl)

/*
 * void
 * lock_init(lp)
 */

        ENTRY(lock_init)
        movb    $0, (%rdi)
        ret
        SET_SIZE(lock_init)

/*
 * void
 * lock_set(lp)
 */

        ENTRY(lock_set)
        movb    $-1, %dl
        xchgb   %dl, (%rdi)             /* try to set lock */
        testb   %dl, %dl                /* did we get it? */
        jnz     lock_set_spin           /* no, go to C for the hard case */
.lock_set_lockstat_patch_point:
        ret
        movq    %rdi, %rsi              /* rsi = lock addr */
        movq    %gs:CPU_THREAD, %rdx    /* rdx = thread addr */
        movl    $LS_LOCK_SET_ACQUIRE, %edi
        jmp     lockstat_wrapper
        SET_SIZE(lock_set)

/*
 * lock_clear_splx(lp, s)
 */

        ENTRY(lock_clear_splx)
        movb    $0, (%rdi)              /* clear lock */
.lock_clear_splx_lockstat_patch_point:
        jmp     0f
0:
        movl    %esi, %edi              /* arg for splx */
        jmp     splx                    /* let splx do its thing */
.lock_clear_splx_lockstat:
        pushq   %rbp                    /* align stack properly */
        movq    %rsp, %rbp
        subq    $16, %rsp               /* space to save args across splx */
        movq    %rdi, 8(%rsp)           /* save lock ptr across splx call */
        movl    %esi, %edi              /* arg for splx */
        call    splx                    /* lower the priority */
        movq    8(%rsp), %rsi           /* rsi = lock ptr */
        leave                           /* unwind stack */
        movq    %gs:CPU_THREAD, %rdx    /* rdx = thread addr */
        movl    $LS_LOCK_CLEAR_SPLX_RELEASE, %edi
        jmp     lockstat_wrapper
        SET_SIZE(lock_clear_splx)

#if defined(__GNUC_AS__)
#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL      \
        (.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2)

#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT    \
        (.lock_clear_splx_lockstat_patch_point + 1)
#else
#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL      \
        [.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2]

#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT    \
        [.lock_clear_splx_lockstat_patch_point + 1]
#endif

/*
 * mutex_enter() and mutex_exit().
 *
 * These routines handle the simple cases of mutex_enter() (adaptive
 * lock, not held) and mutex_exit() (adaptive lock, held, no waiters).
 * If anything complicated is going on we punt to mutex_vector_enter().
 *
 * mutex_tryenter() is similar to mutex_enter() but returns zero if
 * the lock cannot be acquired, nonzero on success.
 *
 * If mutex_exit() gets preempted in the window between checking waiters
 * and clearing the lock, we can miss wakeups.  Disabling preemption
 * in the mutex code is prohibitively expensive, so instead we detect
 * mutex preemption by examining the trapped PC in the interrupt path.
 * If we interrupt a thread in mutex_exit() that has not yet cleared
 * the lock, cmnint() resets its PC back to the beginning of
 * mutex_exit() so it will check again for waiters when it resumes.
 *
 * The lockstat code below is activated when the lockstat driver
 * calls lockstat_hot_patch() to hot-patch the kernel mutex code.
 * Note that we don't need to test lockstat_event_mask here -- we won't
 * patch this code in unless we're gathering ADAPTIVE_HOLD lockstats.
 */
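
/*
 * As a rough C sketch (illustrative only; the real lock layout lives in
 * sys/mutex_impl.h and the real slow paths are mutex_vector_enter() and
 * mutex_vector_exit()), the fast paths below do approximately:
 *
 *      void
 *      mutex_enter_sketch(mutex_impl_t *lp)
 *      {
 *              if (atomic_cas_ptr(&lp->m_owner, NULL, curthread) != NULL)
 *                      mutex_vector_enter(lp);      held, or not adaptive
 *      }
 *
 *      void
 *      mutex_exit_sketch(mutex_impl_t *lp)
 *      {
 *              if (lp->m_owner != curthread)
 *                      mutex_vector_exit(lp);       waiters or wrong type
 *              else
 *                      lp->m_owner = NULL;          clear owner and lock
 *      }
 *
 * (m_owner here is shorthand for the owner/waiters word.)  The assembly
 * versions exist so that the mutex_exit() critical section is a known,
 * fixed range of PCs that the interrupt path can test, as described above.
 */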

        ENTRY_NP(mutex_enter)
        movq    %gs:CPU_THREAD, %rdx            /* rdx = thread ptr */
        xorl    %eax, %eax                      /* rax = 0 (unheld adaptive) */
        lock
        cmpxchgq %rdx, (%rdi)
        jnz     mutex_vector_enter
.mutex_enter_lockstat_patch_point:
#if defined(OPTERON_WORKAROUND_6323525)
.mutex_enter_6323525_patch_point:
        ret                                     /* nop space for lfence */
        nop
        nop
.mutex_enter_lockstat_6323525_patch_point:      /* new patch point if lfence */
        nop
#else   /* OPTERON_WORKAROUND_6323525 */
        ret
#endif  /* OPTERON_WORKAROUND_6323525 */
        movq    %rdi, %rsi
        movl    $LS_MUTEX_ENTER_ACQUIRE, %edi
/*
 * expects %rdx=thread, %rsi=lock, %edi=lockstat event
 */
        ALTENTRY(lockstat_wrapper)
        incb    T_LOCKSTAT(%rdx)                /* curthread->t_lockstat++ */
        leaq    lockstat_probemap(%rip), %rax
        movl    (%rax, %rdi, DTRACE_IDSIZE), %eax
        testl   %eax, %eax                      /* check for non-zero probe */
        jz      1f
        pushq   %rbp                            /* align stack properly */
        movq    %rsp, %rbp
        movl    %eax, %edi
        movq    lockstat_probe, %rax
        INDIRECT_CALL_REG(rax)
        leave                                   /* unwind stack */
1:
        movq    %gs:CPU_THREAD, %rdx            /* reload thread ptr */
        decb    T_LOCKSTAT(%rdx)                /* curthread->t_lockstat-- */
        movl    $1, %eax                        /* return success if tryenter */
        ret
        SET_SIZE(lockstat_wrapper)
        SET_SIZE(mutex_enter)

/*
 * expects %rcx=thread, %rdx=arg, %rsi=lock, %edi=lockstat event
 */
        ENTRY(lockstat_wrapper_arg)
        incb    T_LOCKSTAT(%rcx)                /* curthread->t_lockstat++ */
        leaq    lockstat_probemap(%rip), %rax
        movl    (%rax, %rdi, DTRACE_IDSIZE), %eax
        testl   %eax, %eax                      /* check for non-zero probe */
        jz      1f
        pushq   %rbp                            /* align stack properly */
        movq    %rsp, %rbp
        movl    %eax, %edi
        movq    lockstat_probe, %rax
        INDIRECT_CALL_REG(rax)
        leave                                   /* unwind stack */
1:
        movq    %gs:CPU_THREAD, %rdx            /* reload thread ptr */
        decb    T_LOCKSTAT(%rdx)                /* curthread->t_lockstat-- */
        movl    $1, %eax                        /* return success if tryenter */
        ret
        SET_SIZE(lockstat_wrapper_arg)


        ENTRY(mutex_tryenter)
        movq    %gs:CPU_THREAD, %rdx            /* rdx = thread ptr */
        xorl    %eax, %eax                      /* rax = 0 (unheld adaptive) */
        lock
        cmpxchgq %rdx, (%rdi)
        jnz     mutex_vector_tryenter
        not     %eax                            /* return success (nonzero) */
#if defined(OPTERON_WORKAROUND_6323525)
.mutex_tryenter_lockstat_patch_point:
.mutex_tryenter_6323525_patch_point:
        ret                                     /* nop space for lfence */
        nop
        nop
.mutex_tryenter_lockstat_6323525_patch_point:   /* new patch point if lfence */
        nop
#else   /* OPTERON_WORKAROUND_6323525 */
.mutex_tryenter_lockstat_patch_point:
        ret
#endif  /* OPTERON_WORKAROUND_6323525 */
        movq    %rdi, %rsi
        movl    $LS_MUTEX_ENTER_ACQUIRE, %edi
        jmp     lockstat_wrapper
        SET_SIZE(mutex_tryenter)

        ENTRY(mutex_adaptive_tryenter)
        movq    %gs:CPU_THREAD, %rdx            /* rdx = thread ptr */
        xorl    %eax, %eax                      /* rax = 0 (unheld adaptive) */
        lock
        cmpxchgq %rdx, (%rdi)
        jnz     0f
        not     %eax                            /* return success (nonzero) */
#if defined(OPTERON_WORKAROUND_6323525)
.mutex_atryenter_6323525_patch_point:
        ret                                     /* nop space for lfence */
        nop
        nop
        nop
#else   /* OPTERON_WORKAROUND_6323525 */
        ret
#endif  /* OPTERON_WORKAROUND_6323525 */
0:
        xorl    %eax, %eax                      /* return failure */
        ret
        SET_SIZE(mutex_adaptive_tryenter)

        .globl  mutex_owner_running_critical_start

        ENTRY(mutex_owner_running)
mutex_owner_running_critical_start:
        movq    (%rdi), %r11            /* get owner field */
        andq    $MUTEX_THREAD, %r11     /* remove waiters bit */
        cmpq    $0, %r11                /* if free, skip */
        je      1f                      /* go return 0 */
        movq    T_CPU(%r11), %r8        /* get owner->t_cpu */
        movq    CPU_THREAD(%r8), %r9    /* get t_cpu->cpu_thread */
.mutex_owner_running_critical_end:
        cmpq    %r11, %r9       /* owner == running thread? */
        je      2f              /* yes, go return cpu */
1:
        xorq    %rax, %rax      /* return 0 */
        ret
2:
        movq    %r8, %rax               /* return cpu */
        ret
        SET_SIZE(mutex_owner_running)

        .globl  mutex_owner_running_critical_size
        .type   mutex_owner_running_critical_size, @object
        .align  CPTRSIZE
mutex_owner_running_critical_size:
        .quad   .mutex_owner_running_critical_end - mutex_owner_running_critical_start
        SET_SIZE(mutex_owner_running_critical_size)

        .globl  mutex_exit_critical_start

        ENTRY(mutex_exit)
mutex_exit_critical_start:              /* If interrupted, restart here */
        movq    %gs:CPU_THREAD, %rdx
        cmpq    %rdx, (%rdi)
        jne     mutex_vector_exit               /* wrong type or wrong owner */
        movq    $0, (%rdi)                      /* clear owner AND lock */
.mutex_exit_critical_end:
.mutex_exit_lockstat_patch_point:
        ret
        movq    %rdi, %rsi
        movl    $LS_MUTEX_EXIT_RELEASE, %edi
        jmp     lockstat_wrapper
        SET_SIZE(mutex_exit)

        .globl  mutex_exit_critical_size
        .type   mutex_exit_critical_size, @object
        .align  CPTRSIZE
mutex_exit_critical_size:
        .quad   .mutex_exit_critical_end - mutex_exit_critical_start
        SET_SIZE(mutex_exit_critical_size)

/*
 * rw_enter() and rw_exit().
 *
 * These routines handle the simple cases of rw_enter (write-locking an unheld
 * lock or read-locking a lock that's neither write-locked nor write-wanted)
 * and rw_exit (no waiters or not the last reader).  If anything complicated
 * is going on we punt to rw_enter_sleep() and rw_exit_wakeup(), respectively.
 */
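
/*
 * A rough C sketch of the read and write fast paths (illustrative only;
 * the real word layout and flag values come from sys/rwlock_impl.h):
 *
 *      int
 *      rw_enter_read_sketch(volatile uint64_t *wwwh)
 *      {
 *              uint64_t old = *wwwh;
 *              if (old & (RW_WRITE_LOCKED | RW_WRITE_WANTED))
 *                      return (0);                          slow path
 *              return (atomic_cas_64(wwwh, old, old + RW_READ_LOCK) == old);
 *      }
 *
 *      int
 *      rw_enter_write_sketch(volatile uint64_t *wwwh)
 *      {
 *              return (atomic_cas_64(wwwh, 0,
 *                  (uint64_t)curthread | RW_WRITE_LOCKED) == 0);
 *      }
 *
 * rw_exit() mirrors this: a single reader with no waiters swaps the word
 * back to zero, additional readers just subtract RW_READ_LOCK, a writer
 * swaps (curthread | RW_WRITE_LOCKED) back to zero, and any failure falls
 * through to rw_exit_wakeup().
 */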

        ENTRY(rw_enter)
        cmpl    $RW_WRITER, %esi
        je      .rw_write_enter
        movq    (%rdi), %rax                    /* rax = old rw_wwwh value */
        testl   $RW_WRITE_LOCKED|RW_WRITE_WANTED, %eax
        jnz     rw_enter_sleep
        leaq    RW_READ_LOCK(%rax), %rdx        /* rdx = new rw_wwwh value */
        lock
        cmpxchgq %rdx, (%rdi)                   /* try to grab read lock */
        jnz     rw_enter_sleep
.rw_read_enter_lockstat_patch_point:
        ret
        movq    %gs:CPU_THREAD, %rcx            /* rcx = thread ptr */
        movq    %rdi, %rsi                      /* rsi = lock ptr */
        movl    $LS_RW_ENTER_ACQUIRE, %edi
        movl    $RW_READER, %edx
        jmp     lockstat_wrapper_arg
.rw_write_enter:
        movq    %gs:CPU_THREAD, %rdx
        orq     $RW_WRITE_LOCKED, %rdx          /* rdx = write-locked value */
        xorl    %eax, %eax                      /* rax = unheld value */
        lock
        cmpxchgq %rdx, (%rdi)                   /* try to grab write lock */
        jnz     rw_enter_sleep

#if defined(OPTERON_WORKAROUND_6323525)
.rw_write_enter_lockstat_patch_point:
.rw_write_enter_6323525_patch_point:
        ret
        nop
        nop
.rw_write_enter_lockstat_6323525_patch_point:
        nop
#else   /* OPTERON_WORKAROUND_6323525 */
.rw_write_enter_lockstat_patch_point:
        ret
#endif  /* OPTERON_WORKAROUND_6323525 */

        movq    %gs:CPU_THREAD, %rcx            /* rcx = thread ptr */
        movq    %rdi, %rsi                      /* rsi = lock ptr */
        movl    $LS_RW_ENTER_ACQUIRE, %edi
        movl    $RW_WRITER, %edx
        jmp     lockstat_wrapper_arg
        SET_SIZE(rw_enter)

        ENTRY(rw_exit)
        movq    (%rdi), %rax                    /* rax = old rw_wwwh value */
        cmpl    $RW_READ_LOCK, %eax             /* single-reader, no waiters? */
        jne     .rw_not_single_reader
        xorl    %edx, %edx                      /* rdx = new value (unheld) */
.rw_read_exit:
        lock
        cmpxchgq %rdx, (%rdi)                   /* try to drop read lock */
        jnz     rw_exit_wakeup
.rw_read_exit_lockstat_patch_point:
        ret
        movq    %rdi, %rsi                      /* rsi = lock ptr */
        movl    $LS_RW_EXIT_RELEASE, %edi
        movl    $RW_READER, %edx
        jmp     lockstat_wrapper_arg
.rw_not_single_reader:
        testl   $RW_WRITE_LOCKED, %eax  /* write-locked? */
        jnz     .rw_write_exit
        leaq    -RW_READ_LOCK(%rax), %rdx       /* rdx = new value */
        cmpl    $RW_READ_LOCK, %edx
        jge     .rw_read_exit           /* not last reader, safe to drop */
        jmp     rw_exit_wakeup                  /* last reader with waiters */
.rw_write_exit:
        movq    %gs:CPU_THREAD, %rax            /* rax = thread ptr */
        xorl    %edx, %edx                      /* rdx = new value (unheld) */
        orq     $RW_WRITE_LOCKED, %rax          /* rax = write-locked value */
        lock
        cmpxchgq %rdx, (%rdi)                   /* try to drop write lock */
        jnz     rw_exit_wakeup
.rw_write_exit_lockstat_patch_point:
        ret
        movq    %gs:CPU_THREAD, %rcx            /* rcx = thread ptr */
        movq    %rdi, %rsi                      /* rsi = lock ptr */
        movl    $LS_RW_EXIT_RELEASE, %edi
        movl    $RW_WRITER, %edx
        jmp     lockstat_wrapper_arg
        SET_SIZE(rw_exit)

#if defined(OPTERON_WORKAROUND_6323525)

/*
 * If it is necessary to patch the lock enter routines with the lfence
 * workaround, workaround_6323525_patched is set to a non-zero value so that
 * the lockstat_hot_patch routine can patch to the new location of the 'ret'
 * instruction.
 */
        DGDEF3(workaround_6323525_patched, 4, 4)
        .long   0

#define HOT_MUTEX_PATCH(srcaddr, dstaddr, size) \
        movq    $size, %rbx;                    \
        movq    $dstaddr, %r13;                 \
        addq    %rbx, %r13;                     \
        movq    $srcaddr, %r12;                 \
        addq    %rbx, %r12;                     \
0:                                              \
        decq    %r13;                           \
        decq    %r12;                           \
        movzbl  (%r12), %esi;                   \
        movq    $1, %rdx;                       \
        movq    %r13, %rdi;                     \
        call    hot_patch_kernel_text;          \
        decq    %rbx;                           \
        testq   %rbx, %rbx;                     \
        jg      0b;

/*
 * patch_workaround_6323525: provide workaround for 6323525
 *
 * The workaround is to place a fencing instruction (lfence) between the
 * mutex operation and the subsequent read-modify-write instruction.
 *
 * This routine hot patches the lfence instruction on top of the space
 * reserved by nops in the lock enter routines.
 */
        ENTRY_NP(patch_workaround_6323525)
        pushq   %rbp
        movq    %rsp, %rbp
        pushq   %r12
        pushq   %r13
        pushq   %rbx

        /*
         * Set workaround_6323525_patched so that lockstat_hot_patch() uses
         * the alternate workaround 6323525 lockstat patch points (which
         * point past the lfence instruction to the new ret).
         */
        movl    $1, workaround_6323525_patched

        /*
         * patch ret/nop/nop/nop to lfence/ret at the end of the lock enter
         * routines. The 4 bytes are patched in reverse order so that the
         * existing ret is overwritten last. This provides lock enter
         * sanity during the intermediate patching stages.
         */
        HOT_MUTEX_PATCH(_lfence_insn, .mutex_enter_6323525_patch_point, 4)
        HOT_MUTEX_PATCH(_lfence_insn, .mutex_tryenter_6323525_patch_point, 4)
        HOT_MUTEX_PATCH(_lfence_insn, .mutex_atryenter_6323525_patch_point, 4)
        HOT_MUTEX_PATCH(_lfence_insn, .rw_write_enter_6323525_patch_point, 4)

        popq    %rbx
        popq    %r13
        popq    %r12
        movq    %rbp, %rsp
        popq    %rbp
        ret
_lfence_insn:
        lfence
        ret
        SET_SIZE(patch_workaround_6323525)


#endif  /* OPTERON_WORKAROUND_6323525 */


#define HOT_PATCH(addr, event, active_instr, normal_instr, len) \
        movq    $normal_instr, %rsi;            \
        movq    $active_instr, %rdi;            \
        leaq    lockstat_probemap(%rip), %rax;  \
        movl    _MUL(event, DTRACE_IDSIZE)(%rax), %eax; \
        testl   %eax, %eax;                     \
        jz      9f;                             \
        movq    %rdi, %rsi;                     \
9:                                              \
        movq    $len, %rdx;                     \
        movq    $addr, %rdi;                    \
        call    hot_patch_kernel_text
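
/*
 * HOT_PATCH() above consults lockstat_probemap[] for the given event: if
 * the probe is enabled it writes 'active_instr' (a nop, so control falls
 * through to the lockstat tail code) at 'addr'; otherwise it restores
 * 'normal_instr' (the original ret).
 */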

        ENTRY(lockstat_hot_patch)
        pushq   %rbp                    /* align stack properly */
        movq    %rsp, %rbp

#if defined(OPTERON_WORKAROUND_6323525)
        cmpl    $0, workaround_6323525_patched
        je      1f
        HOT_PATCH(.mutex_enter_lockstat_6323525_patch_point,
                LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.mutex_tryenter_lockstat_6323525_patch_point,
                LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.rw_write_enter_lockstat_6323525_patch_point,
                LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        jmp     2f
1:
        HOT_PATCH(.mutex_enter_lockstat_patch_point,
                LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.mutex_tryenter_lockstat_patch_point,
                LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.rw_write_enter_lockstat_patch_point,
                LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
2:
#else   /* OPTERON_WORKAROUND_6323525 */
        HOT_PATCH(.mutex_enter_lockstat_patch_point,
                LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.mutex_tryenter_lockstat_patch_point,
                LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.rw_write_enter_lockstat_patch_point,
                LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
#endif  /* !OPTERON_WORKAROUND_6323525 */
        HOT_PATCH(.mutex_exit_lockstat_patch_point,
                LS_MUTEX_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.rw_read_enter_lockstat_patch_point,
                LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.rw_write_exit_lockstat_patch_point,
                LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.rw_read_exit_lockstat_patch_point,
                LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.lock_set_lockstat_patch_point,
                LS_LOCK_SET_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.lock_try_lockstat_patch_point,
                LS_LOCK_TRY_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.lock_clear_lockstat_patch_point,
                LS_LOCK_CLEAR_RELEASE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.lock_set_spl_lockstat_patch_point,
                LS_LOCK_SET_SPL_ACQUIRE, NOP_INSTR, RET_INSTR, 1)

        HOT_PATCH(LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT,
                LS_LOCK_CLEAR_SPLX_RELEASE,
                LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL, 0, 1);
        leave                   /* unwind stack */
        ret
        SET_SIZE(lockstat_hot_patch)

        ENTRY(membar_enter)
        ALTENTRY(membar_exit)
        ALTENTRY(membar_sync)
        mfence                  /* lighter weight than lock; xorq $0,(%rsp) */
        ret
        SET_SIZE(membar_sync)
        SET_SIZE(membar_exit)
        SET_SIZE(membar_enter)

        ENTRY(membar_producer)
        sfence
        ret
        SET_SIZE(membar_producer)

        ENTRY(membar_consumer)
        lfence
        ret
        SET_SIZE(membar_consumer)

/*
 * thread_onproc()
 * Set thread in onproc state for the specified CPU.
 * Also set the thread lock pointer to the CPU's onproc lock.
 * Since the new lock isn't held, the store ordering is important.
 * If not done in assembler, the compiler could reorder the stores.
 */

        ENTRY(thread_onproc)
        addq    $CPU_THREAD_LOCK, %rsi  /* pointer to disp_lock while running */
        movl    $ONPROC_THREAD, T_STATE(%rdi)   /* set state to TS_ONPROC */
        movq    %rsi, T_LOCKP(%rdi)     /* store new lock pointer */
        ret
        SET_SIZE(thread_onproc)

/*
 * mutex_delay_default(void)
 * Spins for a few hundred processor cycles and returns to caller.
 */

        ENTRY(mutex_delay_default)
        movq    $92,%r11
0:      decq    %r11
        jg      0b
        ret
        SET_SIZE(mutex_delay_default)