/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2019 Joyent, Inc.
 */

#include "assym.h"

#include <sys/mutex_impl.h>
#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/rwlock_impl.h>
#include <sys/lockstat.h>

/*
 * lock_try(lp), ulock_try(lp)
 *      - returns non-zero on success.
 *      - doesn't block interrupts, so don't use this to spin on a lock.
 *
 * ulock_try() is for a lock in the user address space.
 */
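/*
 * For reference, a rough C sketch of the try semantics above (illustration
 * only, not the build source; the lock_t argument is shown as a plain byte
 * pointer):
 *
 *	int
 *	lock_try_sketch(volatile uint8_t *lp)
 *	{
 *		return (atomic_swap_8(lp, 0xff) == 0);
 *	}
 *
 * The swap is atomic but interrupts remain enabled, which is why callers
 * that need to spin should use lock_set()/lock_set_spl() rather than
 * looping on lock_try().
 */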

        .globl  kernelbase

        ENTRY(lock_try)
        movb    $-1, %dl
        movzbq  %dl, %rax
        xchgb   %dl, (%rdi)
        xorb    %dl, %al
.lock_try_lockstat_patch_point:
        ret
        testb   %al, %al
        jnz     0f
        ret
0:
        movq    %gs:CPU_THREAD, %rdx    /* rdx = thread addr */
        movq    %rdi, %rsi              /* rsi = lock addr */
        movl    $LS_LOCK_TRY_ACQUIRE, %edi /* edi = event */
        jmp     lockstat_wrapper
        SET_SIZE(lock_try)

        ENTRY(lock_spin_try)
        movb    $-1, %dl
        movzbq  %dl, %rax
        xchgb   %dl, (%rdi)
        xorb    %dl, %al
        ret
        SET_SIZE(lock_spin_try)

        ENTRY(ulock_try)
#ifdef DEBUG
        movq    kernelbase(%rip), %rax
        cmpq    %rax, %rdi              /* test uaddr < kernelbase */
        jb      ulock_pass              /*      uaddr < kernelbase, proceed */

        movq    %rdi, %r12              /* preserve lock ptr for debugging */
        leaq    .ulock_panic_msg(%rip), %rdi
        pushq   %rbp                    /* align stack properly */
        movq    %rsp, %rbp
        xorl    %eax, %eax              /* clear for varargs */
        call    panic

#endif /* DEBUG */

ulock_pass:
        movl    $1, %eax
        xchgb   %al, (%rdi)
        xorb    $1, %al
        ret
        SET_SIZE(ulock_try)

#ifdef DEBUG
        .data
.ulock_panic_msg:
        .string "ulock_try: Argument is above kernelbase"
        .text
#endif  /* DEBUG */

/*
 * lock_clear(lp)
 *      - unlock lock without changing interrupt priority level.
 */

        ENTRY(lock_clear)
        movb    $0, (%rdi)
.lock_clear_lockstat_patch_point:
        ret
        movq    %rdi, %rsi                      /* rsi = lock addr */
        movq    %gs:CPU_THREAD, %rdx            /* rdx = thread addr */
        movl    $LS_LOCK_CLEAR_RELEASE, %edi    /* edi = event */
        jmp     lockstat_wrapper
        SET_SIZE(lock_clear)

        ENTRY(ulock_clear)
#ifdef DEBUG
        movq    kernelbase(%rip), %rcx
        cmpq    %rcx, %rdi              /* test uaddr < kernelbase */
        jb      ulock_clr               /*       uaddr < kernelbase, proceed */

        leaq    .ulock_clear_msg(%rip), %rdi
        pushq   %rbp                    /* align stack properly */
        movq    %rsp, %rbp
        xorl    %eax, %eax              /* clear for varargs */
        call    panic
#endif

ulock_clr:
        movb    $0, (%rdi)
        ret
        SET_SIZE(ulock_clear)

#ifdef DEBUG
        .data
.ulock_clear_msg:
        .string "ulock_clear: Argument is above kernelbase"
        .text
#endif  /* DEBUG */


/*
 * lock_set_spl(lock_t *lp, int new_pil, u_short *old_pil)
 * Sets pil to new_pil, grabs lp, stores old pil in *old_pil.
 */
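/*
 * A rough C rendering of the fast path below; this is a sketch only, and
 * the argument order for the contended case is inferred from the register
 * setup at .lss_miss:
 *
 *	void
 *	lock_set_spl_sketch(volatile uint8_t *lp, int new_pil, ushort_t *old_pil)
 *	{
 *		int s = splr(new_pil);		// raise PIL first
 *
 *		if (atomic_swap_8(lp, 0xff) == 0) {
 *			*old_pil = (ushort_t)s;	// got the lock; record old PIL
 *			return;
 *		}
 *		lock_set_spl_spin(lp, new_pil, old_pil, s);
 *	}
 */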

        ENTRY(lock_set_spl)
        pushq   %rbp
        movq    %rsp, %rbp
        subq    $32, %rsp
        movl    %esi, 8(%rsp)           /* save priority level */
        movq    %rdx, 16(%rsp)          /* save old pil ptr */
        movq    %rdi, 24(%rsp)          /* save lock pointer */
        movl    %esi, %edi              /* pass priority level */
        call    splr                    /* raise priority level */
        movq    24(%rsp), %rdi          /* rdi = lock addr */
        movb    $-1, %dl
        xchgb   %dl, (%rdi)             /* try to set lock */
        testb   %dl, %dl                /* did we get the lock? ... */
        jnz     .lss_miss               /* ... no, go to C for the hard case */
        movq    16(%rsp), %rdx          /* rdx = old pil addr */
        movw    %ax, (%rdx)             /* store old pil */
        leave
.lock_set_spl_lockstat_patch_point:
        ret
        movq    %rdi, %rsi              /* rsi = lock addr */
        movq    %gs:CPU_THREAD, %rdx    /* rdx = thread addr */
        movl    $LS_LOCK_SET_SPL_ACQUIRE, %edi
        jmp     lockstat_wrapper
.lss_miss:
        movl    8(%rsp), %esi           /* new_pil */
        movq    16(%rsp), %rdx          /* old_pil_addr */
        movl    %eax, %ecx              /* original pil */
        leave                           /* unwind stack */
        jmp     lock_set_spl_spin
        SET_SIZE(lock_set_spl)

/*
 * void
 * lock_init(lp)
 */

        ENTRY(lock_init)
        movb    $0, (%rdi)
        ret
        SET_SIZE(lock_init)

/*
 * void
 * lock_set(lp)
 */

        ENTRY(lock_set)
        movb    $-1, %dl
        xchgb   %dl, (%rdi)             /* try to set lock */
        testb   %dl, %dl                /* did we get it? */
        jnz     lock_set_spin           /* no, go to C for the hard case */
.lock_set_lockstat_patch_point:
        ret
        movq    %rdi, %rsi              /* rsi = lock addr */
        movq    %gs:CPU_THREAD, %rdx    /* rdx = thread addr */
        movl    $LS_LOCK_SET_ACQUIRE, %edi
        jmp     lockstat_wrapper
        SET_SIZE(lock_set)

/*
 * lock_clear_splx(lp, s)
 */

        ENTRY(lock_clear_splx)
        movb    $0, (%rdi)              /* clear lock */
.lock_clear_splx_lockstat_patch_point:
        jmp     0f
0:
        movl    %esi, %edi              /* arg for splx */
        jmp     splx                    /* let splx do its thing */
.lock_clear_splx_lockstat:
        pushq   %rbp                    /* align stack properly */
        movq    %rsp, %rbp
        subq    $16, %rsp               /* space to save args across splx */
        movq    %rdi, 8(%rsp)           /* save lock ptr across splx call */
        movl    %esi, %edi              /* arg for splx */
        call    splx                    /* lower the priority */
        movq    8(%rsp), %rsi           /* rsi = lock ptr */
        leave                           /* unwind stack */
        movq    %gs:CPU_THREAD, %rdx    /* rdx = thread addr */
        movl    $LS_LOCK_CLEAR_SPLX_RELEASE, %edi
        jmp     lockstat_wrapper
        SET_SIZE(lock_clear_splx)

#if defined(__GNUC_AS__)
#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL      \
        (.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2)

#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT    \
        (.lock_clear_splx_lockstat_patch_point + 1)
#else
#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL      \
        [.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2]

#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT    \
        [.lock_clear_splx_lockstat_patch_point + 1]
#endif

/*
 * mutex_enter() and mutex_exit().
 *
 * These routines handle the simple cases of mutex_enter() (adaptive
 * lock, not held) and mutex_exit() (adaptive lock, held, no waiters).
 * If anything complicated is going on we punt to mutex_vector_enter().
 *
 * mutex_tryenter() is similar to mutex_enter() but returns zero if
 * the lock cannot be acquired, nonzero on success.
 *
 * If mutex_exit() gets preempted in the window between checking waiters
 * and clearing the lock, we can miss wakeups.  Disabling preemption
 * in the mutex code is prohibitively expensive, so instead we detect
 * mutex preemption by examining the trapped PC in the interrupt path.
 * If we interrupt a thread in mutex_exit() that has not yet cleared
 * the lock, cmnint() resets its PC back to the beginning of
 * mutex_exit() so it will check again for waiters when it resumes.
 *
 * The lockstat code below is activated when the lockstat driver
 * calls lockstat_hot_patch() to hot-patch the kernel mutex code.
 * Note that we don't need to test lockstat_event_mask here -- we won't
 * patch this code in unless we're gathering ADAPTIVE_HOLD lockstats.
 */
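/*
 * As a point of reference, the fast paths below amount to one atomic
 * compare-and-swap on the owner word.  A rough C sketch (illustration
 * only; the lockstat patch points and the restartable critical section
 * cannot be expressed this way):
 *
 *	void
 *	mutex_enter_sketch(kmutex_t *mp)
 *	{
 *		ulong_t *owner = (ulong_t *)mp;
 *
 *		if (atomic_cas_ulong(owner, 0, (ulong_t)curthread) != 0)
 *			mutex_vector_enter(mp);		// held, spin, etc.
 *	}
 *
 *	void
 *	mutex_exit_sketch(kmutex_t *mp)
 *	{
 *		ulong_t *owner = (ulong_t *)mp;
 *
 *		if (*owner == (ulong_t)curthread)
 *			*owner = 0;			// clears owner AND lock
 *		else
 *			mutex_vector_exit(mp);		// waiters or wrong type
 *	}
 */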

        ENTRY_NP(mutex_enter)
        movq    %gs:CPU_THREAD, %rdx            /* rdx = thread ptr */
        xorl    %eax, %eax                      /* rax = 0 (unheld adaptive) */
        lock
        cmpxchgq %rdx, (%rdi)
        jnz     mutex_vector_enter
.mutex_enter_lockstat_patch_point:
#if defined(OPTERON_WORKAROUND_6323525)
.mutex_enter_6323525_patch_point:
        ret                                     /* nop space for lfence */
        nop
        nop
.mutex_enter_lockstat_6323525_patch_point:      /* new patch point if lfence */
        nop
#else   /* OPTERON_WORKAROUND_6323525 */
        ret
#endif  /* OPTERON_WORKAROUND_6323525 */
        movq    %rdi, %rsi
        movl    $LS_MUTEX_ENTER_ACQUIRE, %edi
/*
 * expects %rdx=thread, %rsi=lock, %edi=lockstat event
 */
        ALTENTRY(lockstat_wrapper)
        incb    T_LOCKSTAT(%rdx)                /* curthread->t_lockstat++ */
        leaq    lockstat_probemap(%rip), %rax
        movl    (%rax, %rdi, DTRACE_IDSIZE), %eax
        testl   %eax, %eax                      /* check for non-zero probe */
        jz      1f
        pushq   %rbp                            /* align stack properly */
        movq    %rsp, %rbp
        movl    %eax, %edi
        movq    lockstat_probe, %rax
        INDIRECT_CALL_REG(rax)
        leave                                   /* unwind stack */
1:
        movq    %gs:CPU_THREAD, %rdx            /* reload thread ptr */
        decb    T_LOCKSTAT(%rdx)                /* curthread->t_lockstat-- */
        movl    $1, %eax                        /* return success if tryenter */
        ret
        SET_SIZE(lockstat_wrapper)
        SET_SIZE(mutex_enter)

/*
 * expects %rcx=thread, %rdx=arg, %rsi=lock, %edi=lockstat event
 */
        ENTRY(lockstat_wrapper_arg)
        incb    T_LOCKSTAT(%rcx)                /* curthread->t_lockstat++ */
        leaq    lockstat_probemap(%rip), %rax
        movl    (%rax, %rdi, DTRACE_IDSIZE), %eax
        testl   %eax, %eax                      /* check for non-zero probe */
        jz      1f
        pushq   %rbp                            /* align stack properly */
        movq    %rsp, %rbp
        movl    %eax, %edi
        movq    lockstat_probe, %rax
        INDIRECT_CALL_REG(rax)
        leave                                   /* unwind stack */
1:
        movq    %gs:CPU_THREAD, %rdx            /* reload thread ptr */
        decb    T_LOCKSTAT(%rdx)                /* curthread->t_lockstat-- */
        movl    $1, %eax                        /* return success if tryenter */
        ret
        SET_SIZE(lockstat_wrapper_arg)


        ENTRY(mutex_tryenter)
        movq    %gs:CPU_THREAD, %rdx            /* rdx = thread ptr */
        xorl    %eax, %eax                      /* rax = 0 (unheld adaptive) */
        lock
        cmpxchgq %rdx, (%rdi)
        jnz     mutex_vector_tryenter
        not     %eax                            /* return success (nonzero) */
#if defined(OPTERON_WORKAROUND_6323525)
.mutex_tryenter_lockstat_patch_point:
.mutex_tryenter_6323525_patch_point:
        ret                                     /* nop space for lfence */
        nop
        nop
.mutex_tryenter_lockstat_6323525_patch_point:   /* new patch point if lfence */
        nop
#else   /* OPTERON_WORKAROUND_6323525 */
.mutex_tryenter_lockstat_patch_point:
        ret
#endif  /* OPTERON_WORKAROUND_6323525 */
        movq    %rdi, %rsi
        movl    $LS_MUTEX_ENTER_ACQUIRE, %edi
        jmp     lockstat_wrapper
        SET_SIZE(mutex_tryenter)

        ENTRY(mutex_adaptive_tryenter)
        movq    %gs:CPU_THREAD, %rdx            /* rdx = thread ptr */
        xorl    %eax, %eax                      /* rax = 0 (unheld adaptive) */
        lock
        cmpxchgq %rdx, (%rdi)
        jnz     0f
        not     %eax                            /* return success (nonzero) */
#if defined(OPTERON_WORKAROUND_6323525)
.mutex_atryenter_6323525_patch_point:
        ret                                     /* nop space for lfence */
        nop
        nop
        nop
#else   /* OPTERON_WORKAROUND_6323525 */
        ret
#endif  /* OPTERON_WORKAROUND_6323525 */
0:
        xorl    %eax, %eax                      /* return failure */
        ret
        SET_SIZE(mutex_adaptive_tryenter)

        .globl  mutex_owner_running_critical_start

        ENTRY(mutex_owner_running)
mutex_owner_running_critical_start:
        movq    (%rdi), %r11            /* get owner field */
        andq    $MUTEX_THREAD, %r11     /* remove waiters bit */
        cmpq    $0, %r11                /* if free, skip */
        je      1f                      /* go return 0 */
        movq    T_CPU(%r11), %r8        /* get owner->t_cpu */
        movq    CPU_THREAD(%r8), %r9    /* get t_cpu->cpu_thread */
.mutex_owner_running_critical_end:
        cmpq    %r11, %r9       /* owner == running thread? */
        je      2f              /* yes, go return cpu */
1:
        xorq    %rax, %rax      /* return 0 */
        ret
2:
        movq    %r8, %rax               /* return cpu */
        ret
        SET_SIZE(mutex_owner_running)

        .globl  mutex_owner_running_critical_size
        .type   mutex_owner_running_critical_size, @object
        .align  CPTRSIZE
mutex_owner_running_critical_size:
        .quad   .mutex_owner_running_critical_end - mutex_owner_running_critical_start
        SET_SIZE(mutex_owner_running_critical_size)

        .globl  mutex_exit_critical_start

        ENTRY(mutex_exit)
mutex_exit_critical_start:              /* If interrupted, restart here */
        movq    %gs:CPU_THREAD, %rdx
        cmpq    %rdx, (%rdi)
        jne     mutex_vector_exit               /* wrong type or wrong owner */
        movq    $0, (%rdi)                      /* clear owner AND lock */
.mutex_exit_critical_end:
.mutex_exit_lockstat_patch_point:
        ret
        movq    %rdi, %rsi
        movl    $LS_MUTEX_EXIT_RELEASE, %edi
        jmp     lockstat_wrapper
        SET_SIZE(mutex_exit)

        .globl  mutex_exit_critical_size
        .type   mutex_exit_critical_size, @object
        .align  CPTRSIZE
mutex_exit_critical_size:
        .quad   .mutex_exit_critical_end - mutex_exit_critical_start
        SET_SIZE(mutex_exit_critical_size)

/*
 * rw_enter() and rw_exit().
 *
 * These routines handle the simple cases of rw_enter (write-locking an unheld
 * lock or read-locking a lock that's neither write-locked nor write-wanted)
 * and rw_exit (no waiters or not the last reader).  If anything complicated
 * is going on we punt to rw_enter_sleep() and rw_exit_wakeup(), respectively.
 */
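/*
 * A rough C sketch of the read-lock fast path below (illustration only;
 * the kpri bookkeeping and the lockstat patch points are omitted):
 *
 *	void
 *	rw_enter_read_sketch(krwlock_t *rwlp)
 *	{
 *		ulong_t *wwwh = (ulong_t *)rwlp;
 *		ulong_t old = *wwwh;
 *
 *		if ((old & (RW_WRITE_LOCKED | RW_WRITE_WANTED)) != 0 ||
 *		    atomic_cas_ulong(wwwh, old, old + RW_READ_LOCK) != old)
 *			rw_enter_sleep(rwlp, RW_READER);	// slow path
 *	}
 *
 * rw_exit() is symmetric: a single-reader lock with no waiters (value
 * exactly RW_READ_LOCK) or an uncontended write lock is dropped with one
 * compare-and-swap, and everything else goes to rw_exit_wakeup().
 */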

        ENTRY(rw_enter)
        movq    %gs:CPU_THREAD, %rdx            /* rdx = thread ptr */
        cmpl    $RW_WRITER, %esi
        je      .rw_write_enter
        incl    T_KPRI_REQ(%rdx)                /* THREAD_KPRI_REQUEST() */
        movq    (%rdi), %rax                    /* rax = old rw_wwwh value */
        testl   $RW_WRITE_LOCKED|RW_WRITE_WANTED, %eax
        jnz     rw_enter_sleep
        leaq    RW_READ_LOCK(%rax), %rdx        /* rdx = new rw_wwwh value */
        lock
        cmpxchgq %rdx, (%rdi)                   /* try to grab read lock */
        jnz     rw_enter_sleep
.rw_read_enter_lockstat_patch_point:
        ret
        movq    %gs:CPU_THREAD, %rcx            /* rcx = thread ptr */
        movq    %rdi, %rsi                      /* rsi = lock ptr */
        movl    $LS_RW_ENTER_ACQUIRE, %edi
        movl    $RW_READER, %edx
        jmp     lockstat_wrapper_arg
.rw_write_enter:
        orq     $RW_WRITE_LOCKED, %rdx          /* rdx = write-locked value */
        xorl    %eax, %eax                      /* rax = unheld value */
        lock
        cmpxchgq %rdx, (%rdi)                   /* try to grab write lock */
        jnz     rw_enter_sleep

#if defined(OPTERON_WORKAROUND_6323525)
.rw_write_enter_lockstat_patch_point:
.rw_write_enter_6323525_patch_point:
        ret
        nop
        nop
.rw_write_enter_lockstat_6323525_patch_point:
        nop
#else   /* OPTERON_WORKAROUND_6323525 */
.rw_write_enter_lockstat_patch_point:
        ret
#endif  /* OPTERON_WORKAROUND_6323525 */

        movq    %gs:CPU_THREAD, %rcx            /* rcx = thread ptr */
        movq    %rdi, %rsi                      /* rsi = lock ptr */
        movl    $LS_RW_ENTER_ACQUIRE, %edi
        movl    $RW_WRITER, %edx
        jmp     lockstat_wrapper_arg
        SET_SIZE(rw_enter)

        ENTRY(rw_exit)
        movq    (%rdi), %rax                    /* rax = old rw_wwwh value */
        cmpl    $RW_READ_LOCK, %eax             /* single-reader, no waiters? */
        jne     .rw_not_single_reader
        xorl    %edx, %edx                      /* rdx = new value (unheld) */
.rw_read_exit:
        lock
        cmpxchgq %rdx, (%rdi)                   /* try to drop read lock */
        jnz     rw_exit_wakeup
        movq    %gs:CPU_THREAD, %rcx            /* rcx = thread ptr */
        decl    T_KPRI_REQ(%rcx)                /* THREAD_KPRI_RELEASE() */
.rw_read_exit_lockstat_patch_point:
        ret
        movq    %rdi, %rsi                      /* rsi = lock ptr */
        movl    $LS_RW_EXIT_RELEASE, %edi
        movl    $RW_READER, %edx
        jmp     lockstat_wrapper_arg
.rw_not_single_reader:
        testl   $RW_WRITE_LOCKED, %eax  /* write-locked? */
        jnz     .rw_write_exit
        leaq    -RW_READ_LOCK(%rax), %rdx       /* rdx = new value */
        cmpl    $RW_READ_LOCK, %edx
        jge     .rw_read_exit           /* not last reader, safe to drop */
        jmp     rw_exit_wakeup                  /* last reader with waiters */
.rw_write_exit:
        movq    %gs:CPU_THREAD, %rax            /* rax = thread ptr */
        xorl    %edx, %edx                      /* rdx = new value (unheld) */
        orq     $RW_WRITE_LOCKED, %rax          /* rax = write-locked value */
        lock
        cmpxchgq %rdx, (%rdi)                   /* try to drop write lock */
        jnz     rw_exit_wakeup
.rw_write_exit_lockstat_patch_point:
        ret
        movq    %gs:CPU_THREAD, %rcx            /* rcx = thread ptr */
        movq    %rdi, %rsi                      /* rsi = lock ptr */
        movl    $LS_RW_EXIT_RELEASE, %edi
        movl    $RW_WRITER, %edx
        jmp     lockstat_wrapper_arg
        SET_SIZE(rw_exit)

#if defined(OPTERON_WORKAROUND_6323525)

/*
 * If it is necessary to patch the lock enter routines with the lfence
 * workaround, workaround_6323525_patched is set to a non-zero value so that
 * the lockstat_hot_patch() routine can patch to the new location of the
 * 'ret' instruction.
 */
        DGDEF3(workaround_6323525_patched, 4, 4)
        .long   0

#define HOT_MUTEX_PATCH(srcaddr, dstaddr, size) \
        movq    $size, %rbx;                    \
        movq    $dstaddr, %r13;                 \
        addq    %rbx, %r13;                     \
        movq    $srcaddr, %r12;                 \
        addq    %rbx, %r12;                     \
0:                                              \
        decq    %r13;                           \
        decq    %r12;                           \
        movzbl  (%r12), %esi;                   \
        movq    $1, %rdx;                       \
        movq    %r13, %rdi;                     \
        call    hot_patch_kernel_text;          \
        decq    %rbx;                           \
        testq   %rbx, %rbx;                     \
        jg      0b;

/*
 * patch_workaround_6323525: provide workaround for 6323525
 *
 * The workaround is to place a fencing instruction (lfence) between the
 * mutex operation and the subsequent read-modify-write instruction.
 *
 * This routine hot patches the lfence instruction on top of the space
 * reserved by nops in the lock enter routines.
 */
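/*
 * Concretely (byte values shown for illustration), each reserved window
 * starts out as
 *
 *	c3 90 90 90		ret, nop, nop, nop
 *
 * and is rewritten by HOT_MUTEX_PATCH, last byte first, into
 *
 *	0f ae e8 c3		lfence, ret
 *
 * so a thread entering the window during any intermediate stage of the
 * patch still hits a valid ret as its first instruction.
 */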
        ENTRY_NP(patch_workaround_6323525)
        pushq   %rbp
        movq    %rsp, %rbp
        pushq   %r12
        pushq   %r13
        pushq   %rbx

        /*
         * Set workaround_6323525_patched to a non-zero value so that
         * lockstat_hot_patch() uses the alternate workaround 6323525
         * patch points (past the lfence instruction, at the new ret).
         */
        movl    $1, workaround_6323525_patched

        /*
         * Patch ret/nop/nop/nop to lfence/ret at the end of the lock enter
         * routines. The 4 bytes are patched in reverse order so that the
         * existing ret is overwritten last. This provides lock enter
         * sanity during the intermediate patching stages.
         */
        HOT_MUTEX_PATCH(_lfence_insn, .mutex_enter_6323525_patch_point, 4)
        HOT_MUTEX_PATCH(_lfence_insn, .mutex_tryenter_6323525_patch_point, 4)
        HOT_MUTEX_PATCH(_lfence_insn, .mutex_atryenter_6323525_patch_point, 4)
        HOT_MUTEX_PATCH(_lfence_insn, .rw_write_enter_6323525_patch_point, 4)

        popq    %rbx
        popq    %r13
        popq    %r12
        movq    %rbp, %rsp
        popq    %rbp
        ret
_lfence_insn:
        lfence
        ret
        SET_SIZE(patch_workaround_6323525)


#endif  /* OPTERON_WORKAROUND_6323525 */


#define HOT_PATCH(addr, event, active_instr, normal_instr, len) \
        movq    $normal_instr, %rsi;            \
        movq    $active_instr, %rdi;            \
        leaq    lockstat_probemap(%rip), %rax;  \
        movl    _MUL(event, DTRACE_IDSIZE)(%rax), %eax; \
        testl   %eax, %eax;                     \
        jz      9f;                             \
        movq    %rdi, %rsi;                     \
9:                                              \
        movq    $len, %rdx;                     \
        movq    $addr, %rdi;                    \
        call    hot_patch_kernel_text
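
/*
 * In C terms, each HOT_PATCH() invocation below amounts to roughly the
 * following (sketch only):
 *
 *	if (lockstat_probemap[event] != 0)
 *		hot_patch_kernel_text((caddr_t)addr, active_instr, len);
 *	else
 *		hot_patch_kernel_text((caddr_t)addr, normal_instr, len);
 *
 * i.e. while a probe is enabled, the one-byte ret at its patch point is
 * replaced with a nop so that execution falls through into the
 * lockstat_wrapper() call sequence; when the probe is disabled, the ret
 * is put back.
 */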

        ENTRY(lockstat_hot_patch)
        pushq   %rbp                    /* align stack properly */
        movq    %rsp, %rbp

#if defined(OPTERON_WORKAROUND_6323525)
        cmpl    $0, workaround_6323525_patched
        je      1f
        HOT_PATCH(.mutex_enter_lockstat_6323525_patch_point,
                LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.mutex_tryenter_lockstat_6323525_patch_point,
                LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.rw_write_enter_lockstat_6323525_patch_point,
                LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        jmp     2f
1:
        HOT_PATCH(.mutex_enter_lockstat_patch_point,
                LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.mutex_tryenter_lockstat_patch_point,
                LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.rw_write_enter_lockstat_patch_point,
                LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
2:
#else   /* OPTERON_WORKAROUND_6323525 */
        HOT_PATCH(.mutex_enter_lockstat_patch_point,
                LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.mutex_tryenter_lockstat_patch_point,
                LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.rw_write_enter_lockstat_patch_point,
                LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
#endif  /* !OPTERON_WORKAROUND_6323525 */
        HOT_PATCH(.mutex_exit_lockstat_patch_point,
                LS_MUTEX_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.rw_read_enter_lockstat_patch_point,
                LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.rw_write_exit_lockstat_patch_point,
                LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.rw_read_exit_lockstat_patch_point,
                LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.lock_set_lockstat_patch_point,
                LS_LOCK_SET_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.lock_try_lockstat_patch_point,
                LS_LOCK_TRY_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.lock_clear_lockstat_patch_point,
                LS_LOCK_CLEAR_RELEASE, NOP_INSTR, RET_INSTR, 1)
        HOT_PATCH(.lock_set_spl_lockstat_patch_point,
                LS_LOCK_SET_SPL_ACQUIRE, NOP_INSTR, RET_INSTR, 1)

        HOT_PATCH(LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT,
                LS_LOCK_CLEAR_SPLX_RELEASE,
                LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL, 0, 1);
        leave                   /* unwind stack */
        ret
        SET_SIZE(lockstat_hot_patch)

        ENTRY(membar_enter)
        ALTENTRY(membar_exit)
        ALTENTRY(membar_sync)
        mfence                  /* lighter weight than lock; xorq $0,(%rsp) */
        ret
        SET_SIZE(membar_sync)
        SET_SIZE(membar_exit)
        SET_SIZE(membar_enter)

        ENTRY(membar_producer)
        sfence
        ret
        SET_SIZE(membar_producer)

        ENTRY(membar_consumer)
        lfence
        ret
        SET_SIZE(membar_consumer)

/*
 * thread_onproc()
 * Set thread in onproc state for the specified CPU.
 * Also set the thread lock pointer to the CPU's onproc lock.
 * Since the new lock isn't held, the store ordering is important.
 * If not done in assembler, the compiler could reorder the stores.
 */
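/*
 * For illustration, the C equivalent would be the two stores below; the
 * routine is in assembly precisely so the compiler cannot reorder them:
 *
 *	t->t_state = TS_ONPROC;
 *	t->t_lockp = &cp->cpu_thread_lock;
 */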

        ENTRY(thread_onproc)
        addq    $CPU_THREAD_LOCK, %rsi  /* pointer to disp_lock while running */
        movl    $ONPROC_THREAD, T_STATE(%rdi)   /* set state to TS_ONPROC */
        movq    %rsi, T_LOCKP(%rdi)     /* store new lock pointer */
        ret
        SET_SIZE(thread_onproc)

/*
 * mutex_delay_default(void)
 * Spins for approx a few hundred processor cycles and returns to caller.
 */

        ENTRY(mutex_delay_default)
        movq    $92,%r11
0:      decq    %r11
        jg      0b
        ret
        SET_SIZE(mutex_delay_default)
 716