1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright 2019 Joyent, Inc.
  28  */
  29 
  30 /
  31 / In-line functions for amd64 kernels.
  32 /
  33 
  34 /
  35 / return current thread pointer
  36 /
  37 / NOTE: the "0x18" should be replaced by the computed value of the
  38 /       offset of "cpu_thread" from the beginning of the struct cpu.
  39 /       Including "assym.h" does not work, however, since that stuff
  40 /       is PSM-specific and is only visible to the 'unix' build anyway.
  41 /       Same with current cpu pointer, where "0xc" should be replaced
  42 /       by the computed value of the offset of "cpu_self".
  43 /       Ugh -- what a disaster.
  44 /
  45         .inline threadp,0
  46         movq    %gs:0x18, %rax
  47         .end
  48 
  49 /
  50 / return current cpu pointer
  51 /
  52         .inline curcpup,0
  53         movq    %gs:0x10, %rax
  54         .end
  55 
  56 /
  57 / return caller
  58 /
  59         .inline caller,0
  60         movq    8(%rbp), %rax
  61         .end
  62 
  63 /
  64 / convert ipl to spl.  This is the identity function for i86
  65 /
  66         .inline ipltospl,0
  67         movq    %rdi, %rax
  68         .end
  69 
  70 /
  71 / Networking byte order functions (too bad, Intel has the wrong byte order)
  72 /
  73 
  74         .inline htonll,4
  75         movq    %rdi, %rax
  76         bswapq  %rax
  77         .end
  78 
  79         .inline ntohll,4
  80         movq    %rdi, %rax
  81         bswapq  %rax
  82         .end
  83 
  84         .inline htonl,4
  85         movl    %edi, %eax
  86         bswap   %eax
  87         .end
  88 
  89         .inline ntohl,4
  90         movl    %edi, %eax
  91         bswap   %eax
  92         .end
  93 
  94         .inline htons,4
  95         movl    %edi, %eax
  96         bswap   %eax
  97         shrl    $16, %eax
  98         .end
  99 
 100         .inline ntohs,4
 101         movl    %edi, %eax
 102         bswap   %eax
 103         shrl    $16, %eax
 104         .end
 105 
 106 /*
 * multiply two long numbers and yield a u_longlong_t result
 108  * Provided to manipulate hrtime_t values.
 109  */
 110         /* XX64 These don't work correctly with SOS9 build 13.0 yet
 111         .inline mul32, 8
 112         xorl    %edx, %edx
 113         movl    %edi, %eax
 114         mull    %esi
 115         shlq    $32, %rdx
 116         orq     %rdx, %rax
 117         ret
 118         .end
 119         */
 120 /*
 121  * Unlock hres_lock and increment the count value. (See clock.h)
 122  */
 123         .inline unlock_hres_lock, 0
 124         lock
 125         incl    hres_lock
 126         .end
 127 
 128         .inline atomic_orb,8
 129         movl    %esi, %eax
 130         lock
 131         orb     %al,(%rdi)
 132         .end
 133 
 134         .inline atomic_andb,8
 135         movl    %esi, %eax
 136         lock
 137         andb    %al,(%rdi)
 138         .end
 139 
 140 /*
 141  * atomic inc/dec operations.
 142  *      void atomic_inc16(uint16_t *addr) { ++*addr; }
 143  *      void atomic_dec16(uint16_t *addr) { --*addr; }
 144  */
 145         .inline atomic_inc16,4
 146         lock
 147         incw    (%rdi)
 148         .end
 149 
 150         .inline atomic_dec16,4
 151         lock
 152         decw    (%rdi)
 153         .end
 154 
 155 /*
 156  * atomic bit clear
 157  */
 158         .inline atomic_btr32,8
 159         lock
 160         btrl %esi, (%rdi)
 161         setc %al
 162         .end
 163 
 164 /*
 165  * Call the pause instruction.  To the Pentium 4 Xeon processor, it acts as
 166  * a hint that the code sequence is a busy spin-wait loop.  Without a pause
 167  * instruction in these loops, the P4 Xeon processor may suffer a severe
 168  * penalty when exiting the loop because the processor detects a possible
 169  * memory violation.  Inserting the pause instruction significantly reduces
 170  * the likelihood of a memory order violation, improving performance.
 171  * The pause instruction is a NOP on all other IA-32 processors.
 172  */
 173         .inline ht_pause, 0
 174         pause
 175         .end
 176 
 177 /*
 178  * inlines for update_sregs().
 179  */
 180         .inline __set_ds, 0
 181         movw    %di, %ds
 182         .end
 183 
 184         .inline __set_es, 0
 185         movw    %di, %es
 186         .end
 187 
 188         .inline __set_fs, 0
 189         movw    %di, %fs
 190         .end
 191 
 192         .inline __set_gs, 0
 193         movw    %di, %gs
 194         .end
 195 
 196 /*
 197  * prefetch 64 bytes
 198  */
 199 
 200         .inline prefetch_read_many,8
 201         prefetcht0      (%rdi)
 202         prefetcht0      32(%rdi)
 203         .end
 204 
 205         .inline prefetch_read_once,8
 206         prefetchnta     (%rdi)
 207         prefetchnta     32(%rdi)
 208         .end
 209 
 210         .inline prefetch_write_many,8
 211         prefetcht0      (%rdi)
 212         prefetcht0      32(%rdi)
 213         .end
 214 
 215         .inline prefetch_write_once,8
 216         prefetcht0      (%rdi)
 217         prefetcht0      32(%rdi)
 218         .end