de-linting of .s files

          --- old/usr/src/uts/intel/ia32/ml/sseblk.s
          +++ new/usr/src/uts/intel/ia32/ml/sseblk.s
[ 15 lines elided ]
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   */
  25   25  
  26      -#pragma ident   "%Z%%M% %I%     %E% SMI"
       26 +/*
       27 + * Copyright 2019 Joyent, Inc.
       28 + */
  27   29  
  28   30  #include <sys/asm_linkage.h>
  29   31  #include <sys/regset.h>
  30   32  #include <sys/privregs.h>
  31   33  
  32      -#if defined(__lint)
  33      -#include <sys/types.h>
  34      -#include <sys/archsystm.h>
  35      -#else
  36   34  #include "assym.h"
  37      -#endif
  38   35  
  39   36  /*
  40   37   * Do block operations using Streaming SIMD extensions
  41   38   */
  42   39  
  43   40  #if defined(DEBUG)
  44      -#if defined(__amd64)
  45   41  #define ASSERT_KPREEMPT_DISABLED(t, r32, msg)   \
  46   42          movq    %gs:CPU_THREAD, t;              \
  47   43          movsbl  T_PREEMPT(t), r32;              \
  48   44          testl   r32, r32;                       \
  49   45          jne     5f;                             \
  50   46          pushq   %rbp;                           \
  51   47          movq    %rsp, %rbp;                     \
  52   48          leaq    msg(%rip), %rdi;                \
  53   49          xorl    %eax, %eax;                     \
  54   50          call    panic;                          \
  55   51  5:
  56      -#elif defined(__i386)
  57      -#define ASSERT_KPREEMPT_DISABLED(t, r32, msg)   \
  58      -        movl    %gs:CPU_THREAD, t;              \
  59      -        movsbl  T_PREEMPT(t), r32;              \
  60      -        testl   r32, r32;                       \
  61      -        jne     5f;                             \
  62      -        pushl   %ebp;                           \
  63      -        movl    %esp, %ebp;                     \
  64      -        pushl   $msg;                           \
  65      -        call    panic;                          \
  66      -5:
  67      -#endif  /* __i386 */
  68   52  #else   /* DEBUG */
  69   53  #define ASSERT_KPREEMPT_DISABLED(t, r32, msg)
  70   54  #endif  /* DEBUG */
  71   55  
  72   56  #define BLOCKSHIFT      6
  73   57  #define BLOCKSIZE       64      /* (1 << BLOCKSHIFT) */
  74   58  #define BLOCKMASK       63      /* (BLOCKSIZE - 1) */
  75   59  
  76   60  #if (1 << BLOCKSHIFT) != BLOCKSIZE || BLOCKMASK != (BLOCKSIZE - 1)
  77   61  #error  "mucked up constants"
  78   62  #endif
  79   63  
  80      -#if defined(__lint)
  81      -
  82      -/*ARGSUSED*/
  83      -void
  84      -hwblkclr(void *addr, size_t size)
  85      -{}
  86      -
  87      -#else   /* __lint */
  88      -
  89      -#if defined(__amd64)
  90      -#define ADD     addq
  91      -#define SUB     subq
  92      -#else
  93      -#define ADD     addl
  94      -#define SUB     subl
  95      -#endif
  96      -
  97   64  #define SAVE_XMM0(r)                            \
  98   65          SAVE_XMM_PROLOG(r, 1);                  \
  99   66          movdqa  %xmm0, (r)
 100   67  
 101   68  #define ZERO_LOOP_INIT_XMM(dst)                 \
 102   69          pxor    %xmm0, %xmm0
 103   70  
 104   71  #define ZERO_LOOP_BODY_XMM(dst, cnt)            \
 105   72          movntdq %xmm0, (dst);                   \
 106   73          movntdq %xmm0, 0x10(dst);               \
 107   74          movntdq %xmm0, 0x20(dst);               \
 108   75          movntdq %xmm0, 0x30(dst);               \
 109      -        ADD     $BLOCKSIZE, dst;                \
 110      -        SUB     $1, cnt
       76 +        addq    $BLOCKSIZE, dst;                \
       77 +        subq    $1, cnt
 111   78  
 112   79  #define ZERO_LOOP_FINI_XMM(dst)                 \
 113   80          mfence
 114   81  
 115   82  #define RSTOR_XMM0(r)                           \
 116   83          movdqa  0x0(r), %xmm0;                  \
 117   84          RSTOR_XMM_EPILOG(r, 1)
 118   85  
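
The zero loop above writes each 64-byte block with four movntdq non-temporal stores, which bypass the cache and are only made globally visible by the trailing mfence. A userland C sketch of the same pattern using SSE2 intrinsics (the function name and plain-pointer interface are illustrative; the kernel routine additionally manages %cr0.TS and the %xmm0 save area):

        #include <stddef.h>
        #include <emmintrin.h>          /* SSE2: movntdq is _mm_stream_si128 */

        /*
         * Illustrative userland equivalent of ZERO_LOOP_{INIT,BODY,FINI}_XMM:
         * dst must be 16-byte aligned, nblocks counts 64-byte blocks.
         */
        static void
        zero_blocks_nt(void *dst, size_t nblocks)
        {
                __m128i z = _mm_setzero_si128();        /* pxor %xmm0, %xmm0 */
                char *p = dst;

                while (nblocks-- != 0) {
                        _mm_stream_si128((__m128i *)(p + 0x00), z);
                        _mm_stream_si128((__m128i *)(p + 0x10), z);
                        _mm_stream_si128((__m128i *)(p + 0x20), z);
                        _mm_stream_si128((__m128i *)(p + 0x30), z);
                        p += 64;                        /* BLOCKSIZE */
                }
                _mm_mfence();                           /* ZERO_LOOP_FINI_XMM */
        }
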
 119      -#if defined(__amd64)
 120      -
 121   86          /*
 122   87           * %rdi         dst
 123   88           * %rsi         size
 124   89           * %rax         saved %cr0 (#if DEBUG then %eax is t->t_preempt)
 125   90           * %r8          pointer to %xmm register save area
 126   91           */
 127   92          ENTRY(hwblkclr)
 128   93          pushq   %rbp
 129   94          movq    %rsp, %rbp
 130   95          testl   $BLOCKMASK, %edi        /* address must be BLOCKSIZE aligned */
[ 20 lines elided ]
 151  116          jnz     2f
 152  117          RSTOR_XMM0(%r8)
 153  118  2:      movq    %rax, %cr0
 154  119          leave
 155  120          ret
 156  121  .dobzero:
 157  122          leave
 158  123          jmp     bzero
 159  124          SET_SIZE(hwblkclr)
 160  125  
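
hwblkclr only takes the SSE path when the destination is BLOCKSIZE-aligned and the length is a nonzero multiple of BLOCKSIZE; anything else falls through to bzero. A C sketch of that screening (hwblkclr_sketch and the memset stand-ins are illustrative, not the kernel code):

        #include <stddef.h>
        #include <stdint.h>
        #include <string.h>

        #define BLOCKSHIFT      6
        #define BLOCKSIZE       64      /* (1 << BLOCKSHIFT) */
        #define BLOCKMASK       63      /* (BLOCKSIZE - 1) */

        /*
         * Illustrative C rendering of hwblkclr's argument screening; memset
         * stands in for both the bzero fallback and the movntdq zero loop.
         */
        void
        hwblkclr_sketch(void *addr, size_t size)
        {
                if (((uintptr_t)addr & BLOCKMASK) != 0 ||   /* unaligned dst */
                    size < BLOCKSIZE ||                     /* too small */
                    (size & BLOCKMASK) != 0) {              /* ragged length */
                        memset(addr, 0, size);              /* .dobzero path */
                        return;
                }

                size_t cnt = size >> BLOCKSHIFT;    /* whole 64-byte blocks */
                char *dst = addr;

                while (cnt-- != 0) {                /* ZERO_LOOP_BODY_XMM */
                        memset(dst, 0, BLOCKSIZE);  /* really 4 movntdq stores */
                        dst += BLOCKSIZE;
                }
                /* the real routine also saves/restores %cr0.TS and %xmm0 */
        }
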
 161      -#elif defined(__i386)
 162  126  
 163      -        /*
 164      -         * %eax         dst
 165      -         * %ecx         size in bytes, loop count
 166      -         * %ebx         saved %cr0 (#if DEBUG then t->t_preempt)
 167      -         * %edi         pointer to %xmm register save area
 168      -         */
 169      -        ENTRY(hwblkclr)
 170      -        movl    4(%esp), %eax
 171      -        movl    8(%esp), %ecx
 172      -        testl   $BLOCKMASK, %eax        /* address must be BLOCKSIZE aligned */
 173      -        jne     .dobzero
 174      -        cmpl    $BLOCKSIZE, %ecx        /* size must be at least BLOCKSIZE */
 175      -        jl      .dobzero
 176      -        testl   $BLOCKMASK, %ecx        /* .. and be a multiple of BLOCKSIZE */
 177      -        jne     .dobzero
 178      -        shrl    $BLOCKSHIFT, %ecx
 179      -        movl    0xc(%esp), %edx
 180      -        pushl   %ebx
 181      -
 182      -        pushl   %esi
 183      -        ASSERT_KPREEMPT_DISABLED(%esi, %ebx, .not_disabled)
 184      -        popl    %esi
 185      -        movl    %cr0, %ebx
 186      -        clts
 187      -        testl   $CR0_TS, %ebx
 188      -        jnz     1f
 189      -
 190      -        pushl   %edi
 191      -        SAVE_XMM0(%edi)
 192      -1:      ZERO_LOOP_INIT_XMM(%eax)
 193      -9:      ZERO_LOOP_BODY_XMM(%eax, %ecx)
 194      -        jnz     9b
 195      -        ZERO_LOOP_FINI_XMM(%eax)
 196      -
 197      -        testl   $CR0_TS, %ebx
 198      -        jnz     2f
 199      -        RSTOR_XMM0(%edi)
 200      -        popl    %edi
 201      -2:      movl    %ebx, %cr0
 202      -        popl    %ebx
 203      -        ret
 204      -.dobzero:
 205      -        jmp     bzero
 206      -        SET_SIZE(hwblkclr)
 207      -
 208      -#endif  /* __i386 */
 209      -#endif  /* __lint */
 210      -
 211      -
 212      -#if defined(__lint)
 213      -
 214      -/*ARGSUSED*/
 215      -void
 216      -hwblkpagecopy(const void *src, void *dst)
 217      -{}
 218      -
 219      -#else   /* __lint */
 220      -
 221  127  #define PREFETCH_START(src)                     \
 222  128          prefetchnta     0x0(src);               \
 223  129          prefetchnta     0x40(src)
 224  130  
 225  131  #define SAVE_XMMS(r)                            \
 226  132          SAVE_XMM_PROLOG(r, 8);                  \
 227  133          movdqa  %xmm0, (r);                     \
 228  134          movdqa  %xmm1, 0x10(r);                 \
 229  135          movdqa  %xmm2, 0x20(r);                 \
 230  136          movdqa  %xmm3, 0x30(r);                 \
[ 6 lines elided ]
 237  143          prefetchnta     0x80(src);              \
 238  144          prefetchnta     0xc0(src);              \
 239  145          movdqa  0x0(src), %xmm0;                \
 240  146          movdqa  0x10(src), %xmm1;               \
 241  147          movdqa  0x20(src), %xmm2;               \
 242  148          movdqa  0x30(src), %xmm3;               \
 243  149          movdqa  0x40(src), %xmm4;               \
 244  150          movdqa  0x50(src), %xmm5;               \
 245  151          movdqa  0x60(src), %xmm6;               \
 246  152          movdqa  0x70(src), %xmm7;               \
 247      -        ADD     $0x80, src
      153 +        addq    $0x80, src
 248  154  
 249  155  #define COPY_LOOP_BODY_XMM(src, dst, cnt)       \
 250  156          prefetchnta     0x80(src);              \
 251  157          prefetchnta     0xc0(src);              \
 252  158          prefetchnta     0x100(src);             \
 253  159          prefetchnta     0x140(src);             \
 254  160          movntdq %xmm0, (dst);                   \
 255  161          movntdq %xmm1, 0x10(dst);               \
 256  162          movntdq %xmm2, 0x20(dst);               \
 257  163          movntdq %xmm3, 0x30(dst);               \
 258  164          movdqa  0x0(src), %xmm0;                \
 259  165          movdqa  0x10(src), %xmm1;               \
 260  166          movntdq %xmm4, 0x40(dst);               \
 261  167          movntdq %xmm5, 0x50(dst);               \
 262  168          movdqa  0x20(src), %xmm2;               \
 263  169          movdqa  0x30(src), %xmm3;               \
 264  170          movntdq %xmm6, 0x60(dst);               \
 265  171          movntdq %xmm7, 0x70(dst);               \
 266  172          movdqa  0x40(src), %xmm4;               \
 267  173          movdqa  0x50(src), %xmm5;               \
 268      -        ADD     $0x80, dst;                     \
      174 +        addq    $0x80, dst;                     \
 269  175          movdqa  0x60(src), %xmm6;               \
 270  176          movdqa  0x70(src), %xmm7;               \
 271      -        ADD     $0x80, src;                     \
      177 +        addq    $0x80, src;                     \
 272  178          subl    $1, cnt
 273  179  
 274  180  #define COPY_LOOP_FINI_XMM(dst)                 \
 275  181          movntdq %xmm0, 0x0(dst);                \
 276  182          movntdq %xmm1, 0x10(dst);               \
 277  183          movntdq %xmm2, 0x20(dst);               \
 278  184          movntdq %xmm3, 0x30(dst);               \
 279  185          movntdq %xmm4, 0x40(dst);               \
 280  186          movntdq %xmm5, 0x50(dst);               \
 281  187          movntdq %xmm6, 0x60(dst);               \
[ 3 lines elided ]
 285  191          movdqa  0x0(r), %xmm0;                  \
 286  192          movdqa  0x10(r), %xmm1;                 \
 287  193          movdqa  0x20(r), %xmm2;                 \
 288  194          movdqa  0x30(r), %xmm3;                 \
 289  195          movdqa  0x40(r), %xmm4;                 \
 290  196          movdqa  0x50(r), %xmm5;                 \
 291  197          movdqa  0x60(r), %xmm6;                 \
 292  198          movdqa  0x70(r), %xmm7;                 \
 293  199          RSTOR_XMM_EPILOG(r, 8)
 294  200  
 295      -#if defined(__amd64)
 296      -
 297  201          /*
 298  202           * %rdi         src
 299  203           * %rsi         dst
 300  204           * %rdx         #if DEBUG then curthread
 301  205           * %ecx         loop count
  302  206           * %rax         saved %cr0 (#if DEBUG then %eax is t->t_preempt)
 303  207           * %r8          pointer to %xmm register save area
 304  208           */
 305  209          ENTRY(hwblkpagecopy)
 306  210          pushq   %rbp
[ 16 lines elided ]
 323  227          COPY_LOOP_FINI_XMM(%rsi)
 324  228          testl   $CR0_TS, %eax
 325  229          jnz     5f
 326  230          RSTOR_XMMS(%r8)
 327  231  5:      movq    %rax, %cr0
 328  232          mfence
 329  233          leave
 330  234          ret
 331  235          SET_SIZE(hwblkpagecopy)
 332  236  
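
COPY_LOOP_BODY_XMM software-pipelines the copy: while the eight xmm registers holding the current 128-byte chunk drain out through movntdq stores, the movdqa loads for the next chunk are already issuing, and prefetchnta keeps both streams ahead of the loop. A flattened (unpipelined) userland sketch with SSE2 intrinsics, assuming a 4096-byte page and 16-byte-aligned src/dst:

        #include <stddef.h>
        #include <emmintrin.h>          /* SSE2 loads/stores */
        #include <xmmintrin.h>          /* _mm_prefetch */

        /*
         * Flattened sketch of the hwblkpagecopy loop; the register-level
         * pipelining is collapsed into one load/store pass per 16 bytes.
         */
        static void
        page_copy_nt(const void *src, void *dst)
        {
                const char *s = src;
                char *d = dst;

                for (size_t i = 0; i < 4096; i += 128) {
                        _mm_prefetch(s + i + 0x80, _MM_HINT_NTA);
                        _mm_prefetch(s + i + 0xc0, _MM_HINT_NTA);
                        for (size_t j = 0; j < 128; j += 16) {
                                __m128i v = _mm_load_si128(
                                    (const __m128i *)(s + i + j)); /* movdqa */
                                _mm_stream_si128(
                                    (__m128i *)(d + i + j), v);    /* movntdq */
                        }
                }
                _mm_mfence();
        }
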
 333      -#elif defined(__i386)
 334      -
 335      -        /*
 336      -         * %eax         src
 337      -         * %edx         dst
 338      -         * %ecx         loop count
  339      -         * %ebx         saved %cr0 (#if DEBUG then t->t_preempt)
 340      -         * %edi         pointer to %xmm register save area
 341      -         * %esi         #if DEBUG temporary thread pointer
 342      -         */
 343      -        ENTRY(hwblkpagecopy)
 344      -        movl    4(%esp), %eax
 345      -        movl    8(%esp), %edx
 346      -        PREFETCH_START(%eax)
 347      -        pushl   %ebx
 348      -        /*
 349      -         * PAGESIZE is 4096, each loop moves 128 bytes, but the initial
 350      -         * load and final store save us one loop count
 351      -         */
 352      -        movl    $_CONST(32 - 1), %ecx
 353      -        pushl   %esi
 354      -        ASSERT_KPREEMPT_DISABLED(%esi, %ebx, .not_disabled)
 355      -        popl    %esi
 356      -        movl    %cr0, %ebx
 357      -        clts
 358      -        testl   $CR0_TS, %ebx
 359      -        jnz     3f
 360      -        pushl   %edi
 361      -        SAVE_XMMS(%edi)
 362      -3:      COPY_LOOP_INIT_XMM(%eax)
 363      -4:      COPY_LOOP_BODY_XMM(%eax, %edx, %ecx)
 364      -        jnz     4b
 365      -        COPY_LOOP_FINI_XMM(%edx)
 366      -        testl   $CR0_TS, %ebx
 367      -        jnz     5f
 368      -        RSTOR_XMMS(%edi)
 369      -        popl    %edi
 370      -5:      movl    %ebx, %cr0
 371      -        popl    %ebx
 372      -        mfence
 373      -        ret
 374      -        SET_SIZE(hwblkpagecopy)
 375      -
 376      -#endif  /* __i386 */
 377      -#endif  /* __lint */
 378      -
 379      -#if defined(__lint)
 380      -
 381      -/*
 382      - * Version of hwblkclr which doesn't use XMM registers.
 383      - * Note that it requires aligned dst and len.
 384      - *
 385      - * XXPV This needs to be performance tuned at some point.
 386      - *      Is 4 the best number of iterations to unroll?
 387      - */
 388      -/*ARGSUSED*/
 389      -void
 390      -block_zero_no_xmm(void *dst, int len)
 391      -{}
 392      -
 393      -#else   /* __lint */
 394      -
 395      -#if defined(__amd64)
 396      -
 397  237          ENTRY(block_zero_no_xmm)
 398  238          pushq   %rbp
 399  239          movq    %rsp, %rbp
 400  240          xorl    %eax, %eax
 401  241          addq    %rsi, %rdi
 402  242          negq    %rsi
 403  243  1:
 404  244          movnti  %rax, (%rdi, %rsi)
 405  245          movnti  %rax, 8(%rdi, %rsi)
 406  246          movnti  %rax, 16(%rdi, %rsi)
 407  247          movnti  %rax, 24(%rdi, %rsi)
 408  248          addq    $32, %rsi
 409  249          jnz     1b
 410  250          mfence
 411  251          leave
 412  252          ret
 413  253          SET_SIZE(block_zero_no_xmm)
 414  254  
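
block_zero_no_xmm uses a common unrolled-loop idiom: bias the base pointer past the end of the buffer, count a negative offset up toward zero, and let the addq set the flags that jnz tests, so one register serves as both index and loop counter. The same shape in C (block_zero_sketch is illustrative; plain stores stand in for movnti's non-temporal 8-byte stores):

        #include <stdint.h>

        /*
         * The negative-offset idiom from block_zero_no_xmm; len must be a
         * positive multiple of 32 and dst 8-byte aligned.
         */
        static void
        block_zero_sketch(void *dst, intptr_t len)
        {
                char *end = (char *)dst + len;  /* addq %rsi, %rdi */

                for (intptr_t off = -len; off != 0; off += 32) {
                        uint64_t *p = (uint64_t *)(end + off);
                        p[0] = 0;               /* movnti %rax, (%rdi, %rsi)   */
                        p[1] = 0;               /* movnti %rax, 8(%rdi, %rsi)  */
                        p[2] = 0;               /* movnti %rax, 16(%rdi, %rsi) */
                        p[3] = 0;               /* movnti %rax, 24(%rdi, %rsi) */
                }
                /* the assembly ends with mfence to order the movnti stores */
        }
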
 415      -#elif defined(__i386)
 416  255  
 417      -        ENTRY(block_zero_no_xmm)
 418      -        pushl   %ebp
 419      -        movl    %esp, %ebp
 420      -        xorl    %eax, %eax
 421      -        movl    8(%ebp), %edx
 422      -        movl    12(%ebp), %ecx
 423      -        addl    %ecx, %edx
 424      -        negl    %ecx
 425      -1:
 426      -        movnti  %eax, (%edx, %ecx)
 427      -        movnti  %eax, 4(%edx, %ecx)
 428      -        movnti  %eax, 8(%edx, %ecx)
 429      -        movnti  %eax, 12(%edx, %ecx)
 430      -        addl    $16, %ecx
 431      -        jnz     1b
 432      -        mfence
 433      -        leave
 434      -        ret
 435      -        SET_SIZE(block_zero_no_xmm)
 436      -
 437      -#endif  /* __i386 */
 438      -#endif  /* __lint */
 439      -
 440      -
 441      -#if defined(__lint)
 442      -
 443      -/*
 444      - * Version of page copy which doesn't use XMM registers.
 445      - *
 446      - * XXPV This needs to be performance tuned at some point.
 447      - *      Is 4 the right number of iterations to unroll?
 448      - *      Is the load/store order optimal? Should it use prefetch?
 449      - */
 450      -/*ARGSUSED*/
 451      -void
 452      -page_copy_no_xmm(void *dst, void *src)
 453      -{}
 454      -
 455      -#else   /* __lint */
 456      -
 457      -#if defined(__amd64)
 458      -
 459  256          ENTRY(page_copy_no_xmm)
 460  257          movq    $MMU_STD_PAGESIZE, %rcx
 461  258          addq    %rcx, %rdi
 462  259          addq    %rcx, %rsi
 463  260          negq    %rcx
 464  261  1:
 465  262          movq    (%rsi, %rcx), %rax
 466  263          movnti  %rax, (%rdi, %rcx)
 467  264          movq    8(%rsi, %rcx), %rax
 468  265          movnti  %rax, 8(%rdi, %rcx)
 469  266          movq    16(%rsi, %rcx), %rax
 470  267          movnti  %rax, 16(%rdi, %rcx)
 471  268          movq    24(%rsi, %rcx), %rax
 472  269          movnti  %rax, 24(%rdi, %rcx)
 473  270          addq    $32, %rcx
 474  271          jnz     1b
 475  272          mfence
 476  273          ret
 477  274          SET_SIZE(page_copy_no_xmm)
 478  275  
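
page_copy_no_xmm is the same negative-offset loop with a load added ahead of each movnti. A minimal sketch using the movnti intrinsic (x86-64 SSE2 only; the 4096-byte page size and function name are assumptions):

        #include <emmintrin.h>  /* x86-64 SSE2: movnti is _mm_stream_si64 */

        /*
         * Ordinary 8-byte loads feeding non-temporal stores, one fence
         * at the end; src/dst are assumed 8-byte aligned.
         */
        static void
        page_copy_nti(void *dst, const void *src)
        {
                const long long *s = src;
                long long *d = dst;

                for (int i = 0; i < 4096 / 8; i += 4) {
                        _mm_stream_si64(d + i + 0, s[i + 0]); /* movq+movnti */
                        _mm_stream_si64(d + i + 1, s[i + 1]);
                        _mm_stream_si64(d + i + 2, s[i + 2]);
                        _mm_stream_si64(d + i + 3, s[i + 3]);
                }
                _mm_mfence();
        }
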
 479      -#elif defined(__i386)
 480      -
 481      -        ENTRY(page_copy_no_xmm)
 482      -        pushl   %esi
 483      -        movl    $MMU_STD_PAGESIZE, %ecx
 484      -        movl    8(%esp), %edx
 485      -        movl    12(%esp), %esi
 486      -        addl    %ecx, %edx
 487      -        addl    %ecx, %esi
 488      -        negl    %ecx
 489      -1:
 490      -        movl    (%esi, %ecx), %eax
 491      -        movnti  %eax, (%edx, %ecx)
 492      -        movl    4(%esi, %ecx), %eax
 493      -        movnti  %eax, 4(%edx, %ecx)
 494      -        movl    8(%esi, %ecx), %eax
 495      -        movnti  %eax, 8(%edx, %ecx)
 496      -        movl    12(%esi, %ecx), %eax
 497      -        movnti  %eax, 12(%edx, %ecx)
 498      -        addl    $16, %ecx
 499      -        jnz     1b
 500      -        mfence
 501      -        popl    %esi
 502      -        ret
 503      -        SET_SIZE(page_copy_no_xmm)
 504      -
 505      -#endif  /* __i386 */
 506      -#endif  /* __lint */
 507      -
 508      -#if defined(DEBUG) && !defined(__lint)
      276 +#if defined(DEBUG)
 509  277          .text
 510  278  .not_disabled:
 511  279          .string "sseblk: preemption not disabled!"
 512  280  #endif
    