11787 Kernel needs to be built with retpolines
11788 Kernel needs to generally use RSB stuffing
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: John Levon <john.levon@joyent.com>
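
This webrev covers usr/src/uts/intel/ia32/ml/copy.s. The substantive change is
that every indirect jump through a register or memory operand is routed through
the INDIRECT_JMP_REG macro so the kernel can be built with retpolines (11787);
RSB stuffing (11788) does not show up in this file. The remaining deltas are a
copyright bump and trailing-whitespace cleanup. As a rough sketch only, and not
the literal expansion of INDIRECT_JMP_REG, a retpoline replacement for
"jmpq *%r10" pins speculative execution in a harmless loop while the real
target is installed as a return address:

        /*
         * Hypothetical retpoline thunk for a jump through %r10; the local
         * labels and exact shape are illustrative, not taken from the source.
         */
        call    2f                      /* pushes the address of 1: */
1:      pause                           /* mispredicted speculation spins here */
        lfence
        jmp     1b
2:      movq    %r10, (%rsp)            /* overwrite return addr with target */
        ret                             /* architecturally jumps to *%r10 */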

--- old/usr/src/uts/intel/ia32/ml/copy.s
+++ new/usr/src/uts/intel/ia32/ml/copy.s
[ 28 lines elided ]
  29   29   */
  30   30  
  31   31  /*       Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.        */
  32   32  /*       Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T          */
  33   33  /*         All Rights Reserved                                          */
  34   34  
  35   35  /*       Copyright (c) 1987, 1988 Microsoft Corporation                 */
  36   36  /*         All Rights Reserved                                          */
  37   37  
  38   38  /*
  39      - * Copyright (c) 2018 Joyent, Inc.
       39 + * Copyright 2019 Joyent, Inc.
  40   40   */
  41   41  
  42   42  #include <sys/errno.h>
  43   43  #include <sys/asm_linkage.h>
  44   44  
  45   45  #if defined(__lint)
  46   46  #include <sys/types.h>
  47   47  #include <sys/systm.h>
  48   48  #else   /* __lint */
  49   49  #include "assym.h"
[ 71 lines elided ]
 121  121   * we mask off PS_ACHK off via the AMD_SFMASK MSR. See init_cpu_syscall() for
 122  122   * where that gets masked off.
 123  123   */
 124  124  
 125  125  /*
 126  126   * The optimal 64-bit bcopy and kcopy for modern x86 processors uses
 127  127   * "rep smovq" for large sizes. Performance data shows that many calls to
 128  128   * bcopy/kcopy/bzero/kzero operate on small buffers. For best performance for
 129  129   * these small sizes unrolled code is used. For medium sizes loops writing
 130  130   * 64-bytes per loop are used. Transition points were determined experimentally.
 131      - */ 
      131 + */
 132  132  #define BZERO_USE_REP   (1024)
 133  133  #define BCOPY_DFLT_REP  (128)
 134  134  #define BCOPY_NHM_REP   (768)
 135  135  
 136  136  /*
 137  137   * Copy a block of storage, returning an error code if `from' or
 138  138   * `to' takes a kernel pagefault which cannot be resolved.
 139  139   * Returns errno value on pagefault error, 0 if all ok
 140  140   */
 141  141  
[ 30 lines elided ]
 172  172  
 173  173          .globl  kernelbase
 174  174          .globl  postbootkernelbase
 175  175  
 176  176  #if defined(__amd64)
 177  177  
 178  178          ENTRY(kcopy)
 179  179          pushq   %rbp
 180  180          movq    %rsp, %rbp
 181  181  #ifdef DEBUG
 182      -        cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
      182 +        cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
 183  183          jb      0f
 184  184          cmpq    postbootkernelbase(%rip), %rsi          /* %rsi = to */
 185  185          jnb     1f
 186  186  0:      leaq    .kcopy_panic_msg(%rip), %rdi
 187  187          xorl    %eax, %eax
 188  188          call    panic
 189  189  1:
 190  190  #endif
 191  191          /*
 192  192           * pass lofault value as 4th argument to do_copy_fault
[ 31 lines elided ]
 224  224          movl    postbootkernelbase, %eax
 225  225          cmpl    %eax, ARG_FROM(%ebp)
 226  226          jb      0f
 227  227          cmpl    %eax, ARG_TO(%ebp)
 228  228          jnb     1f
 229  229  0:      pushl   $.kcopy_panic_msg
 230  230          call    panic
 231  231  1:      popl    %ebp
 232  232  #endif
 233  233          lea     _kcopy_copyerr, %eax    /* lofault value */
 234      -        movl    %gs:CPU_THREAD, %edx    
      234 +        movl    %gs:CPU_THREAD, %edx
 235  235  
 236  236  do_copy_fault:
 237  237          pushl   %ebp
 238  238          movl    %esp, %ebp              /* setup stack frame */
 239  239          pushl   %esi
 240  240          pushl   %edi                    /* save registers */
 241  241  
 242  242          movl    T_LOFAULT(%edx), %edi
 243  243          pushl   %edi                    /* save the current lofault */
 244  244          movl    %eax, T_LOFAULT(%edx)   /* new lofault */
[ 58 lines elided ]
 303  303          movq    (src, cnt, 8), %rax;            \
 304  304          movq    0x8(src, cnt, 8), %r8;          \
 305  305          movnti  %rax, (dst, cnt, 8);            \
 306  306          movnti  %r8, 0x8(dst, cnt, 8);          \
 307  307          addq    $2, cnt
 308  308  
 309  309          ENTRY(kcopy_nta)
 310  310          pushq   %rbp
 311  311          movq    %rsp, %rbp
 312  312  #ifdef DEBUG
 313      -        cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
      313 +        cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
 314  314          jb      0f
 315  315          cmpq    postbootkernelbase(%rip), %rsi          /* %rsi = to */
 316  316          jnb     1f
 317  317  0:      leaq    .kcopy_panic_msg(%rip), %rdi
 318  318          xorl    %eax, %eax
 319  319          call    panic
 320  320  1:
 321  321  #endif
 322  322  
 323  323          movq    %gs:CPU_THREAD, %r9
[ 75 lines elided ]
 399  399          ENTRY(kcopy_nta)
 400  400          jmp     kcopy
 401  401  
 402  402          lea     _kcopy_nta_copyerr, %eax        /* lofault value */
 403  403          ALTENTRY(do_copy_fault_nta)
 404  404          pushl   %ebp
 405  405          movl    %esp, %ebp              /* setup stack frame */
 406  406          pushl   %esi
 407  407          pushl   %edi
 408  408  
 409      -        movl    %gs:CPU_THREAD, %edx    
      409 +        movl    %gs:CPU_THREAD, %edx
 410  410          movl    T_LOFAULT(%edx), %edi
 411  411          pushl   %edi                    /* save the current lofault */
 412  412          movl    %eax, T_LOFAULT(%edx)   /* new lofault */
 413  413  
 414  414          /* COPY_LOOP_BODY needs to use %esi */
 415  415          movl    ARG_COUNT(%ebp), %ecx
 416  416          movl    ARG_FROM(%ebp), %edi
 417  417          movl    ARG_TO(%ebp), %eax
 418  418          COPY_LOOP_INIT(%edi, %eax, %ecx)
 419  419  1:      COPY_LOOP_BODY(%edi, %eax, %ecx)
[ 28 lines elided ]
 448  448  #else   /* __lint */
 449  449  
 450  450  #if defined(__amd64)
 451  451  
 452  452          ENTRY(bcopy)
 453  453  #ifdef DEBUG
 454  454          orq     %rdx, %rdx              /* %rdx = count */
 455  455          jz      1f
 456  456          cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
 457  457          jb      0f
 458      -        cmpq    postbootkernelbase(%rip), %rsi          /* %rsi = to */         
      458 +        cmpq    postbootkernelbase(%rip), %rsi          /* %rsi = to */
 459  459          jnb     1f
 460  460  0:      leaq    .bcopy_panic_msg(%rip), %rdi
 461  461          jmp     call_panic              /* setup stack and call panic */
 462  462  1:
 463  463  #endif
 464  464          /*
 465  465           * bcopy_altentry() is called from kcopy, i.e., do_copy_fault.
 466  466           * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy
 467  467           * uses these registers in future they must be saved and restored.
 468  468           */
[ 6 lines elided ]
 475  475          /*
 476  476           * Performance data shows many caller's copy small buffers. So for
 477  477           * best perf for these sizes unrolled code is used. Store data without
 478  478           * worrying about alignment.
 479  479           */
 480  480          leaq    L(fwdPxQx)(%rip), %r10
 481  481          addq    %rdx, %rdi
 482  482          addq    %rdx, %rsi
 483  483          movslq  (%r10,%rdx,4), %rcx
 484  484          leaq    (%rcx,%r10,1), %r10
 485      -        jmpq    *%r10
      485 +        INDIRECT_JMP_REG(r10)
 486  486  
 487  487          .p2align 4
 488  488  L(fwdPxQx):
 489  489          .int       L(P0Q0)-L(fwdPxQx)   /* 0 */
 490  490          .int       L(P1Q0)-L(fwdPxQx)
 491  491          .int       L(P2Q0)-L(fwdPxQx)
 492  492          .int       L(P3Q0)-L(fwdPxQx)
 493  493          .int       L(P4Q0)-L(fwdPxQx)
 494  494          .int       L(P5Q0)-L(fwdPxQx)
 495  495          .int       L(P6Q0)-L(fwdPxQx)
 496      -        .int       L(P7Q0)-L(fwdPxQx) 
      496 +        .int       L(P7Q0)-L(fwdPxQx)
 497  497  
 498  498          .int       L(P0Q1)-L(fwdPxQx)   /* 8 */
 499  499          .int       L(P1Q1)-L(fwdPxQx)
 500  500          .int       L(P2Q1)-L(fwdPxQx)
 501  501          .int       L(P3Q1)-L(fwdPxQx)
 502  502          .int       L(P4Q1)-L(fwdPxQx)
 503  503          .int       L(P5Q1)-L(fwdPxQx)
 504  504          .int       L(P6Q1)-L(fwdPxQx)
 505      -        .int       L(P7Q1)-L(fwdPxQx) 
      505 +        .int       L(P7Q1)-L(fwdPxQx)
 506  506  
 507  507          .int       L(P0Q2)-L(fwdPxQx)   /* 16 */
 508  508          .int       L(P1Q2)-L(fwdPxQx)
 509  509          .int       L(P2Q2)-L(fwdPxQx)
 510  510          .int       L(P3Q2)-L(fwdPxQx)
 511  511          .int       L(P4Q2)-L(fwdPxQx)
 512  512          .int       L(P5Q2)-L(fwdPxQx)
 513  513          .int       L(P6Q2)-L(fwdPxQx)
 514      -        .int       L(P7Q2)-L(fwdPxQx) 
      514 +        .int       L(P7Q2)-L(fwdPxQx)
 515  515  
 516  516          .int       L(P0Q3)-L(fwdPxQx)   /* 24 */
 517  517          .int       L(P1Q3)-L(fwdPxQx)
 518  518          .int       L(P2Q3)-L(fwdPxQx)
 519  519          .int       L(P3Q3)-L(fwdPxQx)
 520  520          .int       L(P4Q3)-L(fwdPxQx)
 521  521          .int       L(P5Q3)-L(fwdPxQx)
 522  522          .int       L(P6Q3)-L(fwdPxQx)
 523      -        .int       L(P7Q3)-L(fwdPxQx) 
      523 +        .int       L(P7Q3)-L(fwdPxQx)
 524  524  
 525  525          .int       L(P0Q4)-L(fwdPxQx)   /* 32 */
 526  526          .int       L(P1Q4)-L(fwdPxQx)
 527  527          .int       L(P2Q4)-L(fwdPxQx)
 528  528          .int       L(P3Q4)-L(fwdPxQx)
 529  529          .int       L(P4Q4)-L(fwdPxQx)
 530  530          .int       L(P5Q4)-L(fwdPxQx)
 531  531          .int       L(P6Q4)-L(fwdPxQx)
 532      -        .int       L(P7Q4)-L(fwdPxQx) 
      532 +        .int       L(P7Q4)-L(fwdPxQx)
 533  533  
 534  534          .int       L(P0Q5)-L(fwdPxQx)   /* 40 */
 535  535          .int       L(P1Q5)-L(fwdPxQx)
 536  536          .int       L(P2Q5)-L(fwdPxQx)
 537  537          .int       L(P3Q5)-L(fwdPxQx)
 538  538          .int       L(P4Q5)-L(fwdPxQx)
 539  539          .int       L(P5Q5)-L(fwdPxQx)
 540  540          .int       L(P6Q5)-L(fwdPxQx)
 541      -        .int       L(P7Q5)-L(fwdPxQx) 
      541 +        .int       L(P7Q5)-L(fwdPxQx)
 542  542  
 543  543          .int       L(P0Q6)-L(fwdPxQx)   /* 48 */
 544  544          .int       L(P1Q6)-L(fwdPxQx)
 545  545          .int       L(P2Q6)-L(fwdPxQx)
 546  546          .int       L(P3Q6)-L(fwdPxQx)
 547  547          .int       L(P4Q6)-L(fwdPxQx)
 548  548          .int       L(P5Q6)-L(fwdPxQx)
 549  549          .int       L(P6Q6)-L(fwdPxQx)
 550      -        .int       L(P7Q6)-L(fwdPxQx) 
      550 +        .int       L(P7Q6)-L(fwdPxQx)
 551  551  
 552  552          .int       L(P0Q7)-L(fwdPxQx)   /* 56 */
 553  553          .int       L(P1Q7)-L(fwdPxQx)
 554  554          .int       L(P2Q7)-L(fwdPxQx)
 555  555          .int       L(P3Q7)-L(fwdPxQx)
 556  556          .int       L(P4Q7)-L(fwdPxQx)
 557  557          .int       L(P5Q7)-L(fwdPxQx)
 558  558          .int       L(P6Q7)-L(fwdPxQx)
 559      -        .int       L(P7Q7)-L(fwdPxQx) 
      559 +        .int       L(P7Q7)-L(fwdPxQx)
 560  560  
 561  561          .int       L(P0Q8)-L(fwdPxQx)   /* 64 */
 562  562          .int       L(P1Q8)-L(fwdPxQx)
 563  563          .int       L(P2Q8)-L(fwdPxQx)
 564  564          .int       L(P3Q8)-L(fwdPxQx)
 565  565          .int       L(P4Q8)-L(fwdPxQx)
 566  566          .int       L(P5Q8)-L(fwdPxQx)
 567  567          .int       L(P6Q8)-L(fwdPxQx)
 568  568          .int       L(P7Q8)-L(fwdPxQx)
 569  569  
[ 27 lines elided ]
 597  597          mov    %r8, -0x20(%rsi)
 598  598  L(P0Q3):
 599  599          mov    -0x18(%rdi), %rcx
 600  600          mov    %rcx, -0x18(%rsi)
 601  601  L(P0Q2):
 602  602          mov    -0x10(%rdi), %r10
 603  603          mov    %r10, -0x10(%rsi)
 604  604  L(P0Q1):
 605  605          mov    -0x8(%rdi), %r8
 606  606          mov    %r8, -0x8(%rsi)
 607      -L(P0Q0):                                   
 608      -        ret   
      607 +L(P0Q0):
      608 +        ret
 609  609  
 610  610          .p2align 4
 611  611  L(P1Q9):
 612  612          mov    -0x49(%rdi), %r8
 613  613          mov    %r8, -0x49(%rsi)
 614  614  L(P1Q8):
 615  615          mov    -0x41(%rdi), %rcx
 616  616          mov    %rcx, -0x41(%rsi)
 617  617  L(P1Q7):
 618  618          mov    -0x39(%rdi), %r10
[ 12 lines elided ]
 631  631          mov    %r8, -0x19(%rsi)
 632  632  L(P1Q2):
 633  633          mov    -0x11(%rdi), %rcx
 634  634          mov    %rcx, -0x11(%rsi)
 635  635  L(P1Q1):
 636  636          mov    -0x9(%rdi), %r10
 637  637          mov    %r10, -0x9(%rsi)
 638  638  L(P1Q0):
 639  639          movzbq -0x1(%rdi), %r8
 640  640          mov    %r8b, -0x1(%rsi)
 641      -        ret   
      641 +        ret
 642  642  
 643  643          .p2align 4
 644  644  L(P2Q9):
 645  645          mov    -0x4a(%rdi), %r8
 646  646          mov    %r8, -0x4a(%rsi)
 647  647  L(P2Q8):
 648  648          mov    -0x42(%rdi), %rcx
 649  649          mov    %rcx, -0x42(%rsi)
 650  650  L(P2Q7):
 651  651          mov    -0x3a(%rdi), %r10
[ 12 lines elided ]
 664  664          mov    %r8, -0x1a(%rsi)
 665  665  L(P2Q2):
 666  666          mov    -0x12(%rdi), %rcx
 667  667          mov    %rcx, -0x12(%rsi)
 668  668  L(P2Q1):
 669  669          mov    -0xa(%rdi), %r10
 670  670          mov    %r10, -0xa(%rsi)
 671  671  L(P2Q0):
 672  672          movzwq -0x2(%rdi), %r8
 673  673          mov    %r8w, -0x2(%rsi)
 674      -        ret   
      674 +        ret
 675  675  
 676  676          .p2align 4
 677  677  L(P3Q9):
 678  678          mov    -0x4b(%rdi), %r8
 679  679          mov    %r8, -0x4b(%rsi)
 680  680  L(P3Q8):
 681  681          mov    -0x43(%rdi), %rcx
 682  682          mov    %rcx, -0x43(%rsi)
 683  683  L(P3Q7):
 684  684          mov    -0x3b(%rdi), %r10
[ 10 lines elided ]
 695  695  L(P3Q3):
 696  696          mov    -0x1b(%rdi), %r8
 697  697          mov    %r8, -0x1b(%rsi)
 698  698  L(P3Q2):
 699  699          mov    -0x13(%rdi), %rcx
 700  700          mov    %rcx, -0x13(%rsi)
 701  701  L(P3Q1):
 702  702          mov    -0xb(%rdi), %r10
 703  703          mov    %r10, -0xb(%rsi)
 704  704          /*
 705      -         * These trailing loads/stores have to do all their loads 1st, 
      705 +         * These trailing loads/stores have to do all their loads 1st,
 706  706           * then do the stores.
 707  707           */
 708  708  L(P3Q0):
 709  709          movzwq -0x3(%rdi), %r8
 710  710          movzbq -0x1(%rdi), %r10
 711  711          mov    %r8w, -0x3(%rsi)
 712  712          mov    %r10b, -0x1(%rsi)
 713      -        ret   
      713 +        ret
 714  714  
 715  715          .p2align 4
 716  716  L(P4Q9):
 717  717          mov    -0x4c(%rdi), %r8
 718  718          mov    %r8, -0x4c(%rsi)
 719  719  L(P4Q8):
 720  720          mov    -0x44(%rdi), %rcx
 721  721          mov    %rcx, -0x44(%rsi)
 722  722  L(P4Q7):
 723  723          mov    -0x3c(%rdi), %r10
[ 12 lines elided ]
 736  736          mov    %r8, -0x1c(%rsi)
 737  737  L(P4Q2):
 738  738          mov    -0x14(%rdi), %rcx
 739  739          mov    %rcx, -0x14(%rsi)
 740  740  L(P4Q1):
 741  741          mov    -0xc(%rdi), %r10
 742  742          mov    %r10, -0xc(%rsi)
 743  743  L(P4Q0):
 744  744          mov    -0x4(%rdi), %r8d
 745  745          mov    %r8d, -0x4(%rsi)
 746      -        ret   
      746 +        ret
 747  747  
 748  748          .p2align 4
 749  749  L(P5Q9):
 750  750          mov    -0x4d(%rdi), %r8
 751  751          mov    %r8, -0x4d(%rsi)
 752  752  L(P5Q8):
 753  753          mov    -0x45(%rdi), %rcx
 754  754          mov    %rcx, -0x45(%rsi)
 755  755  L(P5Q7):
 756  756          mov    -0x3d(%rdi), %r10
[ 14 lines elided ]
 771  771          mov    -0x15(%rdi), %rcx
 772  772          mov    %rcx, -0x15(%rsi)
 773  773  L(P5Q1):
 774  774          mov    -0xd(%rdi), %r10
 775  775          mov    %r10, -0xd(%rsi)
 776  776  L(P5Q0):
 777  777          mov    -0x5(%rdi), %r8d
 778  778          movzbq -0x1(%rdi), %r10
 779  779          mov    %r8d, -0x5(%rsi)
 780  780          mov    %r10b, -0x1(%rsi)
 781      -        ret   
      781 +        ret
 782  782  
 783  783          .p2align 4
 784  784  L(P6Q9):
 785  785          mov    -0x4e(%rdi), %r8
 786  786          mov    %r8, -0x4e(%rsi)
 787  787  L(P6Q8):
 788  788          mov    -0x46(%rdi), %rcx
 789  789          mov    %rcx, -0x46(%rsi)
 790  790  L(P6Q7):
 791  791          mov    -0x3e(%rdi), %r10
[ 14 lines elided ]
 806  806          mov    -0x16(%rdi), %rcx
 807  807          mov    %rcx, -0x16(%rsi)
 808  808  L(P6Q1):
 809  809          mov    -0xe(%rdi), %r10
 810  810          mov    %r10, -0xe(%rsi)
 811  811  L(P6Q0):
 812  812          mov    -0x6(%rdi), %r8d
 813  813          movzwq -0x2(%rdi), %r10
 814  814          mov    %r8d, -0x6(%rsi)
 815  815          mov    %r10w, -0x2(%rsi)
 816      -        ret   
      816 +        ret
 817  817  
 818  818          .p2align 4
 819  819  L(P7Q9):
 820  820          mov    -0x4f(%rdi), %r8
 821  821          mov    %r8, -0x4f(%rsi)
 822  822  L(P7Q8):
 823  823          mov    -0x47(%rdi), %rcx
 824  824          mov    %rcx, -0x47(%rsi)
 825  825  L(P7Q7):
 826  826          mov    -0x3f(%rdi), %r10
[ 16 lines elided ]
 843  843  L(P7Q1):
 844  844          mov    -0xf(%rdi), %r10
 845  845          mov    %r10, -0xf(%rsi)
 846  846  L(P7Q0):
 847  847          mov    -0x7(%rdi), %r8d
 848  848          movzwq -0x3(%rdi), %r10
 849  849          movzbq -0x1(%rdi), %rcx
 850  850          mov    %r8d, -0x7(%rsi)
 851  851          mov    %r10w, -0x3(%rsi)
 852  852          mov    %cl, -0x1(%rsi)
 853      -        ret   
      853 +        ret
 854  854  
 855  855          /*
 856  856           * For large sizes rep smovq is fastest.
 857  857           * Transition point determined experimentally as measured on
 858  858           * Intel Xeon processors (incl. Nehalem and previous generations) and
 859  859           * AMD Opteron. The transition value is patched at boot time to avoid
 860  860           * memory reference hit.
 861  861           */
 862  862          .globl bcopy_patch_start
 863  863  bcopy_patch_start:
[ 67 lines elided ]
 931  931  
 932  932          /*
 933  933           * Copy remaining bytes (0-63)
 934  934           */
 935  935  L(do_remainder):
 936  936          leaq    L(fwdPxQx)(%rip), %r10
 937  937          addq    %rdx, %rdi
 938  938          addq    %rdx, %rsi
 939  939          movslq  (%r10,%rdx,4), %rcx
 940  940          leaq    (%rcx,%r10,1), %r10
 941      -        jmpq    *%r10
      941 +        INDIRECT_JMP_REG(r10)
 942  942  
 943  943          /*
 944  944           * Use rep smovq. Clear remainder via unrolled code
 945  945           */
 946  946          .p2align 4
 947  947  L(use_rep):
 948  948          xchgq   %rdi, %rsi              /* %rsi = source, %rdi = destination */
 949  949          movq    %rdx, %rcx              /* %rcx = count */
 950  950          shrq    $3, %rcx                /* 8-byte word count */
 951  951          rep
[ 93 lines elided ]
1045 1045  
1046 1046          ENTRY(kzero)
1047 1047  #ifdef DEBUG
1048 1048          cmpq    postbootkernelbase(%rip), %rdi  /* %rdi = addr */
1049 1049          jnb     0f
1050 1050          leaq    .kzero_panic_msg(%rip), %rdi
1051 1051          jmp     call_panic              /* setup stack and call panic */
1052 1052  0:
1053 1053  #endif
1054 1054          /*
1055      -         * pass lofault value as 3rd argument for fault return 
     1055 +         * pass lofault value as 3rd argument for fault return
1056 1056           */
1057 1057          leaq    _kzeroerr(%rip), %rdx
1058 1058  
1059 1059          movq    %gs:CPU_THREAD, %r9     /* %r9 = thread addr */
1060 1060          movq    T_LOFAULT(%r9), %r11    /* save the current lofault */
1061 1061          movq    %rdx, T_LOFAULT(%r9)    /* new lofault */
1062 1062          call    bzero_altentry
1063 1063          xorl    %eax, %eax
1064 1064          movq    %r11, T_LOFAULT(%r9)    /* restore the original lofault */
1065 1065          ret
[ 22 lines elided ]
1088 1088          pushl   $.kzero_panic_msg
1089 1089          call    panic
1090 1090  0:      popl    %ebp
1091 1091  #endif
1092 1092          lea     _kzeroerr, %eax         /* kzeroerr is lofault value */
1093 1093  
1094 1094          pushl   %ebp                    /* save stack base */
1095 1095          movl    %esp, %ebp              /* set new stack base */
1096 1096          pushl   %edi                    /* save %edi */
1097 1097  
1098      -        mov     %gs:CPU_THREAD, %edx    
     1098 +        mov     %gs:CPU_THREAD, %edx
1099 1099          movl    T_LOFAULT(%edx), %edi
1100 1100          pushl   %edi                    /* save the current lofault */
1101 1101          movl    %eax, T_LOFAULT(%edx)   /* new lofault */
1102 1102  
1103 1103          movl    ARG_COUNT(%ebp), %ecx   /* get size in bytes */
1104 1104          movl    ARG_ADDR(%ebp), %edi    /* %edi <- address of bytes to clear */
1105 1105          shrl    $2, %ecx                /* Count of double words to zero */
1106 1106          xorl    %eax, %eax              /* sstol val */
1107 1107          rep
1108 1108            sstol                 /* %ecx contains words to clear (%eax=0) */
[ 54 lines elided ]
1163 1163  
1164 1164          /*
1165 1165           * Performance data shows many caller's are zeroing small buffers. So
1166 1166           * for best perf for these sizes unrolled code is used. Store zeros
1167 1167           * without worrying about alignment.
1168 1168           */
1169 1169          leaq    L(setPxQx)(%rip), %r10
1170 1170          addq    %rsi, %rdi
1171 1171          movslq  (%r10,%rsi,4), %rcx
1172 1172          leaq    (%rcx,%r10,1), %r10
1173      -        jmpq    *%r10
     1173 +        INDIRECT_JMP_REG(r10)
1174 1174  
1175 1175          .p2align 4
1176 1176  L(setPxQx):
1177 1177          .int       L(P0Q0)-L(setPxQx)   /* 0 */
1178 1178          .int       L(P1Q0)-L(setPxQx)
1179 1179          .int       L(P2Q0)-L(setPxQx)
1180 1180          .int       L(P3Q0)-L(setPxQx)
1181 1181          .int       L(P4Q0)-L(setPxQx)
1182 1182          .int       L(P5Q0)-L(setPxQx)
1183 1183          .int       L(P6Q0)-L(setPxQx)
1184      -        .int       L(P7Q0)-L(setPxQx) 
     1184 +        .int       L(P7Q0)-L(setPxQx)
1185 1185  
1186 1186          .int       L(P0Q1)-L(setPxQx)   /* 8 */
1187 1187          .int       L(P1Q1)-L(setPxQx)
1188 1188          .int       L(P2Q1)-L(setPxQx)
1189 1189          .int       L(P3Q1)-L(setPxQx)
1190 1190          .int       L(P4Q1)-L(setPxQx)
1191 1191          .int       L(P5Q1)-L(setPxQx)
1192 1192          .int       L(P6Q1)-L(setPxQx)
1193      -        .int       L(P7Q1)-L(setPxQx) 
     1193 +        .int       L(P7Q1)-L(setPxQx)
1194 1194  
1195 1195          .int       L(P0Q2)-L(setPxQx)   /* 16 */
1196 1196          .int       L(P1Q2)-L(setPxQx)
1197 1197          .int       L(P2Q2)-L(setPxQx)
1198 1198          .int       L(P3Q2)-L(setPxQx)
1199 1199          .int       L(P4Q2)-L(setPxQx)
1200 1200          .int       L(P5Q2)-L(setPxQx)
1201 1201          .int       L(P6Q2)-L(setPxQx)
1202      -        .int       L(P7Q2)-L(setPxQx) 
     1202 +        .int       L(P7Q2)-L(setPxQx)
1203 1203  
1204 1204          .int       L(P0Q3)-L(setPxQx)   /* 24 */
1205 1205          .int       L(P1Q3)-L(setPxQx)
1206 1206          .int       L(P2Q3)-L(setPxQx)
1207 1207          .int       L(P3Q3)-L(setPxQx)
1208 1208          .int       L(P4Q3)-L(setPxQx)
1209 1209          .int       L(P5Q3)-L(setPxQx)
1210 1210          .int       L(P6Q3)-L(setPxQx)
1211      -        .int       L(P7Q3)-L(setPxQx) 
     1211 +        .int       L(P7Q3)-L(setPxQx)
1212 1212  
1213 1213          .int       L(P0Q4)-L(setPxQx)   /* 32 */
1214 1214          .int       L(P1Q4)-L(setPxQx)
1215 1215          .int       L(P2Q4)-L(setPxQx)
1216 1216          .int       L(P3Q4)-L(setPxQx)
1217 1217          .int       L(P4Q4)-L(setPxQx)
1218 1218          .int       L(P5Q4)-L(setPxQx)
1219 1219          .int       L(P6Q4)-L(setPxQx)
1220      -        .int       L(P7Q4)-L(setPxQx) 
     1220 +        .int       L(P7Q4)-L(setPxQx)
1221 1221  
1222 1222          .int       L(P0Q5)-L(setPxQx)   /* 40 */
1223 1223          .int       L(P1Q5)-L(setPxQx)
1224 1224          .int       L(P2Q5)-L(setPxQx)
1225 1225          .int       L(P3Q5)-L(setPxQx)
1226 1226          .int       L(P4Q5)-L(setPxQx)
1227 1227          .int       L(P5Q5)-L(setPxQx)
1228 1228          .int       L(P6Q5)-L(setPxQx)
1229      -        .int       L(P7Q5)-L(setPxQx) 
     1229 +        .int       L(P7Q5)-L(setPxQx)
1230 1230  
1231 1231          .int       L(P0Q6)-L(setPxQx)   /* 48 */
1232 1232          .int       L(P1Q6)-L(setPxQx)
1233 1233          .int       L(P2Q6)-L(setPxQx)
1234 1234          .int       L(P3Q6)-L(setPxQx)
1235 1235          .int       L(P4Q6)-L(setPxQx)
1236 1236          .int       L(P5Q6)-L(setPxQx)
1237 1237          .int       L(P6Q6)-L(setPxQx)
1238      -        .int       L(P7Q6)-L(setPxQx) 
     1238 +        .int       L(P7Q6)-L(setPxQx)
1239 1239  
1240 1240          .int       L(P0Q7)-L(setPxQx)   /* 56 */
1241 1241          .int       L(P1Q7)-L(setPxQx)
1242 1242          .int       L(P2Q7)-L(setPxQx)
1243 1243          .int       L(P3Q7)-L(setPxQx)
1244 1244          .int       L(P4Q7)-L(setPxQx)
1245 1245          .int       L(P5Q7)-L(setPxQx)
1246 1246          .int       L(P6Q7)-L(setPxQx)
1247      -        .int       L(P7Q7)-L(setPxQx) 
     1247 +        .int       L(P7Q7)-L(setPxQx)
1248 1248  
1249 1249          .int       L(P0Q8)-L(setPxQx)   /* 64 */
1250 1250          .int       L(P1Q8)-L(setPxQx)
1251 1251          .int       L(P2Q8)-L(setPxQx)
1252 1252          .int       L(P3Q8)-L(setPxQx)
1253 1253          .int       L(P4Q8)-L(setPxQx)
1254 1254          .int       L(P5Q8)-L(setPxQx)
1255 1255          .int       L(P6Q8)-L(setPxQx)
1256 1256          .int       L(P7Q8)-L(setPxQx)
1257 1257  
[ 9 lines elided ]
1267 1267          .p2align 4
1268 1268  L(P0Q9): mov    %rax, -0x48(%rdi)
1269 1269  L(P0Q8): mov    %rax, -0x40(%rdi)
1270 1270  L(P0Q7): mov    %rax, -0x38(%rdi)
1271 1271  L(P0Q6): mov    %rax, -0x30(%rdi)
1272 1272  L(P0Q5): mov    %rax, -0x28(%rdi)
1273 1273  L(P0Q4): mov    %rax, -0x20(%rdi)
1274 1274  L(P0Q3): mov    %rax, -0x18(%rdi)
1275 1275  L(P0Q2): mov    %rax, -0x10(%rdi)
1276 1276  L(P0Q1): mov    %rax, -0x8(%rdi)
1277      -L(P0Q0): 
     1277 +L(P0Q0):
1278 1278           ret
1279 1279  
1280 1280          .p2align 4
1281 1281  L(P1Q9): mov    %rax, -0x49(%rdi)
1282 1282  L(P1Q8): mov    %rax, -0x41(%rdi)
1283 1283  L(P1Q7): mov    %rax, -0x39(%rdi)
1284 1284  L(P1Q6): mov    %rax, -0x31(%rdi)
1285 1285  L(P1Q5): mov    %rax, -0x29(%rdi)
1286 1286  L(P1Q4): mov    %rax, -0x21(%rdi)
1287 1287  L(P1Q3): mov    %rax, -0x19(%rdi)
[ 127 lines elided ]
1415 1415          cmp     $BZERO_USE_REP, %rsi
1416 1416          ja      L(use_rep)
1417 1417  
1418 1418          /*
1419 1419           * zero 64-bytes per loop
1420 1420           */
1421 1421          .p2align 4
1422 1422  L(bzero_loop):
1423 1423          leaq    -0x40(%rsi), %rsi
1424 1424          cmpq    $0x40, %rsi
1425      -        movq    %rax, (%rdi) 
1426      -        movq    %rax, 0x8(%rdi) 
1427      -        movq    %rax, 0x10(%rdi) 
1428      -        movq    %rax, 0x18(%rdi) 
1429      -        movq    %rax, 0x20(%rdi) 
1430      -        movq    %rax, 0x28(%rdi) 
1431      -        movq    %rax, 0x30(%rdi) 
1432      -        movq    %rax, 0x38(%rdi) 
     1425 +        movq    %rax, (%rdi)
     1426 +        movq    %rax, 0x8(%rdi)
     1427 +        movq    %rax, 0x10(%rdi)
     1428 +        movq    %rax, 0x18(%rdi)
     1429 +        movq    %rax, 0x20(%rdi)
     1430 +        movq    %rax, 0x28(%rdi)
     1431 +        movq    %rax, 0x30(%rdi)
     1432 +        movq    %rax, 0x38(%rdi)
1433 1433          leaq    0x40(%rdi), %rdi
1434 1434          jae     L(bzero_loop)
1435 1435  
1436 1436          /*
1437 1437           * Clear any remaining bytes..
1438 1438           */
1439 1439  9:
1440 1440          leaq    L(setPxQx)(%rip), %r10
1441 1441          addq    %rsi, %rdi
1442 1442          movslq  (%r10,%rsi,4), %rcx
1443 1443          leaq    (%rcx,%r10,1), %r10
1444      -        jmpq    *%r10
     1444 +        INDIRECT_JMP_REG(r10)
1445 1445  
1446 1446          /*
1447 1447           * Use rep sstoq. Clear any remainder via unrolled code
1448 1448           */
1449 1449          .p2align 4
1450 1450  L(use_rep):
1451 1451          movq    %rsi, %rcx              /* get size in bytes */
1452 1452          shrq    $3, %rcx                /* count of 8-byte words to zero */
1453 1453          rep
1454 1454            sstoq                         /* %rcx = words to clear (%rax=0) */
[ 102 lines elided ]
1557 1557          leaq    _copyin_err(%rip), %rcx
1558 1558  
1559 1559          movq    %gs:CPU_THREAD, %r9
1560 1560          cmpq    %rax, %rdi              /* test uaddr < kernelbase */
1561 1561          jae     3f                      /* take copyop if uaddr > kernelbase */
1562 1562          SMAP_DISABLE_INSTR(0)
1563 1563          jmp     do_copy_fault           /* Takes care of leave for us */
1564 1564  
1565 1565  _copyin_err:
1566 1566          SMAP_ENABLE_INSTR(2)
1567      -        movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */  
     1567 +        movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
1568 1568          addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1569 1569  3:
1570 1570          movq    T_COPYOPS(%r9), %rax
1571 1571          cmpq    $0, %rax
1572 1572          jz      2f
1573 1573          /*
1574 1574           * reload args for the copyop
1575 1575           */
1576 1576          movq    (%rsp), %rdi
1577 1577          movq    0x8(%rsp), %rsi
1578 1578          movq    0x10(%rsp), %rdx
1579 1579          leave
1580      -        jmp     *CP_COPYIN(%rax)
     1580 +        movq    CP_COPYIN(%rax), %rax
     1581 +        INDIRECT_JMP_REG(rax)
1581 1582  
1582      -2:      movl    $-1, %eax       
     1583 +2:      movl    $-1, %eax
1583 1584          leave
1584 1585          ret
1585 1586          SET_SIZE(copyin)
1586 1587  
1587 1588  #elif defined(__i386)
1588 1589  
1589 1590  #define ARG_UADDR       4
1590 1591  #define ARG_KADDR       8
1591 1592  
1592 1593          ENTRY(copyin)
[ 80 lines elided ]
1673 1674          leaq    _xcopyin_err(%rip), %rcx        /* doesn't set rflags */
1674 1675          jnz     6f                      /* use regular access */
1675 1676          /*
1676 1677           * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1677 1678           */
1678 1679          cmpq    $XCOPY_MIN_SIZE, %rdx
1679 1680          jae     5f
1680 1681  6:
1681 1682          SMAP_DISABLE_INSTR(1)
1682 1683          jmp     do_copy_fault
1683      -        
     1684 +
1684 1685          /*
1685 1686           * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1686 1687           * count is COUNT_ALIGN_SIZE aligned.
1687 1688           */
1688 1689  5:
1689 1690          movq    %rdi, %r10
1690 1691          orq     %rsi, %r10
1691 1692          andq    $NTA_ALIGN_MASK, %r10
1692 1693          orq     %rdx, %r10
1693 1694          andq    $COUNT_ALIGN_MASK, %r10
1694      -        jnz     6b      
     1695 +        jnz     6b
1695 1696          leaq    _xcopyin_nta_err(%rip), %rcx    /* doesn't set rflags */
1696 1697          SMAP_DISABLE_INSTR(2)
1697 1698          jmp     do_copy_fault_nta       /* use non-temporal access */
1698      -        
     1699 +
1699 1700  4:
1700 1701          movl    $EFAULT, %eax
1701 1702          jmp     3f
1702 1703  
1703 1704          /*
1704 1705           * A fault during do_copy_fault or do_copy_fault_nta is
1705 1706           * indicated through an errno value in %rax and we iret from the
1706 1707           * trap handler to here.
1707 1708           */
1708 1709  _xcopyin_err:
[ 6 lines elided ]
1715 1716          cmpq    $0, %r8
1716 1717          jz      2f
1717 1718  
1718 1719          /*
1719 1720           * reload args for the copyop
1720 1721           */
1721 1722          movq    (%rsp), %rdi
1722 1723          movq    0x8(%rsp), %rsi
1723 1724          movq    0x10(%rsp), %rdx
1724 1725          leave
1725      -        jmp     *CP_XCOPYIN(%r8)
     1726 +        movq    CP_XCOPYIN(%r8), %r8
     1727 +        INDIRECT_JMP_REG(r8)
1726 1728  
1727 1729  2:      leave
1728 1730          ret
1729 1731          SET_SIZE(xcopyin_nta)
1730 1732  
1731 1733  #elif defined(__i386)
1732 1734  
1733 1735  #define ARG_UADDR       4
1734 1736  #define ARG_KADDR       8
1735 1737  #define ARG_COUNT       12
[ 12 lines elided ]
1748 1750          jz      do_copy_fault
1749 1751  
1750 1752          cmpl    $0, ARG_CACHED(%esp)    /* copy_cached hint set? */
1751 1753          jnz     do_copy_fault
1752 1754  
1753 1755          /*
1754 1756           * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1755 1757           */
1756 1758          cmpl    $XCOPY_MIN_SIZE, ARG_COUNT(%esp)
1757 1759          jb      do_copy_fault
1758      -        
     1760 +
1759 1761          /*
1760 1762           * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1761 1763           * count is COUNT_ALIGN_SIZE aligned.
1762 1764           */
1763 1765          movl    ARG_UADDR(%esp), %ecx
1764 1766          orl     ARG_KADDR(%esp), %ecx
1765 1767          andl    $NTA_ALIGN_MASK, %ecx
1766 1768          orl     ARG_COUNT(%esp), %ecx
1767 1769          andl    $COUNT_ALIGN_MASK, %ecx
1768 1770          jnz     do_copy_fault
[ 14 lines elided ]
1783 1785          popl    %edi
1784 1786          movl    %ecx, T_LOFAULT(%edx)   /* restore original lofault */
1785 1787          popl    %esi
1786 1788          popl    %ebp
1787 1789  3:
1788 1790          cmpl    $0, T_COPYOPS(%edx)
1789 1791          jz      2f
1790 1792          movl    T_COPYOPS(%edx), %eax
1791 1793          jmp     *CP_XCOPYIN(%eax)
1792 1794  
1793      -2:      rep;    ret     /* use 2 byte return instruction when branch target */
     1795 +2:      rep;    ret     /* use 2 byte return instruction when branch target */
1794 1796                          /* AMD Software Optimization Guide - Section 6.2 */
1795 1797          SET_SIZE(xcopyin_nta)
1796 1798  
1797 1799  #undef  ARG_UADDR
1798 1800  #undef  ARG_KADDR
1799 1801  #undef  ARG_COUNT
1800 1802  #undef  ARG_CACHED
1801 1803  
1802 1804  #endif  /* __i386 */
1803 1805  #endif  /* __lint */
[ 54 lines elided ]
1858 1860          cmpq    $0, %rax
1859 1861          jz      2f
1860 1862  
1861 1863          /*
1862 1864           * reload args for the copyop
1863 1865           */
1864 1866          movq    (%rsp), %rdi
1865 1867          movq    0x8(%rsp), %rsi
1866 1868          movq    0x10(%rsp), %rdx
1867 1869          leave
1868      -        jmp     *CP_COPYOUT(%rax)
     1870 +        movq    CP_COPYOUT(%rax), %rax
     1871 +        INDIRECT_JMP_REG(rax)
1869 1872  
1870 1873  2:      movl    $-1, %eax
1871 1874          leave
1872 1875          ret
1873 1876          SET_SIZE(copyout)
1874 1877  
1875 1878  #elif defined(__i386)
1876 1879  
1877 1880  #define ARG_KADDR       4
1878 1881  #define ARG_UADDR       8
[ 7 lines elided ]
1886 1889          movl    %esp, %ebp
1887 1890          pushl   $.copyout_panic_msg
1888 1891          call    panic
1889 1892  1:
1890 1893  #endif
1891 1894          lea     _copyout_err, %eax
1892 1895          movl    %gs:CPU_THREAD, %edx
1893 1896          cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
1894 1897          jb      do_copy_fault
1895 1898          jmp     3f
1896      -        
     1899 +
1897 1900  _copyout_err:
1898 1901          popl    %ecx
1899 1902          popl    %edi
1900 1903          movl    %ecx, T_LOFAULT(%edx)   /* restore original lofault */
1901 1904          popl    %esi
1902 1905          popl    %ebp
1903 1906  3:
1904 1907          movl    T_COPYOPS(%edx), %eax
1905 1908          cmpl    $0, %eax
1906 1909          jz      2f
[ 52 lines elided ]
1959 1962          leaq    _xcopyout_err(%rip), %rcx
1960 1963          jnz     6f
1961 1964          /*
1962 1965           * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1963 1966           */
1964 1967          cmpq    $XCOPY_MIN_SIZE, %rdx
1965 1968          jae     5f
1966 1969  6:
1967 1970          SMAP_DISABLE_INSTR(4)
1968 1971          jmp     do_copy_fault
1969      -        
     1972 +
1970 1973          /*
1971 1974           * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1972 1975           * count is COUNT_ALIGN_SIZE aligned.
1973 1976           */
1974 1977  5:
1975 1978          movq    %rdi, %r10
1976 1979          orq     %rsi, %r10
1977 1980          andq    $NTA_ALIGN_MASK, %r10
1978 1981          orq     %rdx, %r10
1979 1982          andq    $COUNT_ALIGN_MASK, %r10
1980      -        jnz     6b      
     1983 +        jnz     6b
1981 1984          leaq    _xcopyout_nta_err(%rip), %rcx
1982 1985          SMAP_DISABLE_INSTR(5)
1983 1986          call    do_copy_fault_nta
1984 1987          SMAP_ENABLE_INSTR(5)
1985 1988          ret
1986 1989  
1987 1990  4:
1988 1991          movl    $EFAULT, %eax
1989 1992          jmp     3f
1990 1993  
[ 12 lines elided ]
2003 2006          cmpq    $0, %r8
2004 2007          jz      2f
2005 2008  
2006 2009          /*
2007 2010           * reload args for the copyop
2008 2011           */
2009 2012          movq    (%rsp), %rdi
2010 2013          movq    0x8(%rsp), %rsi
2011 2014          movq    0x10(%rsp), %rdx
2012 2015          leave
2013      -        jmp     *CP_XCOPYOUT(%r8)
     2016 +        movq    CP_XCOPYOUT(%r8), %r8
     2017 +        INDIRECT_JMP_REG(r8)
2014 2018  
2015 2019  2:      leave
2016 2020          ret
2017 2021          SET_SIZE(xcopyout_nta)
2018 2022  
2019 2023  #elif defined(__i386)
2020 2024  
2021 2025  #define ARG_KADDR       4
2022 2026  #define ARG_UADDR       8
2023 2027  #define ARG_COUNT       12
[ 10 lines elided ]
2034 2038          jz      do_copy_fault
2035 2039  
2036 2040          cmpl    $0, ARG_CACHED(%esp)    /* copy_cached hint set? */
2037 2041          jnz     do_copy_fault
2038 2042  
2039 2043          /*
2040 2044           * Make sure cnt is >= XCOPY_MIN_SIZE bytes
2041 2045           */
2042 2046          cmpl    $XCOPY_MIN_SIZE, %edx
2043 2047          jb      do_copy_fault
2044      -        
     2048 +
2045 2049          /*
2046 2050           * Make sure src and dst are NTA_ALIGN_SIZE aligned,
2047 2051           * count is COUNT_ALIGN_SIZE aligned.
2048 2052           */
2049 2053          movl    ARG_UADDR(%esp), %ecx
2050 2054          orl     ARG_KADDR(%esp), %ecx
2051 2055          andl    $NTA_ALIGN_MASK, %ecx
2052 2056          orl     ARG_COUNT(%esp), %ecx
2053 2057          andl    $COUNT_ALIGN_MASK, %ecx
2054 2058          jnz     do_copy_fault
[ 135 lines elided ]
2190 2194  #endif
2191 2195          /* get the current lofault address */
2192 2196          movl    %gs:CPU_THREAD, %eax
2193 2197          movl    T_LOFAULT(%eax), %eax
2194 2198  do_copystr:
2195 2199          pushl   %ebp                    /* setup stack frame */
2196 2200          movl    %esp, %ebp
2197 2201          pushl   %ebx                    /* save registers */
2198 2202          pushl   %edi
2199 2203  
2200      -        movl    %gs:CPU_THREAD, %ebx    
     2204 +        movl    %gs:CPU_THREAD, %ebx
2201 2205          movl    T_LOFAULT(%ebx), %edi
2202 2206          pushl   %edi                    /* save the current lofault */
2203 2207          movl    %eax, T_LOFAULT(%ebx)   /* new lofault */
2204 2208  
2205 2209          movl    ARG_MAXLEN(%ebp), %ecx
2206 2210          cmpl    $0, %ecx
2207 2211          je      copystr_enametoolong    /* maxlength == 0 */
2208 2212  
2209 2213          movl    ARG_FROM(%ebp), %ebx    /* source address */
2210 2214          movl    ARG_TO(%ebp), %edx      /* destination address */
2211 2215  
2212 2216  copystr_loop:
2213 2217          decl    %ecx
2214 2218          movb    (%ebx), %al
2215      -        incl    %ebx    
     2219 +        incl    %ebx
2216 2220          movb    %al, (%edx)
2217 2221          incl    %edx
2218 2222          cmpb    $0, %al
2219 2223          je      copystr_null            /* null char */
2220 2224          cmpl    $0, %ecx
2221 2225          jne     copystr_loop
2222 2226  
2223 2227  copystr_enametoolong:
2224 2228          movl    $ENAMETOOLONG, %eax
2225 2229          jmp     copystr_out
[ 4 lines elided ]
2230 2234  copystr_out:
2231 2235          cmpl    $0, ARG_LENCOPIED(%ebp) /* want length? */
2232 2236          je      copystr_done            /* no */
2233 2237          movl    ARG_MAXLEN(%ebp), %edx
2234 2238          subl    %ecx, %edx              /* compute length and store it */
2235 2239          movl    ARG_LENCOPIED(%ebp), %ecx
2236 2240          movl    %edx, (%ecx)
2237 2241  
2238 2242  copystr_done:
2239 2243          popl    %edi
2240      -        movl    %gs:CPU_THREAD, %ebx    
     2244 +        movl    %gs:CPU_THREAD, %ebx
2241 2245          movl    %edi, T_LOFAULT(%ebx)   /* restore the original lofault */
2242 2246  
2243 2247          popl    %edi
2244 2248          popl    %ebx
2245 2249          popl    %ebp
2246      -        ret     
     2250 +        ret
2247 2251          SET_SIZE(copystr)
2248 2252  
2249 2253  #undef  ARG_FROM
2250 2254  #undef  ARG_TO
2251 2255  #undef  ARG_MAXLEN
2252 2256  #undef  ARG_LENCOPIED
2253 2257  
2254 2258  #endif  /* __i386 */
2255 2259  #endif  /* __lint */
2256 2260  
[ 60 lines elided ]
2317 2321          jz      2f
2318 2322  
2319 2323          /*
2320 2324           * reload args for the copyop
2321 2325           */
2322 2326          movq    (%rsp), %rdi
2323 2327          movq    0x8(%rsp), %rsi
2324 2328          movq    0x10(%rsp), %rdx
2325 2329          movq    0x18(%rsp), %rcx
2326 2330          leave
2327      -        jmp     *CP_COPYINSTR(%rax)
2328      -        
     2331 +        movq    CP_COPYINSTR(%rax), %rax
     2332 +        INDIRECT_JMP_REG(rax)
     2333 +
2329 2334  2:      movl    $EFAULT, %eax           /* return EFAULT */
2330 2335          leave
2331 2336          ret
2332 2337          SET_SIZE(copyinstr)
2333 2338  
2334 2339  #elif defined(__i386)
2335 2340  
2336 2341  #define ARG_UADDR       4
2337 2342  #define ARG_KADDR       8
2338 2343  
[ 9 lines elided ]
2348 2353  1:
2349 2354  #endif
2350 2355          lea     _copyinstr_error, %eax
2351 2356          cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
2352 2357          jb      do_copystr
2353 2358          movl    %gs:CPU_THREAD, %edx
2354 2359          jmp     3f
2355 2360  
2356 2361  _copyinstr_error:
2357 2362          popl    %edi
2358      -        movl    %gs:CPU_THREAD, %edx    
     2363 +        movl    %gs:CPU_THREAD, %edx
2359 2364          movl    %edi, T_LOFAULT(%edx)   /* original lofault */
2360 2365  
2361 2366          popl    %edi
2362 2367          popl    %ebx
2363 2368          popl    %ebp
2364 2369  3:
2365 2370          movl    T_COPYOPS(%edx), %eax
2366 2371          cmpl    $0, %eax
2367 2372          jz      2f
2368 2373          jmp     *CP_COPYINSTR(%eax)
2369      -        
     2374 +
2370 2375  2:      movl    $EFAULT, %eax           /* return EFAULT */
2371 2376          ret
2372 2377          SET_SIZE(copyinstr)
2373 2378  
2374 2379  #undef  ARG_UADDR
2375 2380  #undef  ARG_KADDR
2376 2381  
2377 2382  #endif  /* __i386 */
2378 2383  #endif  /* __lint */
2379 2384  
[ 59 lines elided ]
2439 2444          jz      2f
2440 2445  
2441 2446          /*
2442 2447           * reload args for the copyop
2443 2448           */
2444 2449          movq    (%rsp), %rdi
2445 2450          movq    0x8(%rsp), %rsi
2446 2451          movq    0x10(%rsp), %rdx
2447 2452          movq    0x18(%rsp), %rcx
2448 2453          leave
2449      -        jmp     *CP_COPYOUTSTR(%rax)
2450      -        
     2454 +        movq    CP_COPYOUTSTR(%rax), %rax
     2455 +        INDIRECT_JMP_REG(rax)
     2456 +
2451 2457  2:      movl    $EFAULT, %eax           /* return EFAULT */
2452 2458          leave
2453 2459          ret
2454      -        SET_SIZE(copyoutstr)    
2455      -        
     2460 +        SET_SIZE(copyoutstr)
     2461 +
2456 2462  #elif defined(__i386)
2457 2463  
2458 2464  #define ARG_KADDR       4
2459 2465  #define ARG_UADDR       8
2460 2466  
2461 2467          ENTRY(copyoutstr)
2462 2468          movl    kernelbase, %ecx
2463 2469  #ifdef DEBUG
2464 2470          cmpl    %ecx, ARG_KADDR(%esp)
2465 2471          jnb     1f
[ 4 lines elided ]
2470 2476  1:
2471 2477  #endif
2472 2478          lea     _copyoutstr_error, %eax
2473 2479          cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
2474 2480          jb      do_copystr
2475 2481          movl    %gs:CPU_THREAD, %edx
2476 2482          jmp     3f
2477 2483  
2478 2484  _copyoutstr_error:
2479 2485          popl    %edi
2480      -        movl    %gs:CPU_THREAD, %edx    
     2486 +        movl    %gs:CPU_THREAD, %edx
2481 2487          movl    %edi, T_LOFAULT(%edx)   /* restore the original lofault */
2482 2488  
2483 2489          popl    %edi
2484 2490          popl    %ebx
2485 2491          popl    %ebp
2486 2492  3:
2487 2493          movl    T_COPYOPS(%edx), %eax
2488 2494          cmpl    $0, %eax
2489 2495          jz      2f
2490 2496          jmp     *CP_COPYOUTSTR(%eax)
2491 2497  
2492 2498  2:      movl    $EFAULT, %eax           /* return EFAULT */
2493 2499          ret
2494 2500          SET_SIZE(copyoutstr)
2495      -        
     2501 +
2496 2502  #undef  ARG_KADDR
2497 2503  #undef  ARG_UADDR
2498 2504  
2499 2505  #endif  /* __i386 */
2500 2506  #endif  /* __lint */
2501 2507  
2502 2508  /*
2503 2509   * Since all of the fuword() variants are so similar, we have a macro to spit
2504 2510   * them out.  This allows us to create DTrace-unobservable functions easily.
2505 2511   */
2506      -        
     2512 +
2507 2513  #if defined(__lint)
2508 2514  
2509 2515  #if defined(__amd64)
2510 2516  
2511 2517  /* ARGSUSED */
2512 2518  int
2513 2519  fuword64(const void *addr, uint64_t *dst)
2514 2520  { return (0); }
2515 2521  
2516 2522  #endif
[ 38 lines elided ]
2555 2561          xorl    %eax, %eax;                     \
2556 2562          SMAP_ENABLE_INSTR(EN1)                  \
2557 2563          ret;                                    \
2558 2564  _flt_/**/NAME:                                  \
2559 2565          SMAP_ENABLE_INSTR(EN2)                  \
2560 2566          movq    $0, T_LOFAULT(%r9);             \
2561 2567  1:                                              \
2562 2568          movq    T_COPYOPS(%r9), %rax;           \
2563 2569          cmpq    $0, %rax;                       \
2564 2570          jz      2f;                             \
2565      -        jmp     *COPYOP(%rax);                  \
     2571 +        movq    COPYOP(%rax), %rax;             \
     2572 +        INDIRECT_JMP_REG(rax);                  \
2566 2573  2:                                              \
2567 2574          movl    $-1, %eax;                      \
2568 2575          ret;                                    \
2569 2576          SET_SIZE(NAME)
2570      -        
     2577 +
2571 2578          FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11)
2572 2579          FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13)
2573 2580          FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15)
2574 2581          FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17)
2575 2582  
2576 2583  #elif defined(__i386)
2577 2584  
2578 2585  #define FUWORD(NAME, INSTR, REG, COPYOP)        \
2579 2586          ENTRY(NAME)                             \
2580 2587          movl    %gs:CPU_THREAD, %ecx;           \
[ 83 lines elided ]
2664 2671          xorl    %eax, %eax;                     \
2665 2672          SMAP_ENABLE_INSTR(EN1)                  \
2666 2673          ret;                                    \
2667 2674  _flt_/**/NAME:                                  \
2668 2675          SMAP_ENABLE_INSTR(EN2)                  \
2669 2676          movq    $0, T_LOFAULT(%r9);             \
2670 2677  1:                                              \
2671 2678          movq    T_COPYOPS(%r9), %rax;           \
2672 2679          cmpq    $0, %rax;                       \
2673 2680          jz      3f;                             \
2674      -        jmp     *COPYOP(%rax);                  \
     2681 +        movq    COPYOP(%rax), %rax;             \
     2682 +        INDIRECT_JMP_REG(rax);                  \
2675 2683  3:                                              \
2676 2684          movl    $-1, %eax;                      \
2677 2685          ret;                                    \
2678 2686          SET_SIZE(NAME)
2679 2687  
2680 2688          SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19)
2681 2689          SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21)
2682 2690          SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23)
2683 2691          SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25)
2684 2692  
[ 491 lines elided ]
3176 3184          ret
3177 3185          SET_SIZE(smap_enable)
3178 3186  
3179 3187  #endif /* __amd64 || __i386 */
3180 3188  
3181 3189  #endif /* __lint */
3182 3190  
3183 3191  #ifndef __lint
3184 3192  
3185 3193  .data
3186      -.align  4
     3194 +.align  4
3187 3195  .globl  _smap_enable_patch_count
3188 3196  .type   _smap_enable_patch_count,@object
3189 3197  .size   _smap_enable_patch_count, 4
3190 3198  _smap_enable_patch_count:
3191 3199          .long   SMAP_ENABLE_COUNT
3192 3200  
3193 3201  .globl  _smap_disable_patch_count
3194 3202  .type   _smap_disable_patch_count,@object
3195 3203  .size   _smap_disable_patch_count, 4
3196 3204  _smap_disable_patch_count:
3197 3205          .long SMAP_DISABLE_COUNT
3198 3206  
3199 3207  #endif /* __lint */
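
Taken together, the 64-bit (__amd64) conversions in this file take one of two
shapes, sketched schematically below; CP_COPYIN stands in for whichever copyops
offset a given routine dispatches through, and how INDIRECT_JMP_REG itself
expands (a plain register jump or a retpoline thunk, depending on how the
kernel is built) is not shown in this webrev. The 32-bit (__i386) paths keep
their existing jmp *CP_...(%eax) forms.

        /* Register-indirect dispatch: the target is already in a register. */
        jmpq    *%r10                           /* before */
        INDIRECT_JMP_REG(r10)                   /* after */

        /*
         * Memory-indirect dispatch: the target is loaded into a register
         * first, since the retpoline form needs it there, and then the same
         * macro is used.
         */
        jmp     *CP_COPYIN(%rax)                /* before */
        movq    CP_COPYIN(%rax), %rax           /* after */
        INDIRECT_JMP_REG(rax)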
    