10908 Simplify SMAP relocations with krtld
1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 2009, Intel Corporation 28 * All rights reserved. 29 */ 30 31 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ 32 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ 33 /* All Rights Reserved */ 34 35 /* Copyright (c) 1987, 1988 Microsoft Corporation */ 36 /* All Rights Reserved */ 37 38 /* 39 * Copyright 2016 Joyent, Inc. 
40 */ 41 42 #include <sys/errno.h> 43 #include <sys/asm_linkage.h> 44 45 #if defined(__lint) 46 #include <sys/types.h> 47 #include <sys/systm.h> 48 #else /* __lint */ 49 #include "assym.h" 50 #endif /* __lint */ 51 52 #define KCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */ 53 #define XCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */ 54 /* 55 * Non-temporal access (NTA) alignment requirement 56 */ 57 #define NTA_ALIGN_SIZE 4 /* Must be at least 4-byte aligned */ 58 #define NTA_ALIGN_MASK _CONST(NTA_ALIGN_SIZE-1) 59 #define COUNT_ALIGN_SIZE 16 /* Must be at least 16-byte aligned */ 60 #define COUNT_ALIGN_MASK _CONST(COUNT_ALIGN_SIZE-1) 61 62 /* 63 * With the introduction of Broadwell, Intel has introduced supervisor mode 64 * access protection -- SMAP. SMAP forces the kernel to set certain bits to 65 * enable access of user pages (AC in rflags, defined as PS_ACHK in 66 * <sys/psw.h>). One of the challenges is that the implementation of many of the 67 * userland copy routines directly use the kernel ones. For example, copyin and 68 * copyout simply go and jump to the do_copy_fault label and traditionally let 69 * those deal with the return for them. In fact, changing that is a can of frame 70 * pointers. 71 * 72 * Rules and Constraints: 73 * 74 * 1. For anything that's not in copy.s, we have it do explicit calls to the 75 * smap related code. It usually is in a position where it is able to. This is 76 * restricted to the following three places: DTrace, resume() in swtch.s and 77 * on_fault/no_fault. If you want to add it somewhere else, we should be 78 * thinking twice. 79 * 80 * 2. We try to toggle this at the smallest window possible. This means that if 81 * we take a fault, need to try to use a copyop in copyin() or copyout(), or any 82 * other function, we will always leave with SMAP enabled (the kernel cannot 83 * access user pages). 84 * 85 * 3. None of the *_noerr() or ucopy/uzero routines should toggle SMAP. 
They are 86 * explicitly only allowed to be called while in an on_fault()/no_fault() handler, 87 * which already takes care of ensuring that SMAP is enabled and disabled. Note 88 * this means that when under an on_fault()/no_fault() handler, one must not 89 * call the non-*_noerr() routines. 90 * 91 * 4. The first thing we should do after coming out of an lofault handler is to 92 * make sure that we call smap_enable again to ensure that we are safely 93 * protected, as more often than not, we will have disabled smap to get there. 94 * 95 * 5. The SMAP functions, smap_enable and smap_disable may not touch any 96 * registers beyond those done by the call and ret. These routines may be called 97 * from arbitrary contexts in copy.s where we have slightly more special ABIs in 98 * place. 99 * 100 * 6. For any inline user of SMAP, the appropriate SMAP_ENABLE_INSTR and 101 * SMAP_DISABLE_INSTR macro should be used (except for smap_enable() and 102 * smap_disable()). If the number of these is changed, you must update the 103 * constants SMAP_ENABLE_COUNT and SMAP_DISABLE_COUNT below. 104 * 105 * 7. Note, at this time SMAP is not implemented for the 32-bit kernel. There is 106 * no known technical reason preventing it from being enabled. 107 * 108 * 8. Generally this .s file is processed by a K&R style cpp. This means that it 109 * really has a lot of feelings about whitespace. In particular, if you have a 110 * macro FOO with the arguments FOO(1, 3), the second argument is in fact ' 3'. 111 * 112 * 9. The smap_enable and smap_disable functions should not generally be called. 113 * They exist such that DTrace and on_trap() may use them, that's it. 114 * 115 * 10. In general, the kernel has its own value for rflags that gets used. This 116 * is maintained in a few different places which vary based on how the thread 117 * comes into existence and whether it's a user thread. 
In general, when the 118 * kernel takes a trap, it will always set us to a known set of flags, 119 * mainly as part of ENABLE_INTR_FLAGS and F_OFF and F_ON. These ensure that 120 * PS_ACHK is cleared for us. In addition, when using the sysenter instruction, 121 * we mask PS_ACHK off via the AMD_SFMASK MSR. See init_cpu_syscall() for 122 * where that gets masked off. 123 */ 124 125 /* 126 * The optimal 64-bit bcopy and kcopy for modern x86 processors uses 127 * "rep smovq" for large sizes. Performance data shows that many calls to 128 * bcopy/kcopy/bzero/kzero operate on small buffers. For best performance for 129 * these small sizes unrolled code is used. For medium sizes loops writing 130 * 64-bytes per loop are used. Transition points were determined experimentally. 131 */ 132 #define BZERO_USE_REP (1024) 133 #define BCOPY_DFLT_REP (128) 134 #define BCOPY_NHM_REP (768) 135 136 /* 137 * Copy a block of storage, returning an error code if `from' or 138 * `to' takes a kernel pagefault which cannot be resolved. 139 * Returns errno value on pagefault error, 0 if all ok 140 */ 141 142 /* 143 * I'm sorry about these macros, but copy.s is unsurprisingly sensitive to 144 * additional call instructions. 
145 */ 146 #if defined(__amd64) 147 #define SMAP_DISABLE_COUNT 16 148 #define SMAP_ENABLE_COUNT 26 149 #elif defined(__i386) 150 #define SMAP_DISABLE_COUNT 0 151 #define SMAP_ENABLE_COUNT 0 152 #endif 153 154 #define SMAP_DISABLE_INSTR(ITER) \ 155 .globl _smap_disable_patch_/**/ITER; \ 156 _smap_disable_patch_/**/ITER/**/:; \ 157 nop; nop; nop; 158 159 #define SMAP_ENABLE_INSTR(ITER) \ 160 .globl _smap_enable_patch_/**/ITER; \ 161 _smap_enable_patch_/**/ITER/**/:; \ 162 nop; nop; nop; 163 164 #if defined(__lint) 165 166 /* ARGSUSED */ 167 int 168 kcopy(const void *from, void *to, size_t count) 169 { return (0); } 170 171 #else /* __lint */ 172 173 .globl kernelbase 174 .globl postbootkernelbase 175 176 #if defined(__amd64) 177 178 ENTRY(kcopy) 179 pushq %rbp 180 movq %rsp, %rbp 181 #ifdef DEBUG 182 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */ 183 jb 0f 184 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */ 185 jnb 1f 186 0: leaq .kcopy_panic_msg(%rip), %rdi 187 xorl %eax, %eax 188 call panic 189 1: 190 #endif 191 /* 192 * pass lofault value as 4th argument to do_copy_fault 193 */ 194 leaq _kcopy_copyerr(%rip), %rcx 195 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ 196 197 do_copy_fault: 198 movq T_LOFAULT(%r9), %r11 /* save the current lofault */ 199 movq %rcx, T_LOFAULT(%r9) /* new lofault */ 200 call bcopy_altentry 201 xorl %eax, %eax /* return 0 (success) */ 202 SMAP_ENABLE_INSTR(0) 203 204 /* 205 * A fault during do_copy_fault is indicated through an errno value 206 * in %rax and we iretq from the trap handler to here. 
207 */ 208 _kcopy_copyerr: 209 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 210 leave 211 ret 212 SET_SIZE(kcopy) 213 214 #elif defined(__i386) 215 216 #define ARG_FROM 8 217 #define ARG_TO 12 218 #define ARG_COUNT 16 219 220 ENTRY(kcopy) 221 #ifdef DEBUG 222 pushl %ebp 223 movl %esp, %ebp 224 movl postbootkernelbase, %eax 225 cmpl %eax, ARG_FROM(%ebp) 226 jb 0f 227 cmpl %eax, ARG_TO(%ebp) 228 jnb 1f 229 0: pushl $.kcopy_panic_msg 230 call panic 231 1: popl %ebp 232 #endif 233 lea _kcopy_copyerr, %eax /* lofault value */ 234 movl %gs:CPU_THREAD, %edx 235 236 do_copy_fault: 237 pushl %ebp 238 movl %esp, %ebp /* setup stack frame */ 239 pushl %esi 240 pushl %edi /* save registers */ 241 242 movl T_LOFAULT(%edx), %edi 243 pushl %edi /* save the current lofault */ 244 movl %eax, T_LOFAULT(%edx) /* new lofault */ 245 246 movl ARG_COUNT(%ebp), %ecx 247 movl ARG_FROM(%ebp), %esi 248 movl ARG_TO(%ebp), %edi 249 shrl $2, %ecx /* word count */ 250 rep 251 smovl 252 movl ARG_COUNT(%ebp), %ecx 253 andl $3, %ecx /* bytes left over */ 254 rep 255 smovb 256 xorl %eax, %eax 257 258 /* 259 * A fault during do_copy_fault is indicated through an errno value 260 * in %eax and we iret from the trap handler to here. 261 */ 262 _kcopy_copyerr: 263 popl %ecx 264 popl %edi 265 movl %ecx, T_LOFAULT(%edx) /* restore the original lofault */ 266 popl %esi 267 popl %ebp 268 ret 269 SET_SIZE(kcopy) 270 271 #undef ARG_FROM 272 #undef ARG_TO 273 #undef ARG_COUNT 274 275 #endif /* __i386 */ 276 #endif /* __lint */ 277 278 #if defined(__lint) 279 280 /* 281 * Copy a block of storage. Similar to kcopy but uses non-temporal 282 * instructions. 283 */ 284 285 /* ARGSUSED */ 286 int 287 kcopy_nta(const void *from, void *to, size_t count, int copy_cached) 288 { return (0); } 289 290 #else /* __lint */ 291 292 #if defined(__amd64) 293 294 #define COPY_LOOP_INIT(src, dst, cnt) \ 295 addq cnt, src; \ 296 addq cnt, dst; \ 297 shrq $3, cnt; \ 298 neg cnt 299 300 /* Copy 16 bytes per loop. 
Uses %rax and %r8 */ 301 #define COPY_LOOP_BODY(src, dst, cnt) \ 302 prefetchnta 0x100(src, cnt, 8); \ 303 movq (src, cnt, 8), %rax; \ 304 movq 0x8(src, cnt, 8), %r8; \ 305 movnti %rax, (dst, cnt, 8); \ 306 movnti %r8, 0x8(dst, cnt, 8); \ 307 addq $2, cnt 308 309 ENTRY(kcopy_nta) 310 pushq %rbp 311 movq %rsp, %rbp 312 #ifdef DEBUG 313 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */ 314 jb 0f 315 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */ 316 jnb 1f 317 0: leaq .kcopy_panic_msg(%rip), %rdi 318 xorl %eax, %eax 319 call panic 320 1: 321 #endif 322 323 movq %gs:CPU_THREAD, %r9 324 cmpq $0, %rcx /* No non-temporal access? */ 325 /* 326 * pass lofault value as 4th argument to do_copy_fault 327 */ 328 leaq _kcopy_nta_copyerr(%rip), %rcx /* doesn't set rflags */ 329 jnz do_copy_fault /* use regular access */ 330 /* 331 * Make sure cnt is >= KCOPY_MIN_SIZE 332 */ 333 cmpq $KCOPY_MIN_SIZE, %rdx 334 jb do_copy_fault 335 336 /* 337 * Make sure src and dst are NTA_ALIGN_SIZE aligned, 338 * count is COUNT_ALIGN_SIZE aligned. 
339 */ 340 movq %rdi, %r10 341 orq %rsi, %r10 342 andq $NTA_ALIGN_MASK, %r10 343 orq %rdx, %r10 344 andq $COUNT_ALIGN_MASK, %r10 345 jnz do_copy_fault 346 347 ALTENTRY(do_copy_fault_nta) 348 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ 349 movq T_LOFAULT(%r9), %r11 /* save the current lofault */ 350 movq %rcx, T_LOFAULT(%r9) /* new lofault */ 351 352 /* 353 * COPY_LOOP_BODY uses %rax and %r8 354 */ 355 COPY_LOOP_INIT(%rdi, %rsi, %rdx) 356 2: COPY_LOOP_BODY(%rdi, %rsi, %rdx) 357 jnz 2b 358 359 mfence 360 xorl %eax, %eax /* return 0 (success) */ 361 SMAP_ENABLE_INSTR(1) 362 363 _kcopy_nta_copyerr: 364 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 365 leave 366 ret 367 SET_SIZE(do_copy_fault_nta) 368 SET_SIZE(kcopy_nta) 369 370 #elif defined(__i386) 371 372 #define ARG_FROM 8 373 #define ARG_TO 12 374 #define ARG_COUNT 16 375 376 #define COPY_LOOP_INIT(src, dst, cnt) \ 377 addl cnt, src; \ 378 addl cnt, dst; \ 379 shrl $3, cnt; \ 380 neg cnt 381 382 #define COPY_LOOP_BODY(src, dst, cnt) \ 383 prefetchnta 0x100(src, cnt, 8); \ 384 movl (src, cnt, 8), %esi; \ 385 movnti %esi, (dst, cnt, 8); \ 386 movl 0x4(src, cnt, 8), %esi; \ 387 movnti %esi, 0x4(dst, cnt, 8); \ 388 movl 0x8(src, cnt, 8), %esi; \ 389 movnti %esi, 0x8(dst, cnt, 8); \ 390 movl 0xc(src, cnt, 8), %esi; \ 391 movnti %esi, 0xc(dst, cnt, 8); \ 392 addl $2, cnt 393 394 /* 395 * kcopy_nta is not implemented for 32-bit as no performance 396 * improvement was shown. We simply jump directly to kcopy 397 * and ignore the 4th argument. 
398 */ 399 ENTRY(kcopy_nta) 400 jmp kcopy 401 402 lea _kcopy_nta_copyerr, %eax /* lofault value */ 403 ALTENTRY(do_copy_fault_nta) 404 pushl %ebp 405 movl %esp, %ebp /* setup stack frame */ 406 pushl %esi 407 pushl %edi 408 409 movl %gs:CPU_THREAD, %edx 410 movl T_LOFAULT(%edx), %edi 411 pushl %edi /* save the current lofault */ 412 movl %eax, T_LOFAULT(%edx) /* new lofault */ 413 414 /* COPY_LOOP_BODY needs to use %esi */ 415 movl ARG_COUNT(%ebp), %ecx 416 movl ARG_FROM(%ebp), %edi 417 movl ARG_TO(%ebp), %eax 418 COPY_LOOP_INIT(%edi, %eax, %ecx) 419 1: COPY_LOOP_BODY(%edi, %eax, %ecx) 420 jnz 1b 421 mfence 422 423 xorl %eax, %eax 424 _kcopy_nta_copyerr: 425 popl %ecx 426 popl %edi 427 movl %ecx, T_LOFAULT(%edx) /* restore the original lofault */ 428 popl %esi 429 leave 430 ret 431 SET_SIZE(do_copy_fault_nta) 432 SET_SIZE(kcopy_nta) 433 434 #undef ARG_FROM 435 #undef ARG_TO 436 #undef ARG_COUNT 437 438 #endif /* __i386 */ 439 #endif /* __lint */ 440 441 #if defined(__lint) 442 443 /* ARGSUSED */ 444 void 445 bcopy(const void *from, void *to, size_t count) 446 {} 447 448 #else /* __lint */ 449 450 #if defined(__amd64) 451 452 ENTRY(bcopy) 453 #ifdef DEBUG 454 orq %rdx, %rdx /* %rdx = count */ 455 jz 1f 456 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */ 457 jb 0f 458 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */ 459 jnb 1f 460 0: leaq .bcopy_panic_msg(%rip), %rdi 461 jmp call_panic /* setup stack and call panic */ 462 1: 463 #endif 464 /* 465 * bcopy_altentry() is called from kcopy, i.e., do_copy_fault. 466 * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy 467 * uses these registers in future they must be saved and restored. 468 */ 469 ALTENTRY(bcopy_altentry) 470 do_copy: 471 #define L(s) .bcopy/**/s 472 cmpq $0x50, %rdx /* 80 */ 473 jae bcopy_ck_size 474 475 /* 476 * Performance data shows many callers copy small buffers. So for 477 * best perf for these sizes unrolled code is used. Store data without 478 * worrying about alignment. 
479 */ 480 leaq L(fwdPxQx)(%rip), %r10 481 addq %rdx, %rdi 482 addq %rdx, %rsi 483 movslq (%r10,%rdx,4), %rcx 484 leaq (%rcx,%r10,1), %r10 485 jmpq *%r10 486 487 .p2align 4 488 L(fwdPxQx): 489 .int L(P0Q0)-L(fwdPxQx) /* 0 */ 490 .int L(P1Q0)-L(fwdPxQx) 491 .int L(P2Q0)-L(fwdPxQx) 492 .int L(P3Q0)-L(fwdPxQx) 493 .int L(P4Q0)-L(fwdPxQx) 494 .int L(P5Q0)-L(fwdPxQx) 495 .int L(P6Q0)-L(fwdPxQx) 496 .int L(P7Q0)-L(fwdPxQx) 497 498 .int L(P0Q1)-L(fwdPxQx) /* 8 */ 499 .int L(P1Q1)-L(fwdPxQx) 500 .int L(P2Q1)-L(fwdPxQx) 501 .int L(P3Q1)-L(fwdPxQx) 502 .int L(P4Q1)-L(fwdPxQx) 503 .int L(P5Q1)-L(fwdPxQx) 504 .int L(P6Q1)-L(fwdPxQx) 505 .int L(P7Q1)-L(fwdPxQx) 506 507 .int L(P0Q2)-L(fwdPxQx) /* 16 */ 508 .int L(P1Q2)-L(fwdPxQx) 509 .int L(P2Q2)-L(fwdPxQx) 510 .int L(P3Q2)-L(fwdPxQx) 511 .int L(P4Q2)-L(fwdPxQx) 512 .int L(P5Q2)-L(fwdPxQx) 513 .int L(P6Q2)-L(fwdPxQx) 514 .int L(P7Q2)-L(fwdPxQx) 515 516 .int L(P0Q3)-L(fwdPxQx) /* 24 */ 517 .int L(P1Q3)-L(fwdPxQx) 518 .int L(P2Q3)-L(fwdPxQx) 519 .int L(P3Q3)-L(fwdPxQx) 520 .int L(P4Q3)-L(fwdPxQx) 521 .int L(P5Q3)-L(fwdPxQx) 522 .int L(P6Q3)-L(fwdPxQx) 523 .int L(P7Q3)-L(fwdPxQx) 524 525 .int L(P0Q4)-L(fwdPxQx) /* 32 */ 526 .int L(P1Q4)-L(fwdPxQx) 527 .int L(P2Q4)-L(fwdPxQx) 528 .int L(P3Q4)-L(fwdPxQx) 529 .int L(P4Q4)-L(fwdPxQx) 530 .int L(P5Q4)-L(fwdPxQx) 531 .int L(P6Q4)-L(fwdPxQx) 532 .int L(P7Q4)-L(fwdPxQx) 533 534 .int L(P0Q5)-L(fwdPxQx) /* 40 */ 535 .int L(P1Q5)-L(fwdPxQx) 536 .int L(P2Q5)-L(fwdPxQx) 537 .int L(P3Q5)-L(fwdPxQx) 538 .int L(P4Q5)-L(fwdPxQx) 539 .int L(P5Q5)-L(fwdPxQx) 540 .int L(P6Q5)-L(fwdPxQx) 541 .int L(P7Q5)-L(fwdPxQx) 542 543 .int L(P0Q6)-L(fwdPxQx) /* 48 */ 544 .int L(P1Q6)-L(fwdPxQx) 545 .int L(P2Q6)-L(fwdPxQx) 546 .int L(P3Q6)-L(fwdPxQx) 547 .int L(P4Q6)-L(fwdPxQx) 548 .int L(P5Q6)-L(fwdPxQx) 549 .int L(P6Q6)-L(fwdPxQx) 550 .int L(P7Q6)-L(fwdPxQx) 551 552 .int L(P0Q7)-L(fwdPxQx) /* 56 */ 553 .int L(P1Q7)-L(fwdPxQx) 554 .int L(P2Q7)-L(fwdPxQx) 555 .int L(P3Q7)-L(fwdPxQx) 556 .int L(P4Q7)-L(fwdPxQx) 557 
.int L(P5Q7)-L(fwdPxQx) 558 .int L(P6Q7)-L(fwdPxQx) 559 .int L(P7Q7)-L(fwdPxQx) 560 561 .int L(P0Q8)-L(fwdPxQx) /* 64 */ 562 .int L(P1Q8)-L(fwdPxQx) 563 .int L(P2Q8)-L(fwdPxQx) 564 .int L(P3Q8)-L(fwdPxQx) 565 .int L(P4Q8)-L(fwdPxQx) 566 .int L(P5Q8)-L(fwdPxQx) 567 .int L(P6Q8)-L(fwdPxQx) 568 .int L(P7Q8)-L(fwdPxQx) 569 570 .int L(P0Q9)-L(fwdPxQx) /* 72 */ 571 .int L(P1Q9)-L(fwdPxQx) 572 .int L(P2Q9)-L(fwdPxQx) 573 .int L(P3Q9)-L(fwdPxQx) 574 .int L(P4Q9)-L(fwdPxQx) 575 .int L(P5Q9)-L(fwdPxQx) 576 .int L(P6Q9)-L(fwdPxQx) 577 .int L(P7Q9)-L(fwdPxQx) /* 79 */ 578 579 .p2align 4 580 L(P0Q9): 581 mov -0x48(%rdi), %rcx 582 mov %rcx, -0x48(%rsi) 583 L(P0Q8): 584 mov -0x40(%rdi), %r10 585 mov %r10, -0x40(%rsi) 586 L(P0Q7): 587 mov -0x38(%rdi), %r8 588 mov %r8, -0x38(%rsi) 589 L(P0Q6): 590 mov -0x30(%rdi), %rcx 591 mov %rcx, -0x30(%rsi) 592 L(P0Q5): 593 mov -0x28(%rdi), %r10 594 mov %r10, -0x28(%rsi) 595 L(P0Q4): 596 mov -0x20(%rdi), %r8 597 mov %r8, -0x20(%rsi) 598 L(P0Q3): 599 mov -0x18(%rdi), %rcx 600 mov %rcx, -0x18(%rsi) 601 L(P0Q2): 602 mov -0x10(%rdi), %r10 603 mov %r10, -0x10(%rsi) 604 L(P0Q1): 605 mov -0x8(%rdi), %r8 606 mov %r8, -0x8(%rsi) 607 L(P0Q0): 608 ret 609 610 .p2align 4 611 L(P1Q9): 612 mov -0x49(%rdi), %r8 613 mov %r8, -0x49(%rsi) 614 L(P1Q8): 615 mov -0x41(%rdi), %rcx 616 mov %rcx, -0x41(%rsi) 617 L(P1Q7): 618 mov -0x39(%rdi), %r10 619 mov %r10, -0x39(%rsi) 620 L(P1Q6): 621 mov -0x31(%rdi), %r8 622 mov %r8, -0x31(%rsi) 623 L(P1Q5): 624 mov -0x29(%rdi), %rcx 625 mov %rcx, -0x29(%rsi) 626 L(P1Q4): 627 mov -0x21(%rdi), %r10 628 mov %r10, -0x21(%rsi) 629 L(P1Q3): 630 mov -0x19(%rdi), %r8 631 mov %r8, -0x19(%rsi) 632 L(P1Q2): 633 mov -0x11(%rdi), %rcx 634 mov %rcx, -0x11(%rsi) 635 L(P1Q1): 636 mov -0x9(%rdi), %r10 637 mov %r10, -0x9(%rsi) 638 L(P1Q0): 639 movzbq -0x1(%rdi), %r8 640 mov %r8b, -0x1(%rsi) 641 ret 642 643 .p2align 4 644 L(P2Q9): 645 mov -0x4a(%rdi), %r8 646 mov %r8, -0x4a(%rsi) 647 L(P2Q8): 648 mov -0x42(%rdi), %rcx 649 mov %rcx, -0x42(%rsi) 650 
L(P2Q7): 651 mov -0x3a(%rdi), %r10 652 mov %r10, -0x3a(%rsi) 653 L(P2Q6): 654 mov -0x32(%rdi), %r8 655 mov %r8, -0x32(%rsi) 656 L(P2Q5): 657 mov -0x2a(%rdi), %rcx 658 mov %rcx, -0x2a(%rsi) 659 L(P2Q4): 660 mov -0x22(%rdi), %r10 661 mov %r10, -0x22(%rsi) 662 L(P2Q3): 663 mov -0x1a(%rdi), %r8 664 mov %r8, -0x1a(%rsi) 665 L(P2Q2): 666 mov -0x12(%rdi), %rcx 667 mov %rcx, -0x12(%rsi) 668 L(P2Q1): 669 mov -0xa(%rdi), %r10 670 mov %r10, -0xa(%rsi) 671 L(P2Q0): 672 movzwq -0x2(%rdi), %r8 673 mov %r8w, -0x2(%rsi) 674 ret 675 676 .p2align 4 677 L(P3Q9): 678 mov -0x4b(%rdi), %r8 679 mov %r8, -0x4b(%rsi) 680 L(P3Q8): 681 mov -0x43(%rdi), %rcx 682 mov %rcx, -0x43(%rsi) 683 L(P3Q7): 684 mov -0x3b(%rdi), %r10 685 mov %r10, -0x3b(%rsi) 686 L(P3Q6): 687 mov -0x33(%rdi), %r8 688 mov %r8, -0x33(%rsi) 689 L(P3Q5): 690 mov -0x2b(%rdi), %rcx 691 mov %rcx, -0x2b(%rsi) 692 L(P3Q4): 693 mov -0x23(%rdi), %r10 694 mov %r10, -0x23(%rsi) 695 L(P3Q3): 696 mov -0x1b(%rdi), %r8 697 mov %r8, -0x1b(%rsi) 698 L(P3Q2): 699 mov -0x13(%rdi), %rcx 700 mov %rcx, -0x13(%rsi) 701 L(P3Q1): 702 mov -0xb(%rdi), %r10 703 mov %r10, -0xb(%rsi) 704 /* 705 * These trailing loads/stores have to do all their loads 1st, 706 * then do the stores. 
707 */ 708 L(P3Q0): 709 movzwq -0x3(%rdi), %r8 710 movzbq -0x1(%rdi), %r10 711 mov %r8w, -0x3(%rsi) 712 mov %r10b, -0x1(%rsi) 713 ret 714 715 .p2align 4 716 L(P4Q9): 717 mov -0x4c(%rdi), %r8 718 mov %r8, -0x4c(%rsi) 719 L(P4Q8): 720 mov -0x44(%rdi), %rcx 721 mov %rcx, -0x44(%rsi) 722 L(P4Q7): 723 mov -0x3c(%rdi), %r10 724 mov %r10, -0x3c(%rsi) 725 L(P4Q6): 726 mov -0x34(%rdi), %r8 727 mov %r8, -0x34(%rsi) 728 L(P4Q5): 729 mov -0x2c(%rdi), %rcx 730 mov %rcx, -0x2c(%rsi) 731 L(P4Q4): 732 mov -0x24(%rdi), %r10 733 mov %r10, -0x24(%rsi) 734 L(P4Q3): 735 mov -0x1c(%rdi), %r8 736 mov %r8, -0x1c(%rsi) 737 L(P4Q2): 738 mov -0x14(%rdi), %rcx 739 mov %rcx, -0x14(%rsi) 740 L(P4Q1): 741 mov -0xc(%rdi), %r10 742 mov %r10, -0xc(%rsi) 743 L(P4Q0): 744 mov -0x4(%rdi), %r8d 745 mov %r8d, -0x4(%rsi) 746 ret 747 748 .p2align 4 749 L(P5Q9): 750 mov -0x4d(%rdi), %r8 751 mov %r8, -0x4d(%rsi) 752 L(P5Q8): 753 mov -0x45(%rdi), %rcx 754 mov %rcx, -0x45(%rsi) 755 L(P5Q7): 756 mov -0x3d(%rdi), %r10 757 mov %r10, -0x3d(%rsi) 758 L(P5Q6): 759 mov -0x35(%rdi), %r8 760 mov %r8, -0x35(%rsi) 761 L(P5Q5): 762 mov -0x2d(%rdi), %rcx 763 mov %rcx, -0x2d(%rsi) 764 L(P5Q4): 765 mov -0x25(%rdi), %r10 766 mov %r10, -0x25(%rsi) 767 L(P5Q3): 768 mov -0x1d(%rdi), %r8 769 mov %r8, -0x1d(%rsi) 770 L(P5Q2): 771 mov -0x15(%rdi), %rcx 772 mov %rcx, -0x15(%rsi) 773 L(P5Q1): 774 mov -0xd(%rdi), %r10 775 mov %r10, -0xd(%rsi) 776 L(P5Q0): 777 mov -0x5(%rdi), %r8d 778 movzbq -0x1(%rdi), %r10 779 mov %r8d, -0x5(%rsi) 780 mov %r10b, -0x1(%rsi) 781 ret 782 783 .p2align 4 784 L(P6Q9): 785 mov -0x4e(%rdi), %r8 786 mov %r8, -0x4e(%rsi) 787 L(P6Q8): 788 mov -0x46(%rdi), %rcx 789 mov %rcx, -0x46(%rsi) 790 L(P6Q7): 791 mov -0x3e(%rdi), %r10 792 mov %r10, -0x3e(%rsi) 793 L(P6Q6): 794 mov -0x36(%rdi), %r8 795 mov %r8, -0x36(%rsi) 796 L(P6Q5): 797 mov -0x2e(%rdi), %rcx 798 mov %rcx, -0x2e(%rsi) 799 L(P6Q4): 800 mov -0x26(%rdi), %r10 801 mov %r10, -0x26(%rsi) 802 L(P6Q3): 803 mov -0x1e(%rdi), %r8 804 mov %r8, -0x1e(%rsi) 805 
L(P6Q2): 806 mov -0x16(%rdi), %rcx 807 mov %rcx, -0x16(%rsi) 808 L(P6Q1): 809 mov -0xe(%rdi), %r10 810 mov %r10, -0xe(%rsi) 811 L(P6Q0): 812 mov -0x6(%rdi), %r8d 813 movzwq -0x2(%rdi), %r10 814 mov %r8d, -0x6(%rsi) 815 mov %r10w, -0x2(%rsi) 816 ret 817 818 .p2align 4 819 L(P7Q9): 820 mov -0x4f(%rdi), %r8 821 mov %r8, -0x4f(%rsi) 822 L(P7Q8): 823 mov -0x47(%rdi), %rcx 824 mov %rcx, -0x47(%rsi) 825 L(P7Q7): 826 mov -0x3f(%rdi), %r10 827 mov %r10, -0x3f(%rsi) 828 L(P7Q6): 829 mov -0x37(%rdi), %r8 830 mov %r8, -0x37(%rsi) 831 L(P7Q5): 832 mov -0x2f(%rdi), %rcx 833 mov %rcx, -0x2f(%rsi) 834 L(P7Q4): 835 mov -0x27(%rdi), %r10 836 mov %r10, -0x27(%rsi) 837 L(P7Q3): 838 mov -0x1f(%rdi), %r8 839 mov %r8, -0x1f(%rsi) 840 L(P7Q2): 841 mov -0x17(%rdi), %rcx 842 mov %rcx, -0x17(%rsi) 843 L(P7Q1): 844 mov -0xf(%rdi), %r10 845 mov %r10, -0xf(%rsi) 846 L(P7Q0): 847 mov -0x7(%rdi), %r8d 848 movzwq -0x3(%rdi), %r10 849 movzbq -0x1(%rdi), %rcx 850 mov %r8d, -0x7(%rsi) 851 mov %r10w, -0x3(%rsi) 852 mov %cl, -0x1(%rsi) 853 ret 854 855 /* 856 * For large sizes rep smovq is fastest. 857 * Transition point determined experimentally as measured on 858 * Intel Xeon processors (incl. Nehalem and previous generations) and 859 * AMD Opteron. The transition value is patched at boot time to avoid 860 * memory reference hit. 861 */ 862 .globl bcopy_patch_start 863 bcopy_patch_start: 864 cmpq $BCOPY_NHM_REP, %rdx 865 .globl bcopy_patch_end 866 bcopy_patch_end: 867 868 .p2align 4 869 .globl bcopy_ck_size 870 bcopy_ck_size: 871 cmpq $BCOPY_DFLT_REP, %rdx 872 jae L(use_rep) 873 874 /* 875 * Align to a 8-byte boundary. Avoids penalties from unaligned stores 876 * as well as from stores spanning cachelines. 
877 */ 878 test $0x7, %rsi 879 jz L(aligned_loop) 880 test $0x1, %rsi 881 jz 2f 882 movzbq (%rdi), %r8 883 dec %rdx 884 inc %rdi 885 mov %r8b, (%rsi) 886 inc %rsi 887 2: 888 test $0x2, %rsi 889 jz 4f 890 movzwq (%rdi), %r8 891 sub $0x2, %rdx 892 add $0x2, %rdi 893 mov %r8w, (%rsi) 894 add $0x2, %rsi 895 4: 896 test $0x4, %rsi 897 jz L(aligned_loop) 898 mov (%rdi), %r8d 899 sub $0x4, %rdx 900 add $0x4, %rdi 901 mov %r8d, (%rsi) 902 add $0x4, %rsi 903 904 /* 905 * Copy 64-bytes per loop 906 */ 907 .p2align 4 908 L(aligned_loop): 909 mov (%rdi), %r8 910 mov 0x8(%rdi), %r10 911 lea -0x40(%rdx), %rdx 912 mov %r8, (%rsi) 913 mov %r10, 0x8(%rsi) 914 mov 0x10(%rdi), %rcx 915 mov 0x18(%rdi), %r8 916 mov %rcx, 0x10(%rsi) 917 mov %r8, 0x18(%rsi) 918 919 cmp $0x40, %rdx 920 mov 0x20(%rdi), %r10 921 mov 0x28(%rdi), %rcx 922 mov %r10, 0x20(%rsi) 923 mov %rcx, 0x28(%rsi) 924 mov 0x30(%rdi), %r8 925 mov 0x38(%rdi), %r10 926 lea 0x40(%rdi), %rdi 927 mov %r8, 0x30(%rsi) 928 mov %r10, 0x38(%rsi) 929 lea 0x40(%rsi), %rsi 930 jae L(aligned_loop) 931 932 /* 933 * Copy remaining bytes (0-63) 934 */ 935 L(do_remainder): 936 leaq L(fwdPxQx)(%rip), %r10 937 addq %rdx, %rdi 938 addq %rdx, %rsi 939 movslq (%r10,%rdx,4), %rcx 940 leaq (%rcx,%r10,1), %r10 941 jmpq *%r10 942 943 /* 944 * Use rep smovq. Copy remainder via unrolled code 945 */ 946 .p2align 4 947 L(use_rep): 948 xchgq %rdi, %rsi /* %rsi = source, %rdi = destination */ 949 movq %rdx, %rcx /* %rcx = count */ 950 shrq $3, %rcx /* 8-byte word count */ 951 rep 952 smovq 953 954 xchgq %rsi, %rdi /* %rdi = src, %rsi = destination */ 955 andq $7, %rdx /* remainder */ 956 jnz L(do_remainder) 957 ret 958 #undef L 959 960 #ifdef DEBUG 961 /* 962 * Setup frame on the run-time stack. The end of the input argument 963 * area must be aligned on a 16 byte boundary. The stack pointer %rsp, 964 * always points to the end of the latest allocated stack frame. 965 * panic(const char *format, ...) is a varargs function. 
When a 966 * function taking variable arguments is called, %rax must be set 967 * to eight times the number of floating point parameters passed 968 * to the function in SSE registers. 969 */ 970 call_panic: 971 pushq %rbp /* align stack properly */ 972 movq %rsp, %rbp 973 xorl %eax, %eax /* no variable arguments */ 974 call panic /* %rdi = format string */ 975 #endif 976 SET_SIZE(bcopy_altentry) 977 SET_SIZE(bcopy) 978 979 #elif defined(__i386) 980 981 #define ARG_FROM 4 982 #define ARG_TO 8 983 #define ARG_COUNT 12 984 985 ENTRY(bcopy) 986 #ifdef DEBUG 987 movl ARG_COUNT(%esp), %eax 988 orl %eax, %eax 989 jz 1f 990 movl postbootkernelbase, %eax 991 cmpl %eax, ARG_FROM(%esp) 992 jb 0f 993 cmpl %eax, ARG_TO(%esp) 994 jnb 1f 995 0: pushl %ebp 996 movl %esp, %ebp 997 pushl $.bcopy_panic_msg 998 call panic 999 1: 1000 #endif 1001 do_copy: 1002 movl %esi, %eax /* save registers */ 1003 movl %edi, %edx 1004 movl ARG_COUNT(%esp), %ecx 1005 movl ARG_FROM(%esp), %esi 1006 movl ARG_TO(%esp), %edi 1007 1008 shrl $2, %ecx /* word count */ 1009 rep 1010 smovl 1011 movl ARG_COUNT(%esp), %ecx 1012 andl $3, %ecx /* bytes left over */ 1013 rep 1014 smovb 1015 movl %eax, %esi /* restore registers */ 1016 movl %edx, %edi 1017 ret 1018 SET_SIZE(bcopy) 1019 1020 #undef ARG_COUNT 1021 #undef ARG_FROM 1022 #undef ARG_TO 1023 1024 #endif /* __i386 */ 1025 #endif /* __lint */ 1026 1027 1028 /* 1029 * Zero a block of storage, returning an error code if we 1030 * take a kernel pagefault which cannot be resolved. 
1031 * Returns errno value on pagefault error, 0 if all ok 1032 */ 1033 1034 #if defined(__lint) 1035 1036 /* ARGSUSED */ 1037 int 1038 kzero(void *addr, size_t count) 1039 { return (0); } 1040 1041 #else /* __lint */ 1042 1043 #if defined(__amd64) 1044 1045 ENTRY(kzero) 1046 #ifdef DEBUG 1047 cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */ 1048 jnb 0f 1049 leaq .kzero_panic_msg(%rip), %rdi 1050 jmp call_panic /* setup stack and call panic */ 1051 0: 1052 #endif 1053 /* 1054 * pass lofault value as 3rd argument for fault return 1055 */ 1056 leaq _kzeroerr(%rip), %rdx 1057 1058 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ 1059 movq T_LOFAULT(%r9), %r11 /* save the current lofault */ 1060 movq %rdx, T_LOFAULT(%r9) /* new lofault */ 1061 call bzero_altentry 1062 xorl %eax, %eax 1063 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ 1064 ret 1065 /* 1066 * A fault during bzero is indicated through an errno value 1067 * in %rax when we iretq to here. 1068 */ 1069 _kzeroerr: 1070 addq $8, %rsp /* pop bzero_altentry call ret addr */ 1071 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ 1072 ret 1073 SET_SIZE(kzero) 1074 1075 #elif defined(__i386) 1076 1077 #define ARG_ADDR 8 1078 #define ARG_COUNT 12 1079 1080 ENTRY(kzero) 1081 #ifdef DEBUG 1082 pushl %ebp 1083 movl %esp, %ebp 1084 movl postbootkernelbase, %eax 1085 cmpl %eax, ARG_ADDR(%ebp) 1086 jnb 0f 1087 pushl $.kzero_panic_msg 1088 call panic 1089 0: popl %ebp 1090 #endif 1091 lea _kzeroerr, %eax /* kzeroerr is lofault value */ 1092 1093 pushl %ebp /* save stack base */ 1094 movl %esp, %ebp /* set new stack base */ 1095 pushl %edi /* save %edi */ 1096 1097 mov %gs:CPU_THREAD, %edx 1098 movl T_LOFAULT(%edx), %edi 1099 pushl %edi /* save the current lofault */ 1100 movl %eax, T_LOFAULT(%edx) /* new lofault */ 1101 1102 movl ARG_COUNT(%ebp), %ecx /* get size in bytes */ 1103 movl ARG_ADDR(%ebp), %edi /* %edi <- address of bytes to clear */ 1104 shrl $2, %ecx /* Count of double words to 
zero */ 1105 xorl %eax, %eax /* sstol val */ 1106 rep 1107 sstol /* %ecx contains words to clear (%eax=0) */ 1108 1109 movl ARG_COUNT(%ebp), %ecx /* get size in bytes */ 1110 andl $3, %ecx /* do mod 4 */ 1111 rep 1112 sstob /* %ecx contains residual bytes to clear */ 1113 1114 /* 1115 * A fault during kzero is indicated through an errno value 1116 * in %eax when we iret to here. 1117 */ 1118 _kzeroerr: 1119 popl %edi 1120 movl %edi, T_LOFAULT(%edx) /* restore the original lofault */ 1121 popl %edi 1122 popl %ebp 1123 ret 1124 SET_SIZE(kzero) 1125 1126 #undef ARG_ADDR 1127 #undef ARG_COUNT 1128 1129 #endif /* __i386 */ 1130 #endif /* __lint */ 1131 1132 /* 1133 * Zero a block of storage. 1134 */ 1135 1136 #if defined(__lint) 1137 1138 /* ARGSUSED */ 1139 void 1140 bzero(void *addr, size_t count) 1141 {} 1142 1143 #else /* __lint */ 1144 1145 #if defined(__amd64) 1146 1147 ENTRY(bzero) 1148 #ifdef DEBUG 1149 cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */ 1150 jnb 0f 1151 leaq .bzero_panic_msg(%rip), %rdi 1152 jmp call_panic /* setup stack and call panic */ 1153 0: 1154 #endif 1155 ALTENTRY(bzero_altentry) 1156 do_zero: 1157 #define L(s) .bzero/**/s 1158 xorl %eax, %eax 1159 1160 cmpq $0x50, %rsi /* 80 */ 1161 jae L(ck_align) 1162 1163 /* 1164 * Performance data shows many callers are zeroing small buffers. So 1165 * for best perf for these sizes unrolled code is used. Store zeros 1166 * without worrying about alignment. 
1167 */ 1168 leaq L(setPxQx)(%rip), %r10 1169 addq %rsi, %rdi 1170 movslq (%r10,%rsi,4), %rcx 1171 leaq (%rcx,%r10,1), %r10 1172 jmpq *%r10 1173 1174 .p2align 4 1175 L(setPxQx): 1176 .int L(P0Q0)-L(setPxQx) /* 0 */ 1177 .int L(P1Q0)-L(setPxQx) 1178 .int L(P2Q0)-L(setPxQx) 1179 .int L(P3Q0)-L(setPxQx) 1180 .int L(P4Q0)-L(setPxQx) 1181 .int L(P5Q0)-L(setPxQx) 1182 .int L(P6Q0)-L(setPxQx) 1183 .int L(P7Q0)-L(setPxQx) 1184 1185 .int L(P0Q1)-L(setPxQx) /* 8 */ 1186 .int L(P1Q1)-L(setPxQx) 1187 .int L(P2Q1)-L(setPxQx) 1188 .int L(P3Q1)-L(setPxQx) 1189 .int L(P4Q1)-L(setPxQx) 1190 .int L(P5Q1)-L(setPxQx) 1191 .int L(P6Q1)-L(setPxQx) 1192 .int L(P7Q1)-L(setPxQx) 1193 1194 .int L(P0Q2)-L(setPxQx) /* 16 */ 1195 .int L(P1Q2)-L(setPxQx) 1196 .int L(P2Q2)-L(setPxQx) 1197 .int L(P3Q2)-L(setPxQx) 1198 .int L(P4Q2)-L(setPxQx) 1199 .int L(P5Q2)-L(setPxQx) 1200 .int L(P6Q2)-L(setPxQx) 1201 .int L(P7Q2)-L(setPxQx) 1202 1203 .int L(P0Q3)-L(setPxQx) /* 24 */ 1204 .int L(P1Q3)-L(setPxQx) 1205 .int L(P2Q3)-L(setPxQx) 1206 .int L(P3Q3)-L(setPxQx) 1207 .int L(P4Q3)-L(setPxQx) 1208 .int L(P5Q3)-L(setPxQx) 1209 .int L(P6Q3)-L(setPxQx) 1210 .int L(P7Q3)-L(setPxQx) 1211 1212 .int L(P0Q4)-L(setPxQx) /* 32 */ 1213 .int L(P1Q4)-L(setPxQx) 1214 .int L(P2Q4)-L(setPxQx) 1215 .int L(P3Q4)-L(setPxQx) 1216 .int L(P4Q4)-L(setPxQx) 1217 .int L(P5Q4)-L(setPxQx) 1218 .int L(P6Q4)-L(setPxQx) 1219 .int L(P7Q4)-L(setPxQx) 1220 1221 .int L(P0Q5)-L(setPxQx) /* 40 */ 1222 .int L(P1Q5)-L(setPxQx) 1223 .int L(P2Q5)-L(setPxQx) 1224 .int L(P3Q5)-L(setPxQx) 1225 .int L(P4Q5)-L(setPxQx) 1226 .int L(P5Q5)-L(setPxQx) 1227 .int L(P6Q5)-L(setPxQx) 1228 .int L(P7Q5)-L(setPxQx) 1229 1230 .int L(P0Q6)-L(setPxQx) /* 48 */ 1231 .int L(P1Q6)-L(setPxQx) 1232 .int L(P2Q6)-L(setPxQx) 1233 .int L(P3Q6)-L(setPxQx) 1234 .int L(P4Q6)-L(setPxQx) 1235 .int L(P5Q6)-L(setPxQx) 1236 .int L(P6Q6)-L(setPxQx) 1237 .int L(P7Q6)-L(setPxQx) 1238 1239 .int L(P0Q7)-L(setPxQx) /* 56 */ 1240 .int L(P1Q7)-L(setPxQx) 1241 .int L(P2Q7)-L(setPxQx) 1242 
.int L(P3Q7)-L(setPxQx) 1243 .int L(P4Q7)-L(setPxQx) 1244 .int L(P5Q7)-L(setPxQx) 1245 .int L(P6Q7)-L(setPxQx) 1246 .int L(P7Q7)-L(setPxQx) 1247 1248 .int L(P0Q8)-L(setPxQx) /* 64 */ 1249 .int L(P1Q8)-L(setPxQx) 1250 .int L(P2Q8)-L(setPxQx) 1251 .int L(P3Q8)-L(setPxQx) 1252 .int L(P4Q8)-L(setPxQx) 1253 .int L(P5Q8)-L(setPxQx) 1254 .int L(P6Q8)-L(setPxQx) 1255 .int L(P7Q8)-L(setPxQx) 1256 1257 .int L(P0Q9)-L(setPxQx) /* 72 */ 1258 .int L(P1Q9)-L(setPxQx) 1259 .int L(P2Q9)-L(setPxQx) 1260 .int L(P3Q9)-L(setPxQx) 1261 .int L(P4Q9)-L(setPxQx) 1262 .int L(P5Q9)-L(setPxQx) 1263 .int L(P6Q9)-L(setPxQx) 1264 .int L(P7Q9)-L(setPxQx) /* 79 */ 1265 1266 .p2align 4 1267 L(P0Q9): mov %rax, -0x48(%rdi) 1268 L(P0Q8): mov %rax, -0x40(%rdi) 1269 L(P0Q7): mov %rax, -0x38(%rdi) 1270 L(P0Q6): mov %rax, -0x30(%rdi) 1271 L(P0Q5): mov %rax, -0x28(%rdi) 1272 L(P0Q4): mov %rax, -0x20(%rdi) 1273 L(P0Q3): mov %rax, -0x18(%rdi) 1274 L(P0Q2): mov %rax, -0x10(%rdi) 1275 L(P0Q1): mov %rax, -0x8(%rdi) 1276 L(P0Q0): 1277 ret 1278 1279 .p2align 4 1280 L(P1Q9): mov %rax, -0x49(%rdi) 1281 L(P1Q8): mov %rax, -0x41(%rdi) 1282 L(P1Q7): mov %rax, -0x39(%rdi) 1283 L(P1Q6): mov %rax, -0x31(%rdi) 1284 L(P1Q5): mov %rax, -0x29(%rdi) 1285 L(P1Q4): mov %rax, -0x21(%rdi) 1286 L(P1Q3): mov %rax, -0x19(%rdi) 1287 L(P1Q2): mov %rax, -0x11(%rdi) 1288 L(P1Q1): mov %rax, -0x9(%rdi) 1289 L(P1Q0): mov %al, -0x1(%rdi) 1290 ret 1291 1292 .p2align 4 1293 L(P2Q9): mov %rax, -0x4a(%rdi) 1294 L(P2Q8): mov %rax, -0x42(%rdi) 1295 L(P2Q7): mov %rax, -0x3a(%rdi) 1296 L(P2Q6): mov %rax, -0x32(%rdi) 1297 L(P2Q5): mov %rax, -0x2a(%rdi) 1298 L(P2Q4): mov %rax, -0x22(%rdi) 1299 L(P2Q3): mov %rax, -0x1a(%rdi) 1300 L(P2Q2): mov %rax, -0x12(%rdi) 1301 L(P2Q1): mov %rax, -0xa(%rdi) 1302 L(P2Q0): mov %ax, -0x2(%rdi) 1303 ret 1304 1305 .p2align 4 1306 L(P3Q9): mov %rax, -0x4b(%rdi) 1307 L(P3Q8): mov %rax, -0x43(%rdi) 1308 L(P3Q7): mov %rax, -0x3b(%rdi) 1309 L(P3Q6): mov %rax, -0x33(%rdi) 1310 L(P3Q5): mov %rax, -0x2b(%rdi) 1311 L(P3Q4): 
mov %rax, -0x23(%rdi) 1312 L(P3Q3): mov %rax, -0x1b(%rdi) 1313 L(P3Q2): mov %rax, -0x13(%rdi) 1314 L(P3Q1): mov %rax, -0xb(%rdi) 1315 L(P3Q0): mov %ax, -0x3(%rdi) 1316 mov %al, -0x1(%rdi) 1317 ret 1318 1319 .p2align 4 1320 L(P4Q9): mov %rax, -0x4c(%rdi) 1321 L(P4Q8): mov %rax, -0x44(%rdi) 1322 L(P4Q7): mov %rax, -0x3c(%rdi) 1323 L(P4Q6): mov %rax, -0x34(%rdi) 1324 L(P4Q5): mov %rax, -0x2c(%rdi) 1325 L(P4Q4): mov %rax, -0x24(%rdi) 1326 L(P4Q3): mov %rax, -0x1c(%rdi) 1327 L(P4Q2): mov %rax, -0x14(%rdi) 1328 L(P4Q1): mov %rax, -0xc(%rdi) 1329 L(P4Q0): mov %eax, -0x4(%rdi) 1330 ret 1331 1332 .p2align 4 1333 L(P5Q9): mov %rax, -0x4d(%rdi) 1334 L(P5Q8): mov %rax, -0x45(%rdi) 1335 L(P5Q7): mov %rax, -0x3d(%rdi) 1336 L(P5Q6): mov %rax, -0x35(%rdi) 1337 L(P5Q5): mov %rax, -0x2d(%rdi) 1338 L(P5Q4): mov %rax, -0x25(%rdi) 1339 L(P5Q3): mov %rax, -0x1d(%rdi) 1340 L(P5Q2): mov %rax, -0x15(%rdi) 1341 L(P5Q1): mov %rax, -0xd(%rdi) 1342 L(P5Q0): mov %eax, -0x5(%rdi) 1343 mov %al, -0x1(%rdi) 1344 ret 1345 1346 .p2align 4 1347 L(P6Q9): mov %rax, -0x4e(%rdi) 1348 L(P6Q8): mov %rax, -0x46(%rdi) 1349 L(P6Q7): mov %rax, -0x3e(%rdi) 1350 L(P6Q6): mov %rax, -0x36(%rdi) 1351 L(P6Q5): mov %rax, -0x2e(%rdi) 1352 L(P6Q4): mov %rax, -0x26(%rdi) 1353 L(P6Q3): mov %rax, -0x1e(%rdi) 1354 L(P6Q2): mov %rax, -0x16(%rdi) 1355 L(P6Q1): mov %rax, -0xe(%rdi) 1356 L(P6Q0): mov %eax, -0x6(%rdi) 1357 mov %ax, -0x2(%rdi) 1358 ret 1359 1360 .p2align 4 1361 L(P7Q9): mov %rax, -0x4f(%rdi) 1362 L(P7Q8): mov %rax, -0x47(%rdi) 1363 L(P7Q7): mov %rax, -0x3f(%rdi) 1364 L(P7Q6): mov %rax, -0x37(%rdi) 1365 L(P7Q5): mov %rax, -0x2f(%rdi) 1366 L(P7Q4): mov %rax, -0x27(%rdi) 1367 L(P7Q3): mov %rax, -0x1f(%rdi) 1368 L(P7Q2): mov %rax, -0x17(%rdi) 1369 L(P7Q1): mov %rax, -0xf(%rdi) 1370 L(P7Q0): mov %eax, -0x7(%rdi) 1371 mov %ax, -0x3(%rdi) 1372 mov %al, -0x1(%rdi) 1373 ret 1374 1375 /* 1376 * Align to a 16-byte boundary. Avoids penalties from unaligned stores 1377 * as well as from stores spanning cachelines. 
Note 16-byte alignment 1378 * is better in case where rep sstosq is used. 1379 */ 1380 .p2align 4 1381 L(ck_align): 1382 test $0xf, %rdi 1383 jz L(aligned_now) 1384 test $1, %rdi 1385 jz 2f 1386 mov %al, (%rdi) 1387 dec %rsi 1388 lea 1(%rdi),%rdi 1389 2: 1390 test $2, %rdi 1391 jz 4f 1392 mov %ax, (%rdi) 1393 sub $2, %rsi 1394 lea 2(%rdi),%rdi 1395 4: 1396 test $4, %rdi 1397 jz 8f 1398 mov %eax, (%rdi) 1399 sub $4, %rsi 1400 lea 4(%rdi),%rdi 1401 8: 1402 test $8, %rdi 1403 jz L(aligned_now) 1404 mov %rax, (%rdi) 1405 sub $8, %rsi 1406 lea 8(%rdi),%rdi 1407 1408 /* 1409 * For large sizes rep sstoq is fastest. 1410 * Transition point determined experimentally as measured on 1411 * Intel Xeon processors (incl. Nehalem) and AMD Opteron. 1412 */ 1413 L(aligned_now): 1414 cmp $BZERO_USE_REP, %rsi 1415 ja L(use_rep) 1416 1417 /* 1418 * zero 64-bytes per loop 1419 */ 1420 .p2align 4 1421 L(bzero_loop): 1422 leaq -0x40(%rsi), %rsi 1423 cmpq $0x40, %rsi 1424 movq %rax, (%rdi) 1425 movq %rax, 0x8(%rdi) 1426 movq %rax, 0x10(%rdi) 1427 movq %rax, 0x18(%rdi) 1428 movq %rax, 0x20(%rdi) 1429 movq %rax, 0x28(%rdi) 1430 movq %rax, 0x30(%rdi) 1431 movq %rax, 0x38(%rdi) 1432 leaq 0x40(%rdi), %rdi 1433 jae L(bzero_loop) 1434 1435 /* 1436 * Clear any remaining bytes.. 1437 */ 1438 9: 1439 leaq L(setPxQx)(%rip), %r10 1440 addq %rsi, %rdi 1441 movslq (%r10,%rsi,4), %rcx 1442 leaq (%rcx,%r10,1), %r10 1443 jmpq *%r10 1444 1445 /* 1446 * Use rep sstoq. 
Clear any remainder via unrolled code 1447 */ 1448 .p2align 4 1449 L(use_rep): 1450 movq %rsi, %rcx /* get size in bytes */ 1451 shrq $3, %rcx /* count of 8-byte words to zero */ 1452 rep 1453 sstoq /* %rcx = words to clear (%rax=0) */ 1454 andq $7, %rsi /* remaining bytes */ 1455 jnz 9b 1456 ret 1457 #undef L 1458 SET_SIZE(bzero_altentry) 1459 SET_SIZE(bzero) 1460 1461 #elif defined(__i386) 1462 1463 #define ARG_ADDR 4 1464 #define ARG_COUNT 8 1465 /* bzero(addr, count), i386 flavour: both arguments are read from the stack at ARG_ADDR(%esp) and ARG_COUNT(%esp). */ 1466 ENTRY(bzero) 1467 #ifdef DEBUG 1468 movl postbootkernelbase, %eax 1469 cmpl %eax, ARG_ADDR(%esp) /* panic unless addr >= postbootkernelbase */ 1470 jnb 0f 1471 pushl %ebp 1472 movl %esp, %ebp 1473 pushl $.bzero_panic_msg 1474 call panic 1475 0: 1476 #endif 1477 do_zero: 1478 movl %edi, %edx /* stash %edi in %edx; put back before ret */ 1479 movl ARG_COUNT(%esp), %ecx 1480 movl ARG_ADDR(%esp), %edi 1481 shrl $2, %ecx /* %ecx = count / 4 */ 1482 xorl %eax, %eax /* %eax = 0 */ 1483 rep 1484 sstol 1485 movl ARG_COUNT(%esp), %ecx 1486 andl $3, %ecx /* %ecx = count % 4, the leftover bytes */ 1487 rep 1488 sstob 1489 movl %edx, %edi /* restore the %edi saved at do_zero */ 1490 ret 1491 SET_SIZE(bzero) 1492 1493 #undef ARG_ADDR 1494 #undef ARG_COUNT 1495 1496 #endif /* __i386 */ 1497 #endif /* __lint */ 1498 1499 /* 1500 * Transfer data to and from user space - 1501 * Note that these routines can cause faults 1502 * It is assumed that the kernel has nothing at 1503 * less than KERNELBASE in the virtual address space. 1504 * 1505 * Note that copyin(9F) and copyout(9F) are part of the 1506 * DDI/DKI which specifies that they return '-1' on "errors." 1507 * 1508 * Sigh. 1509 * 1510 * So there are two extremely similar routines - xcopyin_nta() and 1511 * xcopyout_nta() which return the errno that we've faithfully computed. 1512 * This allows other callers (e.g. uiomove(9F)) to work correctly. 1513 * Given that these are used pretty heavily, we expand the calling 1514 * sequences inline for all flavours (rather than making wrappers). 1515 */ 1516 1517 /* 1518 * Copy user data to kernel space.
1519 */ 1520 1521 #if defined(__lint) 1522 1523 /* ARGSUSED */ 1524 int 1525 copyin(const void *uaddr, void *kaddr, size_t count) 1526 { return (0); } 1527 1528 #else /* __lint */ 1529 1530 #if defined(__amd64) 1531 /* copyin: hands the copy to do_copy_fault with _copyin_err as the lofault handler; when uaddr >= kernelbase or the copy faults, it falls back to the thread's copyops vector if one is set, otherwise it returns -1. */ 1532 ENTRY(copyin) 1533 pushq %rbp 1534 movq %rsp, %rbp 1535 subq $24, %rsp 1536 1537 /* 1538 * save args in case we trap and need to rerun as a copyop 1539 */ 1540 movq %rdi, (%rsp) 1541 movq %rsi, 0x8(%rsp) 1542 movq %rdx, 0x10(%rsp) 1543 1544 movq kernelbase(%rip), %rax 1545 #ifdef DEBUG 1546 cmpq %rax, %rsi /* %rsi = kaddr */ 1547 jnb 1f 1548 leaq .copyin_panic_msg(%rip), %rdi 1549 xorl %eax, %eax 1550 call panic 1551 1: 1552 #endif 1553 /* 1554 * pass lofault value as 4th argument to do_copy_fault 1555 */ 1556 leaq _copyin_err(%rip), %rcx 1557 1558 movq %gs:CPU_THREAD, %r9 1559 cmpq %rax, %rdi /* test uaddr < kernelbase */ 1560 jae 3f /* take copyop if uaddr >= kernelbase */ 1561 SMAP_DISABLE_INSTR(0) 1562 jmp do_copy_fault /* Takes care of leave for us */ 1563 1564 _copyin_err: 1565 SMAP_ENABLE_INSTR(2) 1566 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 1567 addq $8, %rsp /* pop bcopy_altentry call ret addr */ 1568 3: 1569 movq T_COPYOPS(%r9), %rax 1570 cmpq $0, %rax 1571 jz 2f 1572 /* 1573 * reload args for the copyop 1574 */ 1575 movq (%rsp), %rdi 1576 movq 0x8(%rsp), %rsi 1577 movq 0x10(%rsp), %rdx 1578 leave 1579 jmp *CP_COPYIN(%rax) 1580 1581 2: movl $-1, %eax /* T_COPYOPS is zero: return -1 */ 1582 leave 1583 ret 1584 SET_SIZE(copyin) 1585 1586 #elif defined(__i386) 1587 1588 #define ARG_UADDR 4 1589 #define ARG_KADDR 8 1590 1591 ENTRY(copyin) 1592 movl kernelbase, %ecx 1593 #ifdef DEBUG 1594 cmpl %ecx, ARG_KADDR(%esp) 1595 jnb 1f 1596 pushl %ebp 1597 movl %esp, %ebp 1598 pushl $.copyin_panic_msg 1599 call panic 1600 1: 1601 #endif 1602 lea _copyin_err, %eax 1603 1604 movl %gs:CPU_THREAD, %edx 1605 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */ 1606 jb do_copy_fault 1607 jmp 3f 1608 1609 _copyin_err: 1610 popl %ecx 1611 popl %edi 1612 movl %ecx,
T_LOFAULT(%edx) /* restore original lofault */ 1613 popl %esi 1614 popl %ebp 1615 3: 1616 movl T_COPYOPS(%edx), %eax 1617 cmpl $0, %eax 1618 jz 2f 1619 jmp *CP_COPYIN(%eax) 1620 1621 2: movl $-1, %eax /* T_COPYOPS is zero: return -1 */ 1622 ret 1623 SET_SIZE(copyin) 1624 1625 #undef ARG_UADDR 1626 #undef ARG_KADDR 1627 1628 #endif /* __i386 */ 1629 #endif /* __lint */ 1630 1631 #if defined(__lint) 1632 1633 /* ARGSUSED */ 1634 int 1635 xcopyin_nta(const void *uaddr, void *kaddr, size_t count, int copy_cached) 1636 { return (0); } 1637 1638 #else /* __lint */ 1639 1640 #if defined(__amd64) 1641 /* xcopyin_nta: same shape as copyin but the value left in %eax is an errno (EFAULT when uaddr >= kernelbase); do_copy_fault_nta is used only when copy_cached is zero, count >= XCOPY_MIN_SIZE and the alignment test at 5: passes, otherwise do_copy_fault is used. */ 1642 ENTRY(xcopyin_nta) 1643 pushq %rbp 1644 movq %rsp, %rbp 1645 subq $24, %rsp 1646 1647 /* 1648 * save args in case we trap and need to rerun as a copyop 1649 * %rcx is consumed in this routine so we don't need to save 1650 * it. 1651 */ 1652 movq %rdi, (%rsp) 1653 movq %rsi, 0x8(%rsp) 1654 movq %rdx, 0x10(%rsp) 1655 1656 movq kernelbase(%rip), %rax 1657 #ifdef DEBUG 1658 cmpq %rax, %rsi /* %rsi = kaddr */ 1659 jnb 1f 1660 leaq .xcopyin_panic_msg(%rip), %rdi 1661 xorl %eax, %eax 1662 call panic 1663 1: 1664 #endif 1665 movq %gs:CPU_THREAD, %r9 1666 cmpq %rax, %rdi /* test uaddr < kernelbase */ 1667 jae 4f 1668 cmpq $0, %rcx /* No non-temporal access? NOTE(review): copy_cached is declared int but all 64 bits of %rcx are tested -- confirm the upper half is always clean */ 1669 /* 1670 * pass lofault value as 4th argument to do_copy_fault 1671 */ 1672 leaq _xcopyin_err(%rip), %rcx /* doesn't set rflags */ 1673 jnz 6f /* use regular access */ 1674 /* 1675 * Make sure cnt is >= XCOPY_MIN_SIZE bytes 1676 */ 1677 cmpq $XCOPY_MIN_SIZE, %rdx 1678 jae 5f 1679 6: 1680 SMAP_DISABLE_INSTR(1) 1681 jmp do_copy_fault 1682 1683 /* 1684 * Make sure src and dst are NTA_ALIGN_SIZE aligned, 1685 * count is COUNT_ALIGN_SIZE aligned.
1686 */ 1687 5: 1688 movq %rdi, %r10 1689 orq %rsi, %r10 1690 andq $NTA_ALIGN_MASK, %r10 1691 orq %rdx, %r10 1692 andq $COUNT_ALIGN_MASK, %r10 1693 jnz 6b 1694 leaq _xcopyin_nta_err(%rip), %rcx /* doesn't set rflags */ 1695 SMAP_DISABLE_INSTR(2) 1696 jmp do_copy_fault_nta /* use non-temporal access */ 1697 1698 4: 1699 movl $EFAULT, %eax /* uaddr >= kernelbase */ 1700 jmp 3f 1701 1702 /* 1703 * A fault during do_copy_fault or do_copy_fault_nta is 1704 * indicated through an errno value in %rax and we iret from the 1705 * trap handler to here. 1706 */ 1707 _xcopyin_err: 1708 addq $8, %rsp /* pop bcopy_altentry call ret addr */ 1709 _xcopyin_nta_err: 1710 SMAP_ENABLE_INSTR(3) 1711 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 1712 3: 1713 movq T_COPYOPS(%r9), %r8 1714 cmpq $0, %r8 1715 jz 2f 1716 1717 /* 1718 * reload args for the copyop 1719 */ 1720 movq (%rsp), %rdi 1721 movq 0x8(%rsp), %rsi 1722 movq 0x10(%rsp), %rdx 1723 leave 1724 jmp *CP_XCOPYIN(%r8) 1725 1726 2: leave /* T_COPYOPS is zero: return with the errno already in %eax */ 1727 ret 1728 SET_SIZE(xcopyin_nta) 1729 1730 #elif defined(__i386) 1731 1732 #define ARG_UADDR 4 1733 #define ARG_KADDR 8 1734 #define ARG_COUNT 12 1735 #define ARG_CACHED 16 1736 1737 .globl use_sse_copy 1738 1739 ENTRY(xcopyin_nta) 1740 movl kernelbase, %ecx 1741 lea _xcopyin_err, %eax 1742 movl %gs:CPU_THREAD, %edx 1743 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */ 1744 jae 4f 1745 1746 cmpl $0, use_sse_copy /* no sse support */ 1747 jz do_copy_fault 1748 1749 cmpl $0, ARG_CACHED(%esp) /* copy_cached hint set? */ 1750 jnz do_copy_fault 1751 1752 /* 1753 * Make sure cnt is >= XCOPY_MIN_SIZE bytes 1754 */ 1755 cmpl $XCOPY_MIN_SIZE, ARG_COUNT(%esp) 1756 jb do_copy_fault 1757 1758 /* 1759 * Make sure src and dst are NTA_ALIGN_SIZE aligned, 1760 * count is COUNT_ALIGN_SIZE aligned.
1761 */ 1762 movl ARG_UADDR(%esp), %ecx 1763 orl ARG_KADDR(%esp), %ecx 1764 andl $NTA_ALIGN_MASK, %ecx 1765 orl ARG_COUNT(%esp), %ecx 1766 andl $COUNT_ALIGN_MASK, %ecx 1767 jnz do_copy_fault 1768 1769 jmp do_copy_fault_nta /* use non-temporal access */ 1770 1771 4: 1772 movl $EFAULT, %eax 1773 jmp 3f 1774 1775 /* 1776 * A fault during do_copy_fault or do_copy_fault_nta is 1777 * indicated through an errno value in %eax and we iret from the 1778 * trap handler to here. 1779 */ 1780 _xcopyin_err: 1781 popl %ecx 1782 popl %edi 1783 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */ 1784 popl %esi 1785 popl %ebp 1786 3: 1787 cmpl $0, T_COPYOPS(%edx) 1788 jz 2f 1789 movl T_COPYOPS(%edx), %eax 1790 jmp *CP_XCOPYIN(%eax) 1791 1792 2: rep; ret /* use 2 byte return instruction when branch target */ 1793 /* AMD Software Optimization Guide - Section 6.2 */ 1794 SET_SIZE(xcopyin_nta) 1795 1796 #undef ARG_UADDR 1797 #undef ARG_KADDR 1798 #undef ARG_COUNT 1799 #undef ARG_CACHED 1800 1801 #endif /* __i386 */ 1802 #endif /* __lint */ 1803 1804 /* 1805 * Copy kernel data to user space.
1806 */ 1807 1808 #if defined(__lint) 1809 1810 /* ARGSUSED */ 1811 int 1812 copyout(const void *kaddr, void *uaddr, size_t count) 1813 { return (0); } 1814 1815 #else /* __lint */ 1816 1817 #if defined(__amd64) 1818 /* copyout: hands the copy to do_copy_fault with _copyout_err as the lofault handler; when uaddr >= kernelbase or the copy faults, it falls back to the thread's copyops vector if one is set, otherwise it returns -1. */ 1819 ENTRY(copyout) 1820 pushq %rbp 1821 movq %rsp, %rbp 1822 subq $24, %rsp 1823 1824 /* 1825 * save args in case we trap and need to rerun as a copyop 1826 */ 1827 movq %rdi, (%rsp) 1828 movq %rsi, 0x8(%rsp) 1829 movq %rdx, 0x10(%rsp) 1830 1831 movq kernelbase(%rip), %rax 1832 #ifdef DEBUG 1833 cmpq %rax, %rdi /* %rdi = kaddr */ 1834 jnb 1f 1835 leaq .copyout_panic_msg(%rip), %rdi 1836 xorl %eax, %eax 1837 call panic 1838 1: 1839 #endif 1840 /* 1841 * pass lofault value as 4th argument to do_copy_fault 1842 */ 1843 leaq _copyout_err(%rip), %rcx 1844 1845 movq %gs:CPU_THREAD, %r9 1846 cmpq %rax, %rsi /* test uaddr < kernelbase */ 1847 jae 3f /* take copyop if uaddr >= kernelbase */ 1848 SMAP_DISABLE_INSTR(3) 1849 jmp do_copy_fault /* Calls leave for us */ 1850 1851 _copyout_err: 1852 SMAP_ENABLE_INSTR(4) 1853 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 1854 addq $8, %rsp /* pop bcopy_altentry call ret addr */ 1855 3: 1856 movq T_COPYOPS(%r9), %rax 1857 cmpq $0, %rax 1858 jz 2f 1859 1860 /* 1861 * reload args for the copyop 1862 */ 1863 movq (%rsp), %rdi 1864 movq 0x8(%rsp), %rsi 1865 movq 0x10(%rsp), %rdx 1866 leave 1867 jmp *CP_COPYOUT(%rax) 1868 1869 2: movl $-1, %eax /* T_COPYOPS is zero: return -1 */ 1870 leave 1871 ret 1872 SET_SIZE(copyout) 1873 1874 #elif defined(__i386) 1875 1876 #define ARG_KADDR 4 1877 #define ARG_UADDR 8 1878 1879 ENTRY(copyout) 1880 movl kernelbase, %ecx 1881 #ifdef DEBUG 1882 cmpl %ecx, ARG_KADDR(%esp) 1883 jnb 1f 1884 pushl %ebp 1885 movl %esp, %ebp 1886 pushl $.copyout_panic_msg 1887 call panic 1888 1: 1889 #endif 1890 lea _copyout_err, %eax 1891 movl %gs:CPU_THREAD, %edx 1892 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */ 1893 jb do_copy_fault 1894 jmp 3f 1895 1896 _copyout_err: 1897 popl %ecx 1898 popl %edi 1899 movl %ecx,
T_LOFAULT(%edx) /* restore original lofault */ 1900 popl %esi 1901 popl %ebp 1902 3: 1903 movl T_COPYOPS(%edx), %eax 1904 cmpl $0, %eax 1905 jz 2f 1906 jmp *CP_COPYOUT(%eax) 1907 1908 2: movl $-1, %eax /* T_COPYOPS is zero: return -1 */ 1909 ret 1910 SET_SIZE(copyout) 1911 1912 #undef ARG_UADDR 1913 #undef ARG_KADDR 1914 1915 #endif /* __i386 */ 1916 #endif /* __lint */ 1917 1918 #if defined(__lint) 1919 1920 /* ARGSUSED */ 1921 int 1922 xcopyout_nta(const void *kaddr, void *uaddr, size_t count, int copy_cached) 1923 { return (0); } 1924 1925 #else /* __lint */ 1926 1927 #if defined(__amd64) 1928 /* xcopyout_nta: same shape as copyout but the value left in %eax is an errno (EFAULT when uaddr >= kernelbase); do_copy_fault_nta is used only when copy_cached is zero, count >= XCOPY_MIN_SIZE and the alignment test at 5: passes, otherwise do_copy_fault is used. */ 1929 ENTRY(xcopyout_nta) 1930 pushq %rbp 1931 movq %rsp, %rbp 1932 subq $24, %rsp 1933 1934 /* 1935 * save args in case we trap and need to rerun as a copyop 1936 */ 1937 movq %rdi, (%rsp) 1938 movq %rsi, 0x8(%rsp) 1939 movq %rdx, 0x10(%rsp) 1940 1941 movq kernelbase(%rip), %rax 1942 #ifdef DEBUG 1943 cmpq %rax, %rdi /* %rdi = kaddr */ 1944 jnb 1f 1945 leaq .xcopyout_panic_msg(%rip), %rdi 1946 xorl %eax, %eax 1947 call panic 1948 1: 1949 #endif 1950 movq %gs:CPU_THREAD, %r9 1951 cmpq %rax, %rsi /* test uaddr < kernelbase */ 1952 jae 4f 1953 1954 cmpq $0, %rcx /* No non-temporal access? NOTE(review): copy_cached is declared int but all 64 bits of %rcx are tested -- confirm the upper half is always clean */ 1955 /* 1956 * pass lofault value as 4th argument to do_copy_fault 1957 */ 1958 leaq _xcopyout_err(%rip), %rcx /* doesn't set rflags */ 1959 jnz 6f /* use regular access */ 1960 /* 1961 * Make sure cnt is >= XCOPY_MIN_SIZE bytes 1962 */ 1963 cmpq $XCOPY_MIN_SIZE, %rdx 1964 jae 5f 1965 6: 1966 SMAP_DISABLE_INSTR(4) 1967 jmp do_copy_fault 1968 1969 /* 1970 * Make sure src and dst are NTA_ALIGN_SIZE aligned, 1971 * count is COUNT_ALIGN_SIZE aligned.
1972 */ 1973 5: 1974 movq %rdi, %r10 1975 orq %rsi, %r10 1976 andq $NTA_ALIGN_MASK, %r10 1977 orq %rdx, %r10 1978 andq $COUNT_ALIGN_MASK, %r10 1979 jnz 6b 1980 leaq _xcopyout_nta_err(%rip), %rcx 1981 SMAP_DISABLE_INSTR(5) 1982 call do_copy_fault_nta /* NOTE(review): xcopyin_nta enters do_copy_fault_nta with jmp, and _xcopyout_nta_err below reloads the saved args from (%rsp) without popping a return address -- confirm that a call is intended here */ 1983 SMAP_ENABLE_INSTR(5) 1984 ret 1985 1986 4: 1987 movl $EFAULT, %eax /* uaddr >= kernelbase */ 1988 jmp 3f 1989 1990 /* 1991 * A fault during do_copy_fault or do_copy_fault_nta is 1992 * indicated through an errno value in %rax and we iret from the 1993 * trap handler to here. 1994 */ 1995 _xcopyout_err: 1996 addq $8, %rsp /* pop bcopy_altentry call ret addr */ 1997 _xcopyout_nta_err: 1998 SMAP_ENABLE_INSTR(6) 1999 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 2000 3: 2001 movq T_COPYOPS(%r9), %r8 2002 cmpq $0, %r8 2003 jz 2f 2004 2005 /* 2006 * reload args for the copyop 2007 */ 2008 movq (%rsp), %rdi 2009 movq 0x8(%rsp), %rsi 2010 movq 0x10(%rsp), %rdx 2011 leave 2012 jmp *CP_XCOPYOUT(%r8) 2013 2014 2: leave /* T_COPYOPS is zero: return with the errno already in %eax */ 2015 ret 2016 SET_SIZE(xcopyout_nta) 2017 2018 #elif defined(__i386) 2019 2020 #define ARG_KADDR 4 2021 #define ARG_UADDR 8 2022 #define ARG_COUNT 12 2023 #define ARG_CACHED 16 2024 2025 ENTRY(xcopyout_nta) 2026 movl kernelbase, %ecx 2027 lea _xcopyout_err, %eax 2028 movl %gs:CPU_THREAD, %edx 2029 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */ 2030 jae 4f 2031 2032 cmpl $0, use_sse_copy /* no sse support */ 2033 jz do_copy_fault 2034 2035 cmpl $0, ARG_CACHED(%esp) /* copy_cached hint set? */ 2036 jnz do_copy_fault 2037 2038 /* 2039 * Make sure cnt is >= XCOPY_MIN_SIZE bytes 2040 */ 2041 cmpl $XCOPY_MIN_SIZE, ARG_COUNT(%esp) /* test the count argument; %edx holds the thread pointer here */ 2042 jb do_copy_fault 2043 2044 /* 2045 * Make sure src and dst are NTA_ALIGN_SIZE aligned, 2046 * count is COUNT_ALIGN_SIZE aligned.
2047 */ 2048 movl ARG_UADDR(%esp), %ecx 2049 orl ARG_KADDR(%esp), %ecx 2050 andl $NTA_ALIGN_MASK, %ecx 2051 orl ARG_COUNT(%esp), %ecx 2052 andl $COUNT_ALIGN_MASK, %ecx 2053 jnz do_copy_fault 2054 jmp do_copy_fault_nta /* use non-temporal access */ 2055 2056 4: 2057 movl $EFAULT, %eax 2058 jmp 3f 2059 2060 /* 2061 * A fault during do_copy_fault or do_copy_fault_nta is 2062 * indicated through an errno value in %eax and we iret from the 2063 * trap handler to here. 2064 */ 2065 _xcopyout_err: 2066 /* restore the original lofault */ 2067 popl %ecx 2068 popl %edi 2069 movl %ecx, T_LOFAULT(%edx) /* original lofault */ 2070 popl %esi 2071 popl %ebp 2072 3: 2073 cmpl $0, T_COPYOPS(%edx) 2074 jz 2f 2075 movl T_COPYOPS(%edx), %eax 2076 jmp *CP_XCOPYOUT(%eax) 2077 2078 2: rep; ret /* use 2 byte return instruction when branch target */ 2079 /* AMD Software Optimization Guide - Section 6.2 */ 2080 SET_SIZE(xcopyout_nta) 2081 2082 #undef ARG_UADDR 2083 #undef ARG_KADDR 2084 #undef ARG_COUNT 2085 #undef ARG_CACHED 2086 2087 #endif /* __i386 */ 2088 #endif /* __lint */ 2089 2090 /* 2091 * Copy a null terminated string from one point to another in 2092 * the kernel address space.
2093 */ 2094 2095 #if defined(__lint) 2096 2097 /* ARGSUSED */ 2098 int 2099 copystr(const char *from, char *to, size_t maxlength, size_t *lencopied) 2100 { return (0); } 2101 2102 #else /* __lint */ 2103 2104 #if defined(__amd64) 2105 /* copystr: copies bytes from -> to until a NUL has been copied (returns 0) or maxlength bytes are used up (returns ENAMETOOLONG); the number of bytes consumed is stored through lencopied when it is non-NULL. do_copystr is the shared entry that copyinstr and copyoutstr jump to with their own lofault handler in %r8 and %r10d set to 1 to request SMAP_ENABLE_INSTR on the way out. */ 2106 ENTRY(copystr) 2107 pushq %rbp 2108 movq %rsp, %rbp 2109 #ifdef DEBUG 2110 movq kernelbase(%rip), %rax 2111 cmpq %rax, %rdi /* %rdi = from */ 2112 jb 0f 2113 cmpq %rax, %rsi /* %rsi = to */ 2114 jnb 1f 2115 0: leaq .copystr_panic_msg(%rip), %rdi 2116 xorl %eax, %eax 2117 call panic 2118 1: 2119 #endif 2120 movq %gs:CPU_THREAD, %r9 2121 movq T_LOFAULT(%r9), %r8 /* pass current lofault value as */ 2122 /* 5th argument to do_copystr */ 2123 xorl %r10d,%r10d /* pass smap restore need in %r10d */ 2124 /* as a non-ABI 6th arg */ 2125 do_copystr: 2126 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ 2127 movq T_LOFAULT(%r9), %r11 /* save the current lofault */ 2128 movq %r8, T_LOFAULT(%r9) /* new lofault */ 2129 2130 movq %rdx, %r8 /* save maxlength */ 2131 2132 cmpq $0, %rdx /* %rdx = maxlength */ 2133 je copystr_enametoolong /* maxlength == 0 */ 2134 2135 copystr_loop: 2136 decq %r8 /* %r8 = bytes of maxlength still unused */ 2137 movb (%rdi), %al 2138 incq %rdi 2139 movb %al, (%rsi) 2140 incq %rsi 2141 cmpb $0, %al 2142 je copystr_null /* null char */ 2143 cmpq $0, %r8 2144 jne copystr_loop 2145 2146 copystr_enametoolong: 2147 movl $ENAMETOOLONG, %eax /* no NUL seen within maxlength bytes */ 2148 jmp copystr_out 2149 2150 copystr_null: 2151 xorl %eax, %eax /* no error */ 2152 2153 copystr_out: 2154 cmpq $0, %rcx /* want length?
*/ 2155 je copystr_smap /* no */ 2156 subq %r8, %rdx /* compute length and store it */ 2157 movq %rdx, (%rcx) 2158 2159 copystr_smap: 2160 cmpl $0, %r10d /* zero when entered through copystr, 1 from copyinstr/copyoutstr */ 2161 jz copystr_done 2162 SMAP_ENABLE_INSTR(7) 2163 2164 copystr_done: 2165 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ 2166 leave 2167 ret 2168 SET_SIZE(copystr) 2169 2170 #elif defined(__i386) 2171 2172 #define ARG_FROM 8 2173 #define ARG_TO 12 2174 #define ARG_MAXLEN 16 2175 #define ARG_LENCOPIED 20 2176 2177 ENTRY(copystr) 2178 #ifdef DEBUG 2179 pushl %ebp 2180 movl %esp, %ebp 2181 movl kernelbase, %eax 2182 cmpl %eax, ARG_FROM(%esp) 2183 jb 0f 2184 cmpl %eax, ARG_TO(%esp) 2185 jnb 1f 2186 0: pushl $.copystr_panic_msg 2187 call panic 2188 1: popl %ebp 2189 #endif 2190 /* get the current lofault address */ 2191 movl %gs:CPU_THREAD, %eax 2192 movl T_LOFAULT(%eax), %eax 2193 do_copystr: 2194 pushl %ebp /* setup stack frame */ 2195 movl %esp, %ebp 2196 pushl %ebx /* save registers */ 2197 pushl %edi 2198 2199 movl %gs:CPU_THREAD, %ebx 2200 movl T_LOFAULT(%ebx), %edi 2201 pushl %edi /* save the current lofault */ 2202 movl %eax, T_LOFAULT(%ebx) /* new lofault */ 2203 2204 movl ARG_MAXLEN(%ebp), %ecx 2205 cmpl $0, %ecx 2206 je copystr_enametoolong /* maxlength == 0 */ 2207 2208 movl ARG_FROM(%ebp), %ebx /* source address */ 2209 movl ARG_TO(%ebp), %edx /* destination address */ 2210 2211 copystr_loop: 2212 decl %ecx /* %ecx = bytes of maxlength still unused */ 2213 movb (%ebx), %al 2214 incl %ebx 2215 movb %al, (%edx) 2216 incl %edx 2217 cmpb $0, %al 2218 je copystr_null /* null char */ 2219 cmpl $0, %ecx 2220 jne copystr_loop 2221 2222 copystr_enametoolong: 2223 movl $ENAMETOOLONG, %eax 2224 jmp copystr_out 2225 2226 copystr_null: 2227 xorl %eax, %eax /* no error */ 2228 2229 copystr_out: 2230 cmpl $0, ARG_LENCOPIED(%ebp) /* want length?
*/ 2231 je copystr_done /* no */ 2232 movl ARG_MAXLEN(%ebp), %edx 2233 subl %ecx, %edx /* compute length and store it */ 2234 movl ARG_LENCOPIED(%ebp), %ecx 2235 movl %edx, (%ecx) 2236 2237 copystr_done: 2238 popl %edi /* the lofault value pushed at do_copystr */ 2239 movl %gs:CPU_THREAD, %ebx 2240 movl %edi, T_LOFAULT(%ebx) /* restore the original lofault */ 2241 2242 popl %edi 2243 popl %ebx 2244 popl %ebp 2245 ret 2246 SET_SIZE(copystr) 2247 2248 #undef ARG_FROM 2249 #undef ARG_TO 2250 #undef ARG_MAXLEN 2251 #undef ARG_LENCOPIED 2252 2253 #endif /* __i386 */ 2254 #endif /* __lint */ 2255 2256 /* 2257 * Copy a null terminated string from the user address space into 2258 * the kernel address space. 2259 */ 2260 2261 #if defined(__lint) 2262 2263 /* ARGSUSED */ 2264 int 2265 copyinstr(const char *uaddr, char *kaddr, size_t maxlength, 2266 size_t *lencopied) 2267 { return (0); } 2268 2269 #else /* __lint */ 2270 2271 #if defined(__amd64) 2272 /* copyinstr: jumps into do_copystr with _copyinstr_error as the lofault handler; when uaddr >= kernelbase or the copy faults, it falls back to the thread's copyops vector if one is set, otherwise it returns EFAULT. */ 2273 ENTRY(copyinstr) 2274 pushq %rbp 2275 movq %rsp, %rbp 2276 subq $32, %rsp 2277 2278 /* 2279 * save args in case we trap and need to rerun as a copyop 2280 */ 2281 movq %rdi, (%rsp) 2282 movq %rsi, 0x8(%rsp) 2283 movq %rdx, 0x10(%rsp) 2284 movq %rcx, 0x18(%rsp) 2285 2286 movq kernelbase(%rip), %rax 2287 #ifdef DEBUG 2288 cmpq %rax, %rsi /* %rsi = kaddr */ 2289 jnb 1f 2290 leaq .copyinstr_panic_msg(%rip), %rdi 2291 xorl %eax, %eax 2292 call panic 2293 1: 2294 #endif 2295 /* 2296 * pass lofault value as 5th argument to do_copystr 2297 * do_copystr expects whether or not we need smap in %r10d 2298 */ 2299 leaq _copyinstr_error(%rip), %r8 2300 movl $1, %r10d 2301 2302 cmpq %rax, %rdi /* test uaddr < kernelbase */ 2303 jae 4f /* uaddr >= kernelbase: skip the copy, try copyops */ 2304 SMAP_DISABLE_INSTR(6) 2305 jmp do_copystr 2306 4: 2307 movq %gs:CPU_THREAD, %r9 /* the code at 3: reads the thread from %r9 */ 2308 jmp 3f 2309 2310 _copyinstr_error: 2311 SMAP_ENABLE_INSTR(8) 2312 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 2313 3: 2314 movq T_COPYOPS(%r9), %rax 2315 cmpq $0, %rax 2316 jz 2f 2317 2318 /* 2319 * reload args for the copyop 2320 */ 2321 movq
(%rsp), %rdi 2322 movq 0x8(%rsp), %rsi 2323 movq 0x10(%rsp), %rdx 2324 movq 0x18(%rsp), %rcx 2325 leave 2326 jmp *CP_COPYINSTR(%rax) 2327 2328 2: movl $EFAULT, %eax /* return EFAULT */ 2329 leave 2330 ret 2331 SET_SIZE(copyinstr) 2332 2333 #elif defined(__i386) 2334 2335 #define ARG_UADDR 4 2336 #define ARG_KADDR 8 2337 2338 ENTRY(copyinstr) 2339 movl kernelbase, %ecx 2340 #ifdef DEBUG 2341 cmpl %ecx, ARG_KADDR(%esp) 2342 jnb 1f 2343 pushl %ebp 2344 movl %esp, %ebp 2345 pushl $.copyinstr_panic_msg 2346 call panic 2347 1: 2348 #endif 2349 lea _copyinstr_error, %eax 2350 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */ 2351 jb do_copystr 2352 movl %gs:CPU_THREAD, %edx 2353 jmp 3f 2354 2355 _copyinstr_error: 2356 popl %edi /* pops mirror do_copystr's pushes: saved lofault, %edi, %ebx, %ebp */ 2357 movl %gs:CPU_THREAD, %edx 2358 movl %edi, T_LOFAULT(%edx) /* original lofault */ 2359 2360 popl %edi 2361 popl %ebx 2362 popl %ebp 2363 3: 2364 movl T_COPYOPS(%edx), %eax 2365 cmpl $0, %eax 2366 jz 2f 2367 jmp *CP_COPYINSTR(%eax) 2368 2369 2: movl $EFAULT, %eax /* return EFAULT */ 2370 ret 2371 SET_SIZE(copyinstr) 2372 2373 #undef ARG_UADDR 2374 #undef ARG_KADDR 2375 2376 #endif /* __i386 */ 2377 #endif /* __lint */ 2378 2379 /* 2380 * Copy a null terminated string from the kernel 2381 * address space to the user address space.
2382 */ 2383 2384 #if defined(__lint) 2385 2386 /* ARGSUSED */ 2387 int 2388 copyoutstr(const char *kaddr, char *uaddr, size_t maxlength, 2389 size_t *lencopied) 2390 { return (0); } 2391 2392 #else /* __lint */ 2393 2394 #if defined(__amd64) 2395 /* copyoutstr: jumps into do_copystr with _copyoutstr_error as the lofault handler; when uaddr >= kernelbase or the copy faults, it falls back to the thread's copyops vector if one is set, otherwise it returns EFAULT. */ 2396 ENTRY(copyoutstr) 2397 pushq %rbp 2398 movq %rsp, %rbp 2399 subq $32, %rsp 2400 2401 /* 2402 * save args in case we trap and need to rerun as a copyop 2403 */ 2404 movq %rdi, (%rsp) 2405 movq %rsi, 0x8(%rsp) 2406 movq %rdx, 0x10(%rsp) 2407 movq %rcx, 0x18(%rsp) 2408 2409 movq kernelbase(%rip), %rax 2410 #ifdef DEBUG 2411 cmpq %rax, %rdi /* %rdi = kaddr */ 2412 jnb 1f 2413 leaq .copyoutstr_panic_msg(%rip), %rdi 2414 jmp call_panic /* setup stack and call panic */ 2415 1: 2416 #endif 2417 /* 2418 * pass lofault value as 5th argument to do_copystr 2419 * pass one as 6th argument to do_copystr in %r10d 2420 */ 2421 leaq _copyoutstr_error(%rip), %r8 2422 movl $1, %r10d 2423 2424 cmpq %rax, %rsi /* test uaddr < kernelbase */ 2425 jae 4f /* uaddr >= kernelbase: skip the copy, try copyops */ 2426 SMAP_DISABLE_INSTR(7) 2427 jmp do_copystr 2428 4: 2429 movq %gs:CPU_THREAD, %r9 /* the code at 3: reads the thread from %r9 */ 2430 jmp 3f 2431 2432 _copyoutstr_error: 2433 SMAP_ENABLE_INSTR(9) 2434 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ 2435 3: 2436 movq T_COPYOPS(%r9), %rax 2437 cmpq $0, %rax 2438 jz 2f 2439 2440 /* 2441 * reload args for the copyop 2442 */ 2443 movq (%rsp), %rdi 2444 movq 0x8(%rsp), %rsi 2445 movq 0x10(%rsp), %rdx 2446 movq 0x18(%rsp), %rcx 2447 leave 2448 jmp *CP_COPYOUTSTR(%rax) 2449 2450 2: movl $EFAULT, %eax /* return EFAULT */ 2451 leave 2452 ret 2453 SET_SIZE(copyoutstr) 2454 2455 #elif defined(__i386) 2456 2457 #define ARG_KADDR 4 2458 #define ARG_UADDR 8 2459 2460 ENTRY(copyoutstr) 2461 movl kernelbase, %ecx 2462 #ifdef DEBUG 2463 cmpl %ecx, ARG_KADDR(%esp) 2464 jnb 1f 2465 pushl %ebp 2466 movl %esp, %ebp 2467 pushl $.copyoutstr_panic_msg 2468 call panic 2469 1: 2470 #endif 2471 lea _copyoutstr_error, %eax 2472 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */ 2473 jb
do_copystr 2474 movl %gs:CPU_THREAD, %edx 2475 jmp 3f 2476 2477 _copyoutstr_error: 2478 popl %edi /* first word popped is written back to T_LOFAULT below */ 2479 movl %gs:CPU_THREAD, %edx 2480 movl %edi, T_LOFAULT(%edx) /* restore the original lofault */ 2481 2482 popl %edi 2483 popl %ebx 2484 popl %ebp 2485 3: 2486 movl T_COPYOPS(%edx), %eax 2487 cmpl $0, %eax 2488 jz 2f 2489 jmp *CP_COPYOUTSTR(%eax) 2490 2491 2: movl $EFAULT, %eax /* return EFAULT */ 2492 ret 2493 SET_SIZE(copyoutstr) 2494 2495 #undef ARG_KADDR 2496 #undef ARG_UADDR 2497 2498 #endif /* __i386 */ 2499 #endif /* __lint */ 2500 2501 /* 2502 * Since all of the fuword() variants are so similar, we have a macro to spit 2503 * them out. This allows us to create DTrace-unobservable functions easily. 2504 */ 2505 2506 #if defined(__lint) 2507 2508 #if defined(__amd64) 2509 2510 /* ARGSUSED */ 2511 int 2512 fuword64(const void *addr, uint64_t *dst) 2513 { return (0); } 2514 2515 #endif 2516 2517 /* ARGSUSED */ 2518 int 2519 fuword32(const void *addr, uint32_t *dst) 2520 { return (0); } 2521 2522 /* ARGSUSED */ 2523 int 2524 fuword16(const void *addr, uint16_t *dst) 2525 { return (0); } 2526 2527 /* ARGSUSED */ 2528 int 2529 fuword8(const void *addr, uint8_t *dst) 2530 { return (0); } 2531 2532 #else /* __lint */ 2533 2534 #if defined(__amd64) 2535 2536 /* 2537 * Note that we don't save and reload the arguments here 2538 * because their values are not altered in the copy path. 2539 * Additionally, on success SMAP_ENABLE_INSTR(EN1) is followed by a 2540 * ret that takes us straight back to our original caller.
2541 */ 2542 2543 #define FUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \ 2544 ENTRY(NAME) \ 2545 movq %gs:CPU_THREAD, %r9; \ 2546 cmpq kernelbase(%rip), %rdi; \ 2547 jae 1f; \ 2548 leaq _flt_/**/NAME, %rdx; \ 2549 movq %rdx, T_LOFAULT(%r9); \ 2550 SMAP_DISABLE_INSTR(DISNUM) \ 2551 INSTR (%rdi), REG; \ 2552 movq $0, T_LOFAULT(%r9); \ 2553 INSTR REG, (%rsi); \ 2554 xorl %eax, %eax; \ 2555 SMAP_ENABLE_INSTR(EN1) \ 2556 ret; \ 2557 _flt_/**/NAME: \ 2558 SMAP_ENABLE_INSTR(EN2) \ 2559 movq $0, T_LOFAULT(%r9); \ 2560 1: \ 2561 movq T_COPYOPS(%r9), %rax; \ 2562 cmpq $0, %rax; \ 2563 jz 2f; \ 2564 jmp *COPYOP(%rax); \ 2565 2: \ 2566 movl $-1, %eax; \ 2567 ret; \ 2568 SET_SIZE(NAME) 2569 2570 FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11) 2571 FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13) 2572 FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15) 2573 FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17) 2574 2575 #elif defined(__i386) 2576 2577 #define FUWORD(NAME, INSTR, REG, COPYOP) \ 2578 ENTRY(NAME) \ 2579 movl %gs:CPU_THREAD, %ecx; \ 2580 movl kernelbase, %eax; \ 2581 cmpl %eax, 4(%esp); \ 2582 jae 1f; \ 2583 lea _flt_/**/NAME, %edx; \ 2584 movl %edx, T_LOFAULT(%ecx); \ 2585 movl 4(%esp), %eax; \ 2586 movl 8(%esp), %edx; \ 2587 INSTR (%eax), REG; \ 2588 movl $0, T_LOFAULT(%ecx); \ 2589 INSTR REG, (%edx); \ 2590 xorl %eax, %eax; \ 2591 ret; \ 2592 _flt_/**/NAME: \ 2593 movl $0, T_LOFAULT(%ecx); \ 2594 1: \ 2595 movl T_COPYOPS(%ecx), %eax; \ 2596 cmpl $0, %eax; \ 2597 jz 2f; \ 2598 jmp *COPYOP(%eax); \ 2599 2: \ 2600 movl $-1, %eax; \ 2601 ret; \ 2602 SET_SIZE(NAME) 2603 2604 FUWORD(fuword32, movl, %eax, CP_FUWORD32) 2605 FUWORD(fuword16, movw, %ax, CP_FUWORD16) 2606 FUWORD(fuword8, movb, %al, CP_FUWORD8) 2607 2608 #endif /* __i386 */ 2609 2610 #undef FUWORD 2611 2612 #endif /* __lint */ 2613 2614 /* 2615 * Set user word.
2616 */ 2617 2618 #if defined(__lint) 2619 2620 #if defined(__amd64) 2621 2622 /* ARGSUSED */ 2623 int 2624 suword64(void *addr, uint64_t value) 2625 { return (0); } 2626 2627 #endif 2628 2629 /* ARGSUSED */ 2630 int 2631 suword32(void *addr, uint32_t value) 2632 { return (0); } 2633 2634 /* ARGSUSED */ 2635 int 2636 suword16(void *addr, uint16_t value) 2637 { return (0); } 2638 2639 /* ARGSUSED */ 2640 int 2641 suword8(void *addr, uint8_t value) 2642 { return (0); } 2643 2644 #else /* __lint */ 2645 2646 #if defined(__amd64) 2647 2648 /* 2649 * Note that we don't save and reload the arguments here 2650 * because their values are not altered in the copy path. 2651 */ 2652 /* SUWORD (amd64): emits ENTRY(NAME), which stores REG (%rsi or one of its sub-registers in the invocations below) through (%rdi) when %rdi is below kernelbase and returns 0. T_LOFAULT holds _flt_NAME only around that store. The _flt_NAME path and the path taken when %rdi is at or above kernelbase both jump through the COPYOP slot of T_COPYOPS if that pointer is non-zero, and return -1 otherwise. NOTE(review): SMAP_DISABLE_INSTR is placed before the T_LOFAULT store here but after it in the amd64 FUWORD macro of this file; confirm whether the ordering matters. */ 2653 #define SUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \ 2654 ENTRY(NAME) \ 2655 movq %gs:CPU_THREAD, %r9; \ 2656 cmpq kernelbase(%rip), %rdi; \ 2657 jae 1f; \ 2658 leaq _flt_/**/NAME, %rdx; \ 2659 SMAP_DISABLE_INSTR(DISNUM) \ 2660 movq %rdx, T_LOFAULT(%r9); \ 2661 INSTR REG, (%rdi); \ 2662 movq $0, T_LOFAULT(%r9); \ 2663 xorl %eax, %eax; \ 2664 SMAP_ENABLE_INSTR(EN1) \ 2665 ret; \ 2666 _flt_/**/NAME: \ 2667 SMAP_ENABLE_INSTR(EN2) \ 2668 movq $0, T_LOFAULT(%r9); \ 2669 1: \ 2670 movq T_COPYOPS(%r9), %rax; \ 2671 cmpq $0, %rax; \ 2672 jz 3f; \ 2673 jmp *COPYOP(%rax); \ 2674 3: \ 2675 movl $-1, %eax; \ 2676 ret; \ 2677 SET_SIZE(NAME) 2678 2679 SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19) 2680 SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21) 2681 SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23) 2682 SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25) 2683 2684 #elif defined(__i386) 2685 /* SUWORD (i386): same control flow as the amd64 variant; the address is loaded from 4(%esp) into %eax and the value from 8(%esp) into %edx, and no SMAP_DISABLE_INSTR/SMAP_ENABLE_INSTR sites are emitted. */ 2686 #define SUWORD(NAME, INSTR, REG, COPYOP) \ 2687 ENTRY(NAME) \ 2688 movl %gs:CPU_THREAD, %ecx; \ 2689 movl kernelbase, %eax; \ 2690 cmpl %eax, 4(%esp); \ 2691 jae 1f; \ 2692 lea _flt_/**/NAME, %edx; \ 2693 movl %edx, T_LOFAULT(%ecx); \ 2694 movl 4(%esp), %eax; \ 2695 movl 8(%esp), %edx; \ 2696 INSTR REG, (%eax); \ 2697 movl $0, T_LOFAULT(%ecx); \ 2698 xorl %eax, %eax; \ 2699 ret; \ 2700 _flt_/**/NAME:
\ 2701 movl $0, T_LOFAULT(%ecx); \ 2702 1: \ 2703 movl T_COPYOPS(%ecx), %eax; \ 2704 cmpl $0, %eax; \ 2705 jz 3f; \ 2706 movl COPYOP(%eax), %ecx; \ 2707 jmp *%ecx; \ 2708 3: \ 2709 movl $-1, %eax; \ 2710 ret; \ 2711 SET_SIZE(NAME) 2712 2713 SUWORD(suword32, movl, %edx, CP_SUWORD32) 2714 SUWORD(suword16, movw, %dx, CP_SUWORD16) 2715 SUWORD(suword8, movb, %dl, CP_SUWORD8) 2716 2717 #endif /* __i386 */ 2718 2719 #undef SUWORD 2720 2721 #endif /* __lint */ 2722 2723 #if defined(__lint) 2724 2725 #if defined(__amd64) 2726 2727 /*ARGSUSED*/ 2728 void 2729 fuword64_noerr(const void *addr, uint64_t *dst) 2730 {} 2731 2732 #endif 2733 2734 /*ARGSUSED*/ 2735 void 2736 fuword32_noerr(const void *addr, uint32_t *dst) 2737 {} 2738 2739 /*ARGSUSED*/ 2740 void 2741 fuword8_noerr(const void *addr, uint8_t *dst) 2742 {} 2743 2744 /*ARGSUSED*/ 2745 void 2746 fuword16_noerr(const void *addr, uint16_t *dst) 2747 {} 2748 2749 #else /* __lint */ 2750 2751 #if defined(__amd64) 2752 /* FUWORD_NOERR (amd64): emits ENTRY(NAME) with no T_LOFAULT handling and no return value set. If %rdi is not below kernelbase, cmovnbq replaces it with kernelbase before the load through (%rdi); the loaded value is then stored through (%rsi). */ 2753 #define FUWORD_NOERR(NAME, INSTR, REG) \ 2754 ENTRY(NAME) \ 2755 cmpq kernelbase(%rip), %rdi; \ 2756 cmovnbq kernelbase(%rip), %rdi; \ 2757 INSTR (%rdi), REG; \ 2758 INSTR REG, (%rsi); \ 2759 ret; \ 2760 SET_SIZE(NAME) 2761 2762 FUWORD_NOERR(fuword64_noerr, movq, %rax) 2763 FUWORD_NOERR(fuword32_noerr, movl, %eax) 2764 FUWORD_NOERR(fuword16_noerr, movw, %ax) 2765 FUWORD_NOERR(fuword8_noerr, movb, %al) 2766 2767 #elif defined(__i386) 2768 /* FUWORD_NOERR (i386): same replacement of an out-of-range source address with kernelbase, done with a compare and branch; the address comes from 4(%esp) and the destination pointer from 8(%esp). */ 2769 #define FUWORD_NOERR(NAME, INSTR, REG) \ 2770 ENTRY(NAME) \ 2771 movl 4(%esp), %eax; \ 2772 cmpl kernelbase, %eax; \ 2773 jb 1f; \ 2774 movl kernelbase, %eax; \ 2775 1: movl 8(%esp), %edx; \ 2776 INSTR (%eax), REG; \ 2777 INSTR REG, (%edx); \ 2778 ret; \ 2779 SET_SIZE(NAME) 2780 2781 FUWORD_NOERR(fuword32_noerr, movl, %ecx) 2782 FUWORD_NOERR(fuword16_noerr, movw, %cx) 2783 FUWORD_NOERR(fuword8_noerr, movb, %cl) 2784 2785 #endif /* __i386 */ 2786 2787 #undef FUWORD_NOERR 2788 2789 #endif /* __lint */ 2790 2791 #if defined(__lint) 2792 2793 #if defined(__amd64) 2794
2795 /*ARGSUSED*/ 2796 void 2797 suword64_noerr(void *addr, uint64_t value) 2798 {} 2799 2800 #endif 2801 2802 /*ARGSUSED*/ 2803 void 2804 suword32_noerr(void *addr, uint32_t value) 2805 {} 2806 2807 /*ARGSUSED*/ 2808 void 2809 suword16_noerr(void *addr, uint16_t value) 2810 {} 2811 2812 /*ARGSUSED*/ 2813 void 2814 suword8_noerr(void *addr, uint8_t value) 2815 {} 2816 2817 #else /* __lint */ 2818 2819 #if defined(__amd64) 2820 /* SUWORD_NOERR (amd64): emits ENTRY(NAME) with no T_LOFAULT handling and no return value set. If %rdi is not below kernelbase, cmovnbq replaces it with kernelbase; REG (%rsi or one of its sub-registers in the invocations below) is then stored through (%rdi). */ 2821 #define SUWORD_NOERR(NAME, INSTR, REG) \ 2822 ENTRY(NAME) \ 2823 cmpq kernelbase(%rip), %rdi; \ 2824 cmovnbq kernelbase(%rip), %rdi; \ 2825 INSTR REG, (%rdi); \ 2826 ret; \ 2827 SET_SIZE(NAME) 2828 2829 SUWORD_NOERR(suword64_noerr, movq, %rsi) 2830 SUWORD_NOERR(suword32_noerr, movl, %esi) 2831 SUWORD_NOERR(suword16_noerr, movw, %si) 2832 SUWORD_NOERR(suword8_noerr, movb, %sil) 2833 2834 #elif defined(__i386) 2835 /* SUWORD_NOERR (i386): same replacement of an out-of-range address with kernelbase, done with a compare and branch; the address comes from 4(%esp) and the value from 8(%esp) via %edx. */ 2836 #define SUWORD_NOERR(NAME, INSTR, REG) \ 2837 ENTRY(NAME) \ 2838 movl 4(%esp), %eax; \ 2839 cmpl kernelbase, %eax; \ 2840 jb 1f; \ 2841 movl kernelbase, %eax; \ 2842 1: \ 2843 movl 8(%esp), %edx; \ 2844 INSTR REG, (%eax); \ 2845 ret; \ 2846 SET_SIZE(NAME) 2847 2848 SUWORD_NOERR(suword32_noerr, movl, %edx) 2849 SUWORD_NOERR(suword16_noerr, movw, %dx) 2850 SUWORD_NOERR(suword8_noerr, movb, %dl) 2851 2852 #endif /* __i386 */ 2853 2854 #undef SUWORD_NOERR 2855 2856 #endif /* __lint */ 2857 2858 2859 #if defined(__lint) 2860 2861 /*ARGSUSED*/ 2862 int 2863 subyte(void *addr, uchar_t value) 2864 { return (0); } 2865 2866 /*ARGSUSED*/ 2867 void 2868 subyte_noerr(void *addr, uchar_t value) 2869 {} 2870 2871 /*ARGSUSED*/ 2872 int 2873 fulword(const void *addr, ulong_t *valuep) 2874 { return (0); } 2875 2876 /*ARGSUSED*/ 2877 void 2878 fulword_noerr(const void *addr, ulong_t *valuep) 2879 {} 2880 2881 /*ARGSUSED*/ 2882 int 2883 sulword(void *addr, ulong_t valuep) 2884 { return (0); } 2885 2886 /*ARGSUSED*/ 2887 void 2888 sulword_noerr(void *addr, ulong_t valuep) 2889 {} 2890 2891 #else /* __lint */ 2892 /* In the non-lint build these names are not separate routines: subyte is a weak alias for suword8. */ 2893 .weak subyte 2894 subyte=suword8 2895
.weak subyte_noerr 2896 subyte_noerr=suword8_noerr 2897 /* fulword, sulword and their _noerr forms are weak aliases for the 64-bit routines on amd64 and for the 32-bit routines on i386. */ 2898 #if defined(__amd64) 2899 2900 .weak fulword 2901 fulword=fuword64 2902 .weak fulword_noerr 2903 fulword_noerr=fuword64_noerr 2904 .weak sulword 2905 sulword=suword64 2906 .weak sulword_noerr 2907 sulword_noerr=suword64_noerr 2908 2909 #elif defined(__i386) 2910 2911 .weak fulword 2912 fulword=fuword32 2913 .weak fulword_noerr 2914 fulword_noerr=fuword32_noerr 2915 .weak sulword 2916 sulword=suword32 2917 .weak sulword_noerr 2918 sulword_noerr=suword32_noerr 2919 2920 #endif /* __i386 */ 2921 2922 #endif /* __lint */ 2923 2924 #if defined(__lint) 2925 2926 /* 2927 * Copy a block of storage - must not overlap (from + len <= to). 2928 * No fault handler installed (to be called under on_fault()) 2929 */ 2930 2931 /* ARGSUSED */ 2932 void 2933 copyout_noerr(const void *kfrom, void *uto, size_t count) 2934 {} 2935 2936 /* ARGSUSED */ 2937 void 2938 copyin_noerr(const void *ufrom, void *kto, size_t count) 2939 {} 2940 2941 /* 2942 * Zero a block of storage in user space 2943 */ 2944 2945 /* ARGSUSED */ 2946 void 2947 uzero(void *addr, size_t count) 2948 {} 2949 2950 /* 2951 * Copy a block of storage in user space 2952 */ 2953 2954 /* ARGSUSED */ 2955 void 2956 ucopy(const void *ufrom, void *uto, size_t ulength) 2957 {} 2958 2959 /* 2960 * Copy a string in user space 2961 */ 2962 2963 /* ARGSUSED */ 2964 void 2965 ucopystr(const char *ufrom, char *uto, size_t umaxlength, size_t *lencopied) 2966 {} 2967 2968 #else /* __lint */ 2969 2970 #if defined(__amd64) 2971 /* amd64 entry points: each one replaces any user-side pointer that is not below kernelbase with kernelbase and then jumps into do_copy, do_zero or do_copystr. Under DEBUG, copyin_noerr and copyout_noerr first jump to call_panic when the kernel-side pointer is below kernelbase. */ 2972 ENTRY(copyin_noerr) 2973 movq kernelbase(%rip), %rax 2974 #ifdef DEBUG 2975 cmpq %rax, %rsi /* %rsi = kto */ 2976 jae 1f 2977 leaq .cpyin_ne_pmsg(%rip), %rdi 2978 jmp call_panic /* setup stack and call panic */ 2979 1: 2980 #endif 2981 cmpq %rax, %rdi /* ufrom < kernelbase */ 2982 jb do_copy 2983 movq %rax, %rdi /* force fault at kernelbase */ 2984 jmp do_copy 2985 SET_SIZE(copyin_noerr) 2986 2987 ENTRY(copyout_noerr) 2988 movq kernelbase(%rip), %rax
2989 #ifdef DEBUG 2990 cmpq %rax, %rdi /* %rdi = kfrom */ 2991 jae 1f 2992 leaq .cpyout_ne_pmsg(%rip), %rdi 2993 jmp call_panic /* setup stack and call panic */ 2994 1: 2995 #endif 2996 cmpq %rax, %rsi /* uto < kernelbase */ 2997 jb do_copy 2998 movq %rax, %rsi /* force fault at kernelbase */ 2999 jmp do_copy 3000 SET_SIZE(copyout_noerr) 3001 3002 ENTRY(uzero) 3003 movq kernelbase(%rip), %rax 3004 cmpq %rax, %rdi 3005 jb do_zero 3006 movq %rax, %rdi /* force fault at kernelbase */ 3007 jmp do_zero 3008 SET_SIZE(uzero) 3009 3010 ENTRY(ucopy) 3011 movq kernelbase(%rip), %rax 3012 cmpq %rax, %rdi 3013 cmovaeq %rax, %rdi /* force fault at kernelbase */ 3014 cmpq %rax, %rsi 3015 cmovaeq %rax, %rsi /* force fault at kernelbase */ 3016 jmp do_copy 3017 SET_SIZE(ucopy) 3018 3019 /* 3020 * Note, the frame pointer is required here because do_copystr expects 3021 * to be able to pop it off! 3022 */ 3023 ENTRY(ucopystr) 3024 pushq %rbp 3025 movq %rsp, %rbp 3026 movq kernelbase(%rip), %rax 3027 cmpq %rax, %rdi 3028 cmovaeq %rax, %rdi /* force fault at kernelbase */ 3029 cmpq %rax, %rsi 3030 cmovaeq %rax, %rsi /* force fault at kernelbase */ 3031 /* do_copystr expects lofault address in %r8 */ 3032 /* do_copystr expects whether or not we need smap in %r10 */ 3033 xorl %r10d, %r10d 3034 movq %gs:CPU_THREAD, %r8 3035 movq T_LOFAULT(%r8), %r8 3036 jmp do_copystr 3037 SET_SIZE(ucopystr) 3038 3039 #elif defined(__i386) 3040 /* i386 entry points: the same replacement with kernelbase is applied in place to the stack arguments at 4(%esp) and 8(%esp); the DEBUG checks call panic directly. */ 3041 ENTRY(copyin_noerr) 3042 movl kernelbase, %eax 3043 #ifdef DEBUG 3044 cmpl %eax, 8(%esp) 3045 jae 1f 3046 pushl $.cpyin_ne_pmsg 3047 call panic 3048 1: 3049 #endif 3050 cmpl %eax, 4(%esp) 3051 jb do_copy 3052 movl %eax, 4(%esp) /* force fault at kernelbase */ 3053 jmp do_copy 3054 SET_SIZE(copyin_noerr) 3055 3056 ENTRY(copyout_noerr) 3057 movl kernelbase, %eax 3058 #ifdef DEBUG 3059 cmpl %eax, 4(%esp) 3060 jae 1f 3061 pushl $.cpyout_ne_pmsg 3062 call panic 3063 1: 3064 #endif 3065 cmpl %eax, 8(%esp) 3066 jb do_copy 3067 movl %eax, 8(%esp) /* force fault at
kernelbase */ 3068 jmp do_copy 3069 SET_SIZE(copyout_noerr) 3070 3071 ENTRY(uzero) 3072 movl kernelbase, %eax 3073 cmpl %eax, 4(%esp) 3074 jb do_zero 3075 movl %eax, 4(%esp) /* force fault at kernelbase */ 3076 jmp do_zero 3077 SET_SIZE(uzero) 3078 3079 ENTRY(ucopy) 3080 movl kernelbase, %eax 3081 cmpl %eax, 4(%esp) 3082 jb 1f 3083 movl %eax, 4(%esp) /* force fault at kernelbase */ 3084 1: 3085 cmpl %eax, 8(%esp) 3086 jb do_copy 3087 movl %eax, 8(%esp) /* force fault at kernelbase */ 3088 jmp do_copy 3089 SET_SIZE(ucopy) 3090 3091 ENTRY(ucopystr) 3092 movl kernelbase, %eax 3093 cmpl %eax, 4(%esp) 3094 jb 1f 3095 movl %eax, 4(%esp) /* force fault at kernelbase */ 3096 1: 3097 cmpl %eax, 8(%esp) 3098 jb 2f 3099 movl %eax, 8(%esp) /* force fault at kernelbase */ 3100 2: 3101 /* do_copystr expects the lofault address in %eax */ 3102 movl %gs:CPU_THREAD, %eax 3103 movl T_LOFAULT(%eax), %eax 3104 jmp do_copystr 3105 SET_SIZE(ucopystr) 3106 3107 #endif /* __i386 */ 3108 /* Message strings placed in .data; assembled only when DEBUG is defined. The two *_ne_pmsg strings are the ones referenced by the DEBUG checks in copyin_noerr and copyout_noerr. */ 3109 #ifdef DEBUG 3110 .data 3111 .kcopy_panic_msg: 3112 .string "kcopy: arguments below kernelbase" 3113 .bcopy_panic_msg: 3114 .string "bcopy: arguments below kernelbase" 3115 .kzero_panic_msg: 3116 .string "kzero: arguments below kernelbase" 3117 .bzero_panic_msg: 3118 .string "bzero: arguments below kernelbase" 3119 .copyin_panic_msg: 3120 .string "copyin: kaddr argument below kernelbase" 3121 .xcopyin_panic_msg: 3122 .string "xcopyin: kaddr argument below kernelbase" 3123 .copyout_panic_msg: 3124 .string "copyout: kaddr argument below kernelbase" 3125 .xcopyout_panic_msg: 3126 .string "xcopyout: kaddr argument below kernelbase" 3127 .copystr_panic_msg: 3128 .string "copystr: arguments in user space" 3129 .copyinstr_panic_msg: 3130 .string "copyinstr: kaddr argument not in kernel address space" 3131 .copyoutstr_panic_msg: 3132 .string "copyoutstr: kaddr argument not in kernel address space" 3133 .cpyin_ne_pmsg: 3134 .string "copyin_noerr: argument not in kernel address space" 3135 .cpyout_ne_pmsg: 3136
.string "copyout_noerr: argument not in kernel address space" 3137 #endif 3138 3139 #endif /* __lint */ 3140 3141 /* 3142 * These functions are used for SMAP, supervisor mode access protection. They 3143 * are hotpatched to become real instructions when the system starts up which is 3144 * done in mlsetup() as a part of enabling the other CR4 related features. 3145 * 3146 * Generally speaking, smap_disable() is a stac instruction and smap_enable() is a 3147 * clac instruction. It's safe to call these any number of times, and in fact, 3148 * out of paranoia, the kernel will likely call it at several points. 3149 */ 3150 3151 #if defined(__lint) 3152 3153 void 3154 smap_enable(void) 3155 {} 3156 3157 void 3158 smap_disable(void) 3159 {} 3160 3161 #else /* __lint */ 3162 /* Each body below is three nop bytes followed by ret. stac and clac are three-byte instructions, so the nops are presumably the bytes rewritten by the hotpatching described above -- confirm against the patching code. */ 3163 #if defined (__amd64) || defined(__i386) 3164 ENTRY(smap_disable) 3165 nop 3166 nop 3167 nop 3168 ret 3169 SET_SIZE(smap_disable) 3170 3171 ENTRY(smap_enable) 3172 nop 3173 nop 3174 nop 3175 ret 3176 SET_SIZE(smap_enable) 3177 3178 #endif /* __amd64 || __i386 */ 3179 3180 #endif /* __lint */ 3181 3182 #ifndef __lint 3183 /* Two global 4-byte data objects holding SMAP_ENABLE_COUNT and SMAP_DISABLE_COUNT. NOTE(review): presumably read by the boot-time patching code to learn how many SMAP_ENABLE_INSTR/SMAP_DISABLE_INSTR sites exist -- confirm against the consumer. */ 3184 .data 3185 .align 4 3186 .globl _smap_enable_patch_count 3187 .type _smap_enable_patch_count,@object 3188 .size _smap_enable_patch_count, 4 3189 _smap_enable_patch_count: 3190 .long SMAP_ENABLE_COUNT 3191 3192 .globl _smap_disable_patch_count 3193 .type _smap_disable_patch_count,@object 3194 .size _smap_disable_patch_count, 4 3195 _smap_disable_patch_count: 3196 .long SMAP_DISABLE_COUNT 3197 3198 #endif /* __lint */ --- EOF ---