/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2009, Intel Corporation
 * All rights reserved.
 */

/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T		*/
/*	  All Rights Reserved						*/

/*	Copyright (c) 1987, 1988 Microsoft Corporation			*/
/*	  All Rights Reserved						*/

/*
 * Copyright (c) 2018 Joyent, Inc.
 */

#include <sys/errno.h>
#include <sys/asm_linkage.h>

#if defined(__lint)
#include <sys/types.h>
#include <sys/systm.h>
#else	/* __lint */
#include "assym.h"
#endif	/* __lint */

#define	KCOPY_MIN_SIZE	128	/* Must be >= 16 bytes */
#define	XCOPY_MIN_SIZE	128	/* Must be >= 16 bytes */
/*
 * Non-temporal access (NTA) alignment requirement
 */
#define	NTA_ALIGN_SIZE	4	/* Must be at least 4-byte aligned */
#define	NTA_ALIGN_MASK	_CONST(NTA_ALIGN_SIZE-1)
#define	COUNT_ALIGN_SIZE	16	/* Must be at least 16-byte aligned */
#define	COUNT_ALIGN_MASK	_CONST(COUNT_ALIGN_SIZE-1)

/*
 * With the introduction of Broadwell, Intel has introduced supervisor mode
 * access protection -- SMAP. SMAP forces the kernel to set certain bits to
 * enable access of user pages (AC in rflags, defined as PS_ACHK in
 * <sys/psw.h>). One of the challenges is that the implementations of many of
 * the userland copy routines directly use the kernel ones. For example,
 * copyin and copyout simply go and jump to the do_copy_fault label and
 * traditionally let those deal with the return for them. In fact, changing
 * that is a can of frame pointers.
 *
 * Rules and Constraints:
 *
 * 1. For anything that's not in copy.s, we have it do explicit calls to the
 * smap related code. It is usually in a position where it is able to. This is
 * restricted to the following three places: DTrace, resume() in swtch.s and
 * on_fault/no_fault. If you want to add it somewhere else, we should be
 * thinking twice.
 *
 * 2. We try to toggle SMAP for the smallest window possible. This means that
 * if we take a fault, or need to fall back to a copyop in copyin(), copyout(),
 * or any other function, we always leave with SMAP enabled (the kernel cannot
 * access user pages).
 *
 * 3. None of the *_noerr() or ucopy/uzero routines should toggle SMAP. They
 * are explicitly only allowed to be called while in an on_fault()/no_fault()
 * handler, which already takes care of ensuring that SMAP is enabled and
 * disabled. Note that this means that when under an on_fault()/no_fault()
 * handler, one must not call the non-*_noerr() routines.
 *
 * 4. The first thing we should do after coming out of a lofault handler is
 * to make sure that we call smap_enable() again to ensure that we are safely
 * protected, as more often than not, we will have disabled SMAP to get there.
 *
 * 5. The SMAP functions, smap_enable() and smap_disable(), may not touch any
 * registers beyond those used by the call and ret. These routines may be
 * called from arbitrary contexts in copy.s where we have slightly more
 * special ABIs in place.
 *
 * 6. For any inline user of SMAP, the appropriate SMAP_ENABLE_INSTR and
 * SMAP_DISABLE_INSTR macro should be used (except for smap_enable() and
 * smap_disable()). If the number of these is changed, you must update the
 * constants SMAP_ENABLE_COUNT and SMAP_DISABLE_COUNT below.
 *
 * 7. Note, at this time SMAP is not implemented for the 32-bit kernel. There
 * is no known technical reason preventing it from being enabled.
 *
 * 8. Generally this .s file is processed by a K&R style cpp. This means that
 * it really has a lot of feelings about whitespace. In particular, if you
 * have a macro FOO with the arguments FOO(1, 3), the second argument is in
 * fact ' 3'.
 *
 * 9. The smap_enable() and smap_disable() functions should not generally be
 * called. They exist such that DTrace and on_trap() may use them; that's it.
 *
 * 10. In general, the kernel has its own value for rflags that gets used.
 * This is maintained in a few different places which vary based on how the
 * thread comes into existence and whether it's a user thread. In general,
 * when the kernel takes a trap, it will always set rflags to a known set of
 * flags, mainly as part of ENABLE_INTR_FLAGS and F_OFF and F_ON. These
 * ensure that PS_ACHK is cleared for us. In addition, when using the
 * sysenter instruction, we mask PS_ACHK off via the AMD_SFMASK MSR. See
 * init_cpu_syscall() for where that gets masked off.
 */

/*
 * The optimal 64-bit bcopy and kcopy for modern x86 processors use
 * "rep smovq" for large sizes. Performance data shows that many calls to
 * bcopy/kcopy/bzero/kzero operate on small buffers. For best performance for
 * these small sizes, unrolled code is used. For medium sizes, loops that
 * write 64 bytes per iteration are used. Transition points were determined
 * experimentally.
 */
#define	BZERO_USE_REP	(1024)
#define	BCOPY_DFLT_REP	(128)
#define	BCOPY_NHM_REP	(768)

/*
 * Copy a block of storage, returning an error code if `from' or
 * `to' takes a kernel pagefault which cannot be resolved.
 * Returns errno value on pagefault error, 0 if all ok
 */

/*
 * I'm sorry about these macros, but copy.s is unsurprisingly sensitive to
 * additional call instructions.
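 *
 * For orientation, each SMAP_*_INSTR() site below just plants a global label
 * over a three-byte nop pad. The expectation (an assumption of this note,
 * not something this file enforces) is that boot code rewrites every such
 * pad with the matching three-byte stac/clac instruction once it knows the
 * CPU supports SMAP; stac sets PS_ACHK and permits user-page access, clac
 * clears it again. As a minimal C-flavored sketch, with smap_supported and
 * hot_patch_3bytes() as illustrative placeholder names only:
 *
 *	extern char _smap_disable_patch_0[];	// label emitted by the macro
 *
 *	if (smap_supported) {
 *		// nop;nop;nop -> stac (0x0f 0x01 0xcb) at patch site 0
 *		hot_patch_3bytes(_smap_disable_patch_0, STAC_BYTES);
 *	}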
145 */ 146 #if defined(__amd64) 147 #define SMAP_DISABLE_COUNT 16 148 #define SMAP_ENABLE_COUNT 26 149 #elif defined(__i386) 150 #define SMAP_DISABLE_COUNT 0 151 #define SMAP_ENABLE_COUNT 0 152 #endif 153 154 #define SMAP_DISABLE_INSTR(ITER) \ 155 .globl _smap_disable_patch_/**/ITER; \ 156 _smap_disable_patch_/**/ITER/**/:; \ 157 nop; nop; nop; 158 159 #define SMAP_ENABLE_INSTR(ITER) \ 160 .globl _smap_enable_patch_/**/ITER; \ 161 _smap_enable_patch_/**/ITER/**/:; \ 162 nop; nop; nop; 163 164 #if defined(__lint) 165 166 /* ARGSUSED */ 167 int 168 kcopy(const void *from, void *to, size_t count) 169 { return (0); } 170 171 #else /* __lint */ 172 173 .globl kernelbase 174 .globl postbootkernelbase 175 176 #if defined(__amd64) 177 178 ENTRY(kcopy) 179 pushq %rbp 180 movq %rsp, %rbp 181 #ifdef DEBUG 182 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */ 183 jb 0f 184 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */ 185 jnb 1f 186 0: leaq .kcopy_panic_msg(%rip), %rdi 187 xorl %eax, %eax 188 call panic 189 1: 190 #endif 191 /* 192 * pass lofault value as 4th argument to do_copy_fault 193 */ 194 leaq _kcopy_copyerr(%rip), %rcx 195 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ 196 197 do_copy_fault: 198 movq T_LOFAULT(%r9), %r11 /* save the current lofault */ 199 movq %rcx, T_LOFAULT(%r9) /* new lofault */ 200 call bcopy_altentry 201 xorl %eax, %eax /* return 0 (success) */ 202 SMAP_ENABLE_INSTR(0) 203 204 /* 205 * A fault during do_copy_fault is indicated through an errno value 206 * in %rax and we iretq from the trap handler to here. 207 */ 208 _kcopy_copyerr: 209 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 210 leave 211 ret 212 SET_SIZE(kcopy) 213 214 #elif defined(__i386) 215 216 #define ARG_FROM 8 217 #define ARG_TO 12 218 #define ARG_COUNT 16 219 220 ENTRY(kcopy) 221 #ifdef DEBUG 222 pushl %ebp 223 movl %esp, %ebp 224 movl postbootkernelbase, %eax 225 cmpl %eax, ARG_FROM(%ebp) 226 jb 0f 227 cmpl %eax, ARG_TO(%ebp) 228 jnb 1f 229 0: pushl $.kcopy_panic_msg 230 call panic 231 1: popl %ebp 232 #endif 233 lea _kcopy_copyerr, %eax /* lofault value */ 234 movl %gs:CPU_THREAD, %edx 235 236 do_copy_fault: 237 pushl %ebp 238 movl %esp, %ebp /* setup stack frame */ 239 pushl %esi 240 pushl %edi /* save registers */ 241 242 movl T_LOFAULT(%edx), %edi 243 pushl %edi /* save the current lofault */ 244 movl %eax, T_LOFAULT(%edx) /* new lofault */ 245 246 movl ARG_COUNT(%ebp), %ecx 247 movl ARG_FROM(%ebp), %esi 248 movl ARG_TO(%ebp), %edi 249 shrl $2, %ecx /* word count */ 250 rep 251 smovl 252 movl ARG_COUNT(%ebp), %ecx 253 andl $3, %ecx /* bytes left over */ 254 rep 255 smovb 256 xorl %eax, %eax 257 258 /* 259 * A fault during do_copy_fault is indicated through an errno value 260 * in %eax and we iret from the trap handler to here. 261 */ 262 _kcopy_copyerr: 263 popl %ecx 264 popl %edi 265 movl %ecx, T_LOFAULT(%edx) /* restore the original lofault */ 266 popl %esi 267 popl %ebp 268 ret 269 SET_SIZE(kcopy) 270 271 #undef ARG_FROM 272 #undef ARG_TO 273 #undef ARG_COUNT 274 275 #endif /* __i386 */ 276 #endif /* __lint */ 277 278 #if defined(__lint) 279 280 /* 281 * Copy a block of storage. Similar to kcopy but uses non-temporal 282 * instructions. 283 */ 284 285 /* ARGSUSED */ 286 int 287 kcopy_nta(const void *from, void *to, size_t count, int copy_cached) 288 { return (0); } 289 290 #else /* __lint */ 291 292 #if defined(__amd64) 293 294 #define COPY_LOOP_INIT(src, dst, cnt) \ 295 addq cnt, src; \ 296 addq cnt, dst; \ 297 shrq $3, cnt; \ 298 neg cnt 299 300 /* Copy 16 bytes per loop. 
Uses %rax and %r8 */ 301 #define COPY_LOOP_BODY(src, dst, cnt) \ 302 prefetchnta 0x100(src, cnt, 8); \ 303 movq (src, cnt, 8), %rax; \ 304 movq 0x8(src, cnt, 8), %r8; \ 305 movnti %rax, (dst, cnt, 8); \ 306 movnti %r8, 0x8(dst, cnt, 8); \ 307 addq $2, cnt 308 309 ENTRY(kcopy_nta) 310 pushq %rbp 311 movq %rsp, %rbp 312 #ifdef DEBUG 313 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */ 314 jb 0f 315 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */ 316 jnb 1f 317 0: leaq .kcopy_panic_msg(%rip), %rdi 318 xorl %eax, %eax 319 call panic 320 1: 321 #endif 322 323 movq %gs:CPU_THREAD, %r9 324 cmpq $0, %rcx /* No non-temporal access? */ 325 /* 326 * pass lofault value as 4th argument to do_copy_fault 327 */ 328 leaq _kcopy_nta_copyerr(%rip), %rcx /* doesn't set rflags */ 329 jnz do_copy_fault /* use regular access */ 330 /* 331 * Make sure cnt is >= KCOPY_MIN_SIZE 332 */ 333 cmpq $KCOPY_MIN_SIZE, %rdx 334 jb do_copy_fault 335 336 /* 337 * Make sure src and dst are NTA_ALIGN_SIZE aligned, 338 * count is COUNT_ALIGN_SIZE aligned. 339 */ 340 movq %rdi, %r10 341 orq %rsi, %r10 342 andq $NTA_ALIGN_MASK, %r10 343 orq %rdx, %r10 344 andq $COUNT_ALIGN_MASK, %r10 345 jnz do_copy_fault 346 347 ALTENTRY(do_copy_fault_nta) 348 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ 349 movq T_LOFAULT(%r9), %r11 /* save the current lofault */ 350 movq %rcx, T_LOFAULT(%r9) /* new lofault */ 351 352 /* 353 * COPY_LOOP_BODY uses %rax and %r8 354 */ 355 COPY_LOOP_INIT(%rdi, %rsi, %rdx) 356 2: COPY_LOOP_BODY(%rdi, %rsi, %rdx) 357 jnz 2b 358 359 mfence 360 xorl %eax, %eax /* return 0 (success) */ 361 SMAP_ENABLE_INSTR(1) 362 363 _kcopy_nta_copyerr: 364 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 365 leave 366 ret 367 SET_SIZE(do_copy_fault_nta) 368 SET_SIZE(kcopy_nta) 369 370 #elif defined(__i386) 371 372 #define ARG_FROM 8 373 #define ARG_TO 12 374 #define ARG_COUNT 16 375 376 #define COPY_LOOP_INIT(src, dst, cnt) \ 377 addl cnt, src; \ 378 addl cnt, dst; \ 379 shrl $3, cnt; \ 380 neg cnt 381 382 #define COPY_LOOP_BODY(src, dst, cnt) \ 383 prefetchnta 0x100(src, cnt, 8); \ 384 movl (src, cnt, 8), %esi; \ 385 movnti %esi, (dst, cnt, 8); \ 386 movl 0x4(src, cnt, 8), %esi; \ 387 movnti %esi, 0x4(dst, cnt, 8); \ 388 movl 0x8(src, cnt, 8), %esi; \ 389 movnti %esi, 0x8(dst, cnt, 8); \ 390 movl 0xc(src, cnt, 8), %esi; \ 391 movnti %esi, 0xc(dst, cnt, 8); \ 392 addl $2, cnt 393 394 /* 395 * kcopy_nta is not implemented for 32-bit as no performance 396 * improvement was shown. We simply jump directly to kcopy 397 * and discard the 4 arguments. 
398 */ 399 ENTRY(kcopy_nta) 400 jmp kcopy 401 402 lea _kcopy_nta_copyerr, %eax /* lofault value */ 403 ALTENTRY(do_copy_fault_nta) 404 pushl %ebp 405 movl %esp, %ebp /* setup stack frame */ 406 pushl %esi 407 pushl %edi 408 409 movl %gs:CPU_THREAD, %edx 410 movl T_LOFAULT(%edx), %edi 411 pushl %edi /* save the current lofault */ 412 movl %eax, T_LOFAULT(%edx) /* new lofault */ 413 414 /* COPY_LOOP_BODY needs to use %esi */ 415 movl ARG_COUNT(%ebp), %ecx 416 movl ARG_FROM(%ebp), %edi 417 movl ARG_TO(%ebp), %eax 418 COPY_LOOP_INIT(%edi, %eax, %ecx) 419 1: COPY_LOOP_BODY(%edi, %eax, %ecx) 420 jnz 1b 421 mfence 422 423 xorl %eax, %eax 424 _kcopy_nta_copyerr: 425 popl %ecx 426 popl %edi 427 movl %ecx, T_LOFAULT(%edx) /* restore the original lofault */ 428 popl %esi 429 leave 430 ret 431 SET_SIZE(do_copy_fault_nta) 432 SET_SIZE(kcopy_nta) 433 434 #undef ARG_FROM 435 #undef ARG_TO 436 #undef ARG_COUNT 437 438 #endif /* __i386 */ 439 #endif /* __lint */ 440 441 #if defined(__lint) 442 443 /* ARGSUSED */ 444 void 445 bcopy(const void *from, void *to, size_t count) 446 {} 447 448 #else /* __lint */ 449 450 #if defined(__amd64) 451 452 ENTRY(bcopy) 453 #ifdef DEBUG 454 orq %rdx, %rdx /* %rdx = count */ 455 jz 1f 456 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */ 457 jb 0f 458 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */ 459 jnb 1f 460 0: leaq .bcopy_panic_msg(%rip), %rdi 461 jmp call_panic /* setup stack and call panic */ 462 1: 463 #endif 464 /* 465 * bcopy_altentry() is called from kcopy, i.e., do_copy_fault. 466 * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy 467 * uses these registers in future they must be saved and restored. 468 */ 469 ALTENTRY(bcopy_altentry) 470 do_copy: 471 #define L(s) .bcopy/**/s 472 cmpq $0x50, %rdx /* 80 */ 473 jae bcopy_ck_size 474 475 /* 476 * Performance data shows many caller's copy small buffers. So for 477 * best perf for these sizes unrolled code is used. Store data without 478 * worrying about alignment. 
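 *
 * For readers of the table that follows: this is a computed jump. Both
 * pointers have already been advanced past the end of the copy, and the
 * remaining byte count (0..79) indexes an array of 32-bit offsets that are
 * relative to the start of the table itself. Roughly, as an illustrative
 * C-style sketch (not compiled; L_fwdPxQx stands in for the local label):
 *
 *	int32_t off = L_fwdPxQx[cnt];		// offset from table base
 *	goto *((char *)L_fwdPxQx + off);	// lands on L(PxQy)
 *
 * where x = cnt % 8 and y = cnt / 8. Each L(PxQy) entry point copies y
 * quadwords plus an x-byte tail, all addressed at negative offsets from the
 * biased pointers, and then returns.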
479 */ 480 leaq L(fwdPxQx)(%rip), %r10 481 addq %rdx, %rdi 482 addq %rdx, %rsi 483 movslq (%r10,%rdx,4), %rcx 484 leaq (%rcx,%r10,1), %r10 485 jmpq *%r10 486 487 .p2align 4 488 L(fwdPxQx): 489 .int L(P0Q0)-L(fwdPxQx) /* 0 */ 490 .int L(P1Q0)-L(fwdPxQx) 491 .int L(P2Q0)-L(fwdPxQx) 492 .int L(P3Q0)-L(fwdPxQx) 493 .int L(P4Q0)-L(fwdPxQx) 494 .int L(P5Q0)-L(fwdPxQx) 495 .int L(P6Q0)-L(fwdPxQx) 496 .int L(P7Q0)-L(fwdPxQx) 497 498 .int L(P0Q1)-L(fwdPxQx) /* 8 */ 499 .int L(P1Q1)-L(fwdPxQx) 500 .int L(P2Q1)-L(fwdPxQx) 501 .int L(P3Q1)-L(fwdPxQx) 502 .int L(P4Q1)-L(fwdPxQx) 503 .int L(P5Q1)-L(fwdPxQx) 504 .int L(P6Q1)-L(fwdPxQx) 505 .int L(P7Q1)-L(fwdPxQx) 506 507 .int L(P0Q2)-L(fwdPxQx) /* 16 */ 508 .int L(P1Q2)-L(fwdPxQx) 509 .int L(P2Q2)-L(fwdPxQx) 510 .int L(P3Q2)-L(fwdPxQx) 511 .int L(P4Q2)-L(fwdPxQx) 512 .int L(P5Q2)-L(fwdPxQx) 513 .int L(P6Q2)-L(fwdPxQx) 514 .int L(P7Q2)-L(fwdPxQx) 515 516 .int L(P0Q3)-L(fwdPxQx) /* 24 */ 517 .int L(P1Q3)-L(fwdPxQx) 518 .int L(P2Q3)-L(fwdPxQx) 519 .int L(P3Q3)-L(fwdPxQx) 520 .int L(P4Q3)-L(fwdPxQx) 521 .int L(P5Q3)-L(fwdPxQx) 522 .int L(P6Q3)-L(fwdPxQx) 523 .int L(P7Q3)-L(fwdPxQx) 524 525 .int L(P0Q4)-L(fwdPxQx) /* 32 */ 526 .int L(P1Q4)-L(fwdPxQx) 527 .int L(P2Q4)-L(fwdPxQx) 528 .int L(P3Q4)-L(fwdPxQx) 529 .int L(P4Q4)-L(fwdPxQx) 530 .int L(P5Q4)-L(fwdPxQx) 531 .int L(P6Q4)-L(fwdPxQx) 532 .int L(P7Q4)-L(fwdPxQx) 533 534 .int L(P0Q5)-L(fwdPxQx) /* 40 */ 535 .int L(P1Q5)-L(fwdPxQx) 536 .int L(P2Q5)-L(fwdPxQx) 537 .int L(P3Q5)-L(fwdPxQx) 538 .int L(P4Q5)-L(fwdPxQx) 539 .int L(P5Q5)-L(fwdPxQx) 540 .int L(P6Q5)-L(fwdPxQx) 541 .int L(P7Q5)-L(fwdPxQx) 542 543 .int L(P0Q6)-L(fwdPxQx) /* 48 */ 544 .int L(P1Q6)-L(fwdPxQx) 545 .int L(P2Q6)-L(fwdPxQx) 546 .int L(P3Q6)-L(fwdPxQx) 547 .int L(P4Q6)-L(fwdPxQx) 548 .int L(P5Q6)-L(fwdPxQx) 549 .int L(P6Q6)-L(fwdPxQx) 550 .int L(P7Q6)-L(fwdPxQx) 551 552 .int L(P0Q7)-L(fwdPxQx) /* 56 */ 553 .int L(P1Q7)-L(fwdPxQx) 554 .int L(P2Q7)-L(fwdPxQx) 555 .int L(P3Q7)-L(fwdPxQx) 556 .int L(P4Q7)-L(fwdPxQx) 557 .int L(P5Q7)-L(fwdPxQx) 558 .int L(P6Q7)-L(fwdPxQx) 559 .int L(P7Q7)-L(fwdPxQx) 560 561 .int L(P0Q8)-L(fwdPxQx) /* 64 */ 562 .int L(P1Q8)-L(fwdPxQx) 563 .int L(P2Q8)-L(fwdPxQx) 564 .int L(P3Q8)-L(fwdPxQx) 565 .int L(P4Q8)-L(fwdPxQx) 566 .int L(P5Q8)-L(fwdPxQx) 567 .int L(P6Q8)-L(fwdPxQx) 568 .int L(P7Q8)-L(fwdPxQx) 569 570 .int L(P0Q9)-L(fwdPxQx) /* 72 */ 571 .int L(P1Q9)-L(fwdPxQx) 572 .int L(P2Q9)-L(fwdPxQx) 573 .int L(P3Q9)-L(fwdPxQx) 574 .int L(P4Q9)-L(fwdPxQx) 575 .int L(P5Q9)-L(fwdPxQx) 576 .int L(P6Q9)-L(fwdPxQx) 577 .int L(P7Q9)-L(fwdPxQx) /* 79 */ 578 579 .p2align 4 580 L(P0Q9): 581 mov -0x48(%rdi), %rcx 582 mov %rcx, -0x48(%rsi) 583 L(P0Q8): 584 mov -0x40(%rdi), %r10 585 mov %r10, -0x40(%rsi) 586 L(P0Q7): 587 mov -0x38(%rdi), %r8 588 mov %r8, -0x38(%rsi) 589 L(P0Q6): 590 mov -0x30(%rdi), %rcx 591 mov %rcx, -0x30(%rsi) 592 L(P0Q5): 593 mov -0x28(%rdi), %r10 594 mov %r10, -0x28(%rsi) 595 L(P0Q4): 596 mov -0x20(%rdi), %r8 597 mov %r8, -0x20(%rsi) 598 L(P0Q3): 599 mov -0x18(%rdi), %rcx 600 mov %rcx, -0x18(%rsi) 601 L(P0Q2): 602 mov -0x10(%rdi), %r10 603 mov %r10, -0x10(%rsi) 604 L(P0Q1): 605 mov -0x8(%rdi), %r8 606 mov %r8, -0x8(%rsi) 607 L(P0Q0): 608 ret 609 610 .p2align 4 611 L(P1Q9): 612 mov -0x49(%rdi), %r8 613 mov %r8, -0x49(%rsi) 614 L(P1Q8): 615 mov -0x41(%rdi), %rcx 616 mov %rcx, -0x41(%rsi) 617 L(P1Q7): 618 mov -0x39(%rdi), %r10 619 mov %r10, -0x39(%rsi) 620 L(P1Q6): 621 mov -0x31(%rdi), %r8 622 mov %r8, -0x31(%rsi) 623 L(P1Q5): 624 mov -0x29(%rdi), %rcx 625 mov %rcx, -0x29(%rsi) 626 L(P1Q4): 627 mov -0x21(%rdi), 
%r10 628 mov %r10, -0x21(%rsi) 629 L(P1Q3): 630 mov -0x19(%rdi), %r8 631 mov %r8, -0x19(%rsi) 632 L(P1Q2): 633 mov -0x11(%rdi), %rcx 634 mov %rcx, -0x11(%rsi) 635 L(P1Q1): 636 mov -0x9(%rdi), %r10 637 mov %r10, -0x9(%rsi) 638 L(P1Q0): 639 movzbq -0x1(%rdi), %r8 640 mov %r8b, -0x1(%rsi) 641 ret 642 643 .p2align 4 644 L(P2Q9): 645 mov -0x4a(%rdi), %r8 646 mov %r8, -0x4a(%rsi) 647 L(P2Q8): 648 mov -0x42(%rdi), %rcx 649 mov %rcx, -0x42(%rsi) 650 L(P2Q7): 651 mov -0x3a(%rdi), %r10 652 mov %r10, -0x3a(%rsi) 653 L(P2Q6): 654 mov -0x32(%rdi), %r8 655 mov %r8, -0x32(%rsi) 656 L(P2Q5): 657 mov -0x2a(%rdi), %rcx 658 mov %rcx, -0x2a(%rsi) 659 L(P2Q4): 660 mov -0x22(%rdi), %r10 661 mov %r10, -0x22(%rsi) 662 L(P2Q3): 663 mov -0x1a(%rdi), %r8 664 mov %r8, -0x1a(%rsi) 665 L(P2Q2): 666 mov -0x12(%rdi), %rcx 667 mov %rcx, -0x12(%rsi) 668 L(P2Q1): 669 mov -0xa(%rdi), %r10 670 mov %r10, -0xa(%rsi) 671 L(P2Q0): 672 movzwq -0x2(%rdi), %r8 673 mov %r8w, -0x2(%rsi) 674 ret 675 676 .p2align 4 677 L(P3Q9): 678 mov -0x4b(%rdi), %r8 679 mov %r8, -0x4b(%rsi) 680 L(P3Q8): 681 mov -0x43(%rdi), %rcx 682 mov %rcx, -0x43(%rsi) 683 L(P3Q7): 684 mov -0x3b(%rdi), %r10 685 mov %r10, -0x3b(%rsi) 686 L(P3Q6): 687 mov -0x33(%rdi), %r8 688 mov %r8, -0x33(%rsi) 689 L(P3Q5): 690 mov -0x2b(%rdi), %rcx 691 mov %rcx, -0x2b(%rsi) 692 L(P3Q4): 693 mov -0x23(%rdi), %r10 694 mov %r10, -0x23(%rsi) 695 L(P3Q3): 696 mov -0x1b(%rdi), %r8 697 mov %r8, -0x1b(%rsi) 698 L(P3Q2): 699 mov -0x13(%rdi), %rcx 700 mov %rcx, -0x13(%rsi) 701 L(P3Q1): 702 mov -0xb(%rdi), %r10 703 mov %r10, -0xb(%rsi) 704 /* 705 * These trailing loads/stores have to do all their loads 1st, 706 * then do the stores. 707 */ 708 L(P3Q0): 709 movzwq -0x3(%rdi), %r8 710 movzbq -0x1(%rdi), %r10 711 mov %r8w, -0x3(%rsi) 712 mov %r10b, -0x1(%rsi) 713 ret 714 715 .p2align 4 716 L(P4Q9): 717 mov -0x4c(%rdi), %r8 718 mov %r8, -0x4c(%rsi) 719 L(P4Q8): 720 mov -0x44(%rdi), %rcx 721 mov %rcx, -0x44(%rsi) 722 L(P4Q7): 723 mov -0x3c(%rdi), %r10 724 mov %r10, -0x3c(%rsi) 725 L(P4Q6): 726 mov -0x34(%rdi), %r8 727 mov %r8, -0x34(%rsi) 728 L(P4Q5): 729 mov -0x2c(%rdi), %rcx 730 mov %rcx, -0x2c(%rsi) 731 L(P4Q4): 732 mov -0x24(%rdi), %r10 733 mov %r10, -0x24(%rsi) 734 L(P4Q3): 735 mov -0x1c(%rdi), %r8 736 mov %r8, -0x1c(%rsi) 737 L(P4Q2): 738 mov -0x14(%rdi), %rcx 739 mov %rcx, -0x14(%rsi) 740 L(P4Q1): 741 mov -0xc(%rdi), %r10 742 mov %r10, -0xc(%rsi) 743 L(P4Q0): 744 mov -0x4(%rdi), %r8d 745 mov %r8d, -0x4(%rsi) 746 ret 747 748 .p2align 4 749 L(P5Q9): 750 mov -0x4d(%rdi), %r8 751 mov %r8, -0x4d(%rsi) 752 L(P5Q8): 753 mov -0x45(%rdi), %rcx 754 mov %rcx, -0x45(%rsi) 755 L(P5Q7): 756 mov -0x3d(%rdi), %r10 757 mov %r10, -0x3d(%rsi) 758 L(P5Q6): 759 mov -0x35(%rdi), %r8 760 mov %r8, -0x35(%rsi) 761 L(P5Q5): 762 mov -0x2d(%rdi), %rcx 763 mov %rcx, -0x2d(%rsi) 764 L(P5Q4): 765 mov -0x25(%rdi), %r10 766 mov %r10, -0x25(%rsi) 767 L(P5Q3): 768 mov -0x1d(%rdi), %r8 769 mov %r8, -0x1d(%rsi) 770 L(P5Q2): 771 mov -0x15(%rdi), %rcx 772 mov %rcx, -0x15(%rsi) 773 L(P5Q1): 774 mov -0xd(%rdi), %r10 775 mov %r10, -0xd(%rsi) 776 L(P5Q0): 777 mov -0x5(%rdi), %r8d 778 movzbq -0x1(%rdi), %r10 779 mov %r8d, -0x5(%rsi) 780 mov %r10b, -0x1(%rsi) 781 ret 782 783 .p2align 4 784 L(P6Q9): 785 mov -0x4e(%rdi), %r8 786 mov %r8, -0x4e(%rsi) 787 L(P6Q8): 788 mov -0x46(%rdi), %rcx 789 mov %rcx, -0x46(%rsi) 790 L(P6Q7): 791 mov -0x3e(%rdi), %r10 792 mov %r10, -0x3e(%rsi) 793 L(P6Q6): 794 mov -0x36(%rdi), %r8 795 mov %r8, -0x36(%rsi) 796 L(P6Q5): 797 mov -0x2e(%rdi), %rcx 798 mov %rcx, -0x2e(%rsi) 799 L(P6Q4): 800 mov -0x26(%rdi), 
%r10 801 mov %r10, -0x26(%rsi) 802 L(P6Q3): 803 mov -0x1e(%rdi), %r8 804 mov %r8, -0x1e(%rsi) 805 L(P6Q2): 806 mov -0x16(%rdi), %rcx 807 mov %rcx, -0x16(%rsi) 808 L(P6Q1): 809 mov -0xe(%rdi), %r10 810 mov %r10, -0xe(%rsi) 811 L(P6Q0): 812 mov -0x6(%rdi), %r8d 813 movzwq -0x2(%rdi), %r10 814 mov %r8d, -0x6(%rsi) 815 mov %r10w, -0x2(%rsi) 816 ret 817 818 .p2align 4 819 L(P7Q9): 820 mov -0x4f(%rdi), %r8 821 mov %r8, -0x4f(%rsi) 822 L(P7Q8): 823 mov -0x47(%rdi), %rcx 824 mov %rcx, -0x47(%rsi) 825 L(P7Q7): 826 mov -0x3f(%rdi), %r10 827 mov %r10, -0x3f(%rsi) 828 L(P7Q6): 829 mov -0x37(%rdi), %r8 830 mov %r8, -0x37(%rsi) 831 L(P7Q5): 832 mov -0x2f(%rdi), %rcx 833 mov %rcx, -0x2f(%rsi) 834 L(P7Q4): 835 mov -0x27(%rdi), %r10 836 mov %r10, -0x27(%rsi) 837 L(P7Q3): 838 mov -0x1f(%rdi), %r8 839 mov %r8, -0x1f(%rsi) 840 L(P7Q2): 841 mov -0x17(%rdi), %rcx 842 mov %rcx, -0x17(%rsi) 843 L(P7Q1): 844 mov -0xf(%rdi), %r10 845 mov %r10, -0xf(%rsi) 846 L(P7Q0): 847 mov -0x7(%rdi), %r8d 848 movzwq -0x3(%rdi), %r10 849 movzbq -0x1(%rdi), %rcx 850 mov %r8d, -0x7(%rsi) 851 mov %r10w, -0x3(%rsi) 852 mov %cl, -0x1(%rsi) 853 ret 854 855 /* 856 * For large sizes rep smovq is fastest. 857 * Transition point determined experimentally as measured on 858 * Intel Xeon processors (incl. Nehalem and previous generations) and 859 * AMD Opteron. The transition value is patched at boot time to avoid 860 * memory reference hit. 861 */ 862 .globl bcopy_patch_start 863 bcopy_patch_start: 864 cmpq $BCOPY_NHM_REP, %rdx 865 .globl bcopy_patch_end 866 bcopy_patch_end: 867 868 .p2align 4 869 ALTENTRY(bcopy_ck_size) 870 871 cmpq $BCOPY_DFLT_REP, %rdx 872 jae L(use_rep) 873 874 /* 875 * Align to a 8-byte boundary. Avoids penalties from unaligned stores 876 * as well as from stores spanning cachelines. 877 */ 878 test $0x7, %rsi 879 jz L(aligned_loop) 880 test $0x1, %rsi 881 jz 2f 882 movzbq (%rdi), %r8 883 dec %rdx 884 inc %rdi 885 mov %r8b, (%rsi) 886 inc %rsi 887 2: 888 test $0x2, %rsi 889 jz 4f 890 movzwq (%rdi), %r8 891 sub $0x2, %rdx 892 add $0x2, %rdi 893 mov %r8w, (%rsi) 894 add $0x2, %rsi 895 4: 896 test $0x4, %rsi 897 jz L(aligned_loop) 898 mov (%rdi), %r8d 899 sub $0x4, %rdx 900 add $0x4, %rdi 901 mov %r8d, (%rsi) 902 add $0x4, %rsi 903 904 /* 905 * Copy 64-bytes per loop 906 */ 907 .p2align 4 908 L(aligned_loop): 909 mov (%rdi), %r8 910 mov 0x8(%rdi), %r10 911 lea -0x40(%rdx), %rdx 912 mov %r8, (%rsi) 913 mov %r10, 0x8(%rsi) 914 mov 0x10(%rdi), %rcx 915 mov 0x18(%rdi), %r8 916 mov %rcx, 0x10(%rsi) 917 mov %r8, 0x18(%rsi) 918 919 cmp $0x40, %rdx 920 mov 0x20(%rdi), %r10 921 mov 0x28(%rdi), %rcx 922 mov %r10, 0x20(%rsi) 923 mov %rcx, 0x28(%rsi) 924 mov 0x30(%rdi), %r8 925 mov 0x38(%rdi), %r10 926 lea 0x40(%rdi), %rdi 927 mov %r8, 0x30(%rsi) 928 mov %r10, 0x38(%rsi) 929 lea 0x40(%rsi), %rsi 930 jae L(aligned_loop) 931 932 /* 933 * Copy remaining bytes (0-63) 934 */ 935 L(do_remainder): 936 leaq L(fwdPxQx)(%rip), %r10 937 addq %rdx, %rdi 938 addq %rdx, %rsi 939 movslq (%r10,%rdx,4), %rcx 940 leaq (%rcx,%r10,1), %r10 941 jmpq *%r10 942 943 /* 944 * Use rep smovq. Clear remainder via unrolled code 945 */ 946 .p2align 4 947 L(use_rep): 948 xchgq %rdi, %rsi /* %rsi = source, %rdi = destination */ 949 movq %rdx, %rcx /* %rcx = count */ 950 shrq $3, %rcx /* 8-byte word count */ 951 rep 952 smovq 953 954 xchgq %rsi, %rdi /* %rdi = src, %rsi = destination */ 955 andq $7, %rdx /* remainder */ 956 jnz L(do_remainder) 957 ret 958 #undef L 959 SET_SIZE(bcopy_ck_size) 960 961 #ifdef DEBUG 962 /* 963 * Setup frame on the run-time stack. 
The end of the input argument 964 * area must be aligned on a 16 byte boundary. The stack pointer %rsp, 965 * always points to the end of the latest allocated stack frame. 966 * panic(const char *format, ...) is a varargs function. When a 967 * function taking variable arguments is called, %rax must be set 968 * to eight times the number of floating point parameters passed 969 * to the function in SSE registers. 970 */ 971 call_panic: 972 pushq %rbp /* align stack properly */ 973 movq %rsp, %rbp 974 xorl %eax, %eax /* no variable arguments */ 975 call panic /* %rdi = format string */ 976 #endif 977 SET_SIZE(bcopy_altentry) 978 SET_SIZE(bcopy) 979 980 #elif defined(__i386) 981 982 #define ARG_FROM 4 983 #define ARG_TO 8 984 #define ARG_COUNT 12 985 986 ENTRY(bcopy) 987 #ifdef DEBUG 988 movl ARG_COUNT(%esp), %eax 989 orl %eax, %eax 990 jz 1f 991 movl postbootkernelbase, %eax 992 cmpl %eax, ARG_FROM(%esp) 993 jb 0f 994 cmpl %eax, ARG_TO(%esp) 995 jnb 1f 996 0: pushl %ebp 997 movl %esp, %ebp 998 pushl $.bcopy_panic_msg 999 call panic 1000 1: 1001 #endif 1002 do_copy: 1003 movl %esi, %eax /* save registers */ 1004 movl %edi, %edx 1005 movl ARG_COUNT(%esp), %ecx 1006 movl ARG_FROM(%esp), %esi 1007 movl ARG_TO(%esp), %edi 1008 1009 shrl $2, %ecx /* word count */ 1010 rep 1011 smovl 1012 movl ARG_COUNT(%esp), %ecx 1013 andl $3, %ecx /* bytes left over */ 1014 rep 1015 smovb 1016 movl %eax, %esi /* restore registers */ 1017 movl %edx, %edi 1018 ret 1019 SET_SIZE(bcopy) 1020 1021 #undef ARG_COUNT 1022 #undef ARG_FROM 1023 #undef ARG_TO 1024 1025 #endif /* __i386 */ 1026 #endif /* __lint */ 1027 1028 1029 /* 1030 * Zero a block of storage, returning an error code if we 1031 * take a kernel pagefault which cannot be resolved. 1032 * Returns errno value on pagefault error, 0 if all ok 1033 */ 1034 1035 #if defined(__lint) 1036 1037 /* ARGSUSED */ 1038 int 1039 kzero(void *addr, size_t count) 1040 { return (0); } 1041 1042 #else /* __lint */ 1043 1044 #if defined(__amd64) 1045 1046 ENTRY(kzero) 1047 #ifdef DEBUG 1048 cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */ 1049 jnb 0f 1050 leaq .kzero_panic_msg(%rip), %rdi 1051 jmp call_panic /* setup stack and call panic */ 1052 0: 1053 #endif 1054 /* 1055 * pass lofault value as 3rd argument for fault return 1056 */ 1057 leaq _kzeroerr(%rip), %rdx 1058 1059 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ 1060 movq T_LOFAULT(%r9), %r11 /* save the current lofault */ 1061 movq %rdx, T_LOFAULT(%r9) /* new lofault */ 1062 call bzero_altentry 1063 xorl %eax, %eax 1064 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ 1065 ret 1066 /* 1067 * A fault during bzero is indicated through an errno value 1068 * in %rax when we iretq to here. 
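 *
 * As an illustrative C sketch of the protocol (not a literal implementation;
 * t_lofault is treated simply as "the address the trap handler resumes at
 * when a user-page fault cannot be resolved"):
 *
 *	int
 *	kzero(void *addr, size_t count)
 *	{
 *		kthread_t *t = curthread;
 *		uintptr_t saved = t->t_lofault;
 *
 *		t->t_lofault = (uintptr_t)_kzeroerr;	// arm the handler
 *		bzero(addr, count);			// may fault
 *		t->t_lofault = saved;			// success path
 *		return (0);				// errno if we faulted
 *	}
 *
 * Because a fault happens inside the call to bzero_altentry, the resume
 * point still has that call's return address on the stack, which is why the
 * error path below pops 8 bytes before restoring t_lofault and returning.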
1069 */ 1070 _kzeroerr: 1071 addq $8, %rsp /* pop bzero_altentry call ret addr */ 1072 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ 1073 ret 1074 SET_SIZE(kzero) 1075 1076 #elif defined(__i386) 1077 1078 #define ARG_ADDR 8 1079 #define ARG_COUNT 12 1080 1081 ENTRY(kzero) 1082 #ifdef DEBUG 1083 pushl %ebp 1084 movl %esp, %ebp 1085 movl postbootkernelbase, %eax 1086 cmpl %eax, ARG_ADDR(%ebp) 1087 jnb 0f 1088 pushl $.kzero_panic_msg 1089 call panic 1090 0: popl %ebp 1091 #endif 1092 lea _kzeroerr, %eax /* kzeroerr is lofault value */ 1093 1094 pushl %ebp /* save stack base */ 1095 movl %esp, %ebp /* set new stack base */ 1096 pushl %edi /* save %edi */ 1097 1098 mov %gs:CPU_THREAD, %edx 1099 movl T_LOFAULT(%edx), %edi 1100 pushl %edi /* save the current lofault */ 1101 movl %eax, T_LOFAULT(%edx) /* new lofault */ 1102 1103 movl ARG_COUNT(%ebp), %ecx /* get size in bytes */ 1104 movl ARG_ADDR(%ebp), %edi /* %edi <- address of bytes to clear */ 1105 shrl $2, %ecx /* Count of double words to zero */ 1106 xorl %eax, %eax /* sstol val */ 1107 rep 1108 sstol /* %ecx contains words to clear (%eax=0) */ 1109 1110 movl ARG_COUNT(%ebp), %ecx /* get size in bytes */ 1111 andl $3, %ecx /* do mod 4 */ 1112 rep 1113 sstob /* %ecx contains residual bytes to clear */ 1114 1115 /* 1116 * A fault during kzero is indicated through an errno value 1117 * in %eax when we iret to here. 1118 */ 1119 _kzeroerr: 1120 popl %edi 1121 movl %edi, T_LOFAULT(%edx) /* restore the original lofault */ 1122 popl %edi 1123 popl %ebp 1124 ret 1125 SET_SIZE(kzero) 1126 1127 #undef ARG_ADDR 1128 #undef ARG_COUNT 1129 1130 #endif /* __i386 */ 1131 #endif /* __lint */ 1132 1133 /* 1134 * Zero a block of storage. 1135 */ 1136 1137 #if defined(__lint) 1138 1139 /* ARGSUSED */ 1140 void 1141 bzero(void *addr, size_t count) 1142 {} 1143 1144 #else /* __lint */ 1145 1146 #if defined(__amd64) 1147 1148 ENTRY(bzero) 1149 #ifdef DEBUG 1150 cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */ 1151 jnb 0f 1152 leaq .bzero_panic_msg(%rip), %rdi 1153 jmp call_panic /* setup stack and call panic */ 1154 0: 1155 #endif 1156 ALTENTRY(bzero_altentry) 1157 do_zero: 1158 #define L(s) .bzero/**/s 1159 xorl %eax, %eax 1160 1161 cmpq $0x50, %rsi /* 80 */ 1162 jae L(ck_align) 1163 1164 /* 1165 * Performance data shows many caller's are zeroing small buffers. So 1166 * for best perf for these sizes unrolled code is used. Store zeros 1167 * without worrying about alignment. 
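 *
 * The dispatch that follows mirrors the bcopy table above, except that only
 * the destination pointer needs to be biased by the count; the count (0..79)
 * again selects an L(PxQy) target through offsets stored relative to
 * L(setPxQx), roughly (illustrative only):
 *
 *	int32_t off = L_setPxQx[cnt];		// offset from table base
 *	goto *((char *)L_setPxQx + off);	// lands on L(PxQy)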
1168 */ 1169 leaq L(setPxQx)(%rip), %r10 1170 addq %rsi, %rdi 1171 movslq (%r10,%rsi,4), %rcx 1172 leaq (%rcx,%r10,1), %r10 1173 jmpq *%r10 1174 1175 .p2align 4 1176 L(setPxQx): 1177 .int L(P0Q0)-L(setPxQx) /* 0 */ 1178 .int L(P1Q0)-L(setPxQx) 1179 .int L(P2Q0)-L(setPxQx) 1180 .int L(P3Q0)-L(setPxQx) 1181 .int L(P4Q0)-L(setPxQx) 1182 .int L(P5Q0)-L(setPxQx) 1183 .int L(P6Q0)-L(setPxQx) 1184 .int L(P7Q0)-L(setPxQx) 1185 1186 .int L(P0Q1)-L(setPxQx) /* 8 */ 1187 .int L(P1Q1)-L(setPxQx) 1188 .int L(P2Q1)-L(setPxQx) 1189 .int L(P3Q1)-L(setPxQx) 1190 .int L(P4Q1)-L(setPxQx) 1191 .int L(P5Q1)-L(setPxQx) 1192 .int L(P6Q1)-L(setPxQx) 1193 .int L(P7Q1)-L(setPxQx) 1194 1195 .int L(P0Q2)-L(setPxQx) /* 16 */ 1196 .int L(P1Q2)-L(setPxQx) 1197 .int L(P2Q2)-L(setPxQx) 1198 .int L(P3Q2)-L(setPxQx) 1199 .int L(P4Q2)-L(setPxQx) 1200 .int L(P5Q2)-L(setPxQx) 1201 .int L(P6Q2)-L(setPxQx) 1202 .int L(P7Q2)-L(setPxQx) 1203 1204 .int L(P0Q3)-L(setPxQx) /* 24 */ 1205 .int L(P1Q3)-L(setPxQx) 1206 .int L(P2Q3)-L(setPxQx) 1207 .int L(P3Q3)-L(setPxQx) 1208 .int L(P4Q3)-L(setPxQx) 1209 .int L(P5Q3)-L(setPxQx) 1210 .int L(P6Q3)-L(setPxQx) 1211 .int L(P7Q3)-L(setPxQx) 1212 1213 .int L(P0Q4)-L(setPxQx) /* 32 */ 1214 .int L(P1Q4)-L(setPxQx) 1215 .int L(P2Q4)-L(setPxQx) 1216 .int L(P3Q4)-L(setPxQx) 1217 .int L(P4Q4)-L(setPxQx) 1218 .int L(P5Q4)-L(setPxQx) 1219 .int L(P6Q4)-L(setPxQx) 1220 .int L(P7Q4)-L(setPxQx) 1221 1222 .int L(P0Q5)-L(setPxQx) /* 40 */ 1223 .int L(P1Q5)-L(setPxQx) 1224 .int L(P2Q5)-L(setPxQx) 1225 .int L(P3Q5)-L(setPxQx) 1226 .int L(P4Q5)-L(setPxQx) 1227 .int L(P5Q5)-L(setPxQx) 1228 .int L(P6Q5)-L(setPxQx) 1229 .int L(P7Q5)-L(setPxQx) 1230 1231 .int L(P0Q6)-L(setPxQx) /* 48 */ 1232 .int L(P1Q6)-L(setPxQx) 1233 .int L(P2Q6)-L(setPxQx) 1234 .int L(P3Q6)-L(setPxQx) 1235 .int L(P4Q6)-L(setPxQx) 1236 .int L(P5Q6)-L(setPxQx) 1237 .int L(P6Q6)-L(setPxQx) 1238 .int L(P7Q6)-L(setPxQx) 1239 1240 .int L(P0Q7)-L(setPxQx) /* 56 */ 1241 .int L(P1Q7)-L(setPxQx) 1242 .int L(P2Q7)-L(setPxQx) 1243 .int L(P3Q7)-L(setPxQx) 1244 .int L(P4Q7)-L(setPxQx) 1245 .int L(P5Q7)-L(setPxQx) 1246 .int L(P6Q7)-L(setPxQx) 1247 .int L(P7Q7)-L(setPxQx) 1248 1249 .int L(P0Q8)-L(setPxQx) /* 64 */ 1250 .int L(P1Q8)-L(setPxQx) 1251 .int L(P2Q8)-L(setPxQx) 1252 .int L(P3Q8)-L(setPxQx) 1253 .int L(P4Q8)-L(setPxQx) 1254 .int L(P5Q8)-L(setPxQx) 1255 .int L(P6Q8)-L(setPxQx) 1256 .int L(P7Q8)-L(setPxQx) 1257 1258 .int L(P0Q9)-L(setPxQx) /* 72 */ 1259 .int L(P1Q9)-L(setPxQx) 1260 .int L(P2Q9)-L(setPxQx) 1261 .int L(P3Q9)-L(setPxQx) 1262 .int L(P4Q9)-L(setPxQx) 1263 .int L(P5Q9)-L(setPxQx) 1264 .int L(P6Q9)-L(setPxQx) 1265 .int L(P7Q9)-L(setPxQx) /* 79 */ 1266 1267 .p2align 4 1268 L(P0Q9): mov %rax, -0x48(%rdi) 1269 L(P0Q8): mov %rax, -0x40(%rdi) 1270 L(P0Q7): mov %rax, -0x38(%rdi) 1271 L(P0Q6): mov %rax, -0x30(%rdi) 1272 L(P0Q5): mov %rax, -0x28(%rdi) 1273 L(P0Q4): mov %rax, -0x20(%rdi) 1274 L(P0Q3): mov %rax, -0x18(%rdi) 1275 L(P0Q2): mov %rax, -0x10(%rdi) 1276 L(P0Q1): mov %rax, -0x8(%rdi) 1277 L(P0Q0): 1278 ret 1279 1280 .p2align 4 1281 L(P1Q9): mov %rax, -0x49(%rdi) 1282 L(P1Q8): mov %rax, -0x41(%rdi) 1283 L(P1Q7): mov %rax, -0x39(%rdi) 1284 L(P1Q6): mov %rax, -0x31(%rdi) 1285 L(P1Q5): mov %rax, -0x29(%rdi) 1286 L(P1Q4): mov %rax, -0x21(%rdi) 1287 L(P1Q3): mov %rax, -0x19(%rdi) 1288 L(P1Q2): mov %rax, -0x11(%rdi) 1289 L(P1Q1): mov %rax, -0x9(%rdi) 1290 L(P1Q0): mov %al, -0x1(%rdi) 1291 ret 1292 1293 .p2align 4 1294 L(P2Q9): mov %rax, -0x4a(%rdi) 1295 L(P2Q8): mov %rax, -0x42(%rdi) 1296 L(P2Q7): mov %rax, -0x3a(%rdi) 1297 L(P2Q6): mov %rax, 
-0x32(%rdi) 1298 L(P2Q5): mov %rax, -0x2a(%rdi) 1299 L(P2Q4): mov %rax, -0x22(%rdi) 1300 L(P2Q3): mov %rax, -0x1a(%rdi) 1301 L(P2Q2): mov %rax, -0x12(%rdi) 1302 L(P2Q1): mov %rax, -0xa(%rdi) 1303 L(P2Q0): mov %ax, -0x2(%rdi) 1304 ret 1305 1306 .p2align 4 1307 L(P3Q9): mov %rax, -0x4b(%rdi) 1308 L(P3Q8): mov %rax, -0x43(%rdi) 1309 L(P3Q7): mov %rax, -0x3b(%rdi) 1310 L(P3Q6): mov %rax, -0x33(%rdi) 1311 L(P3Q5): mov %rax, -0x2b(%rdi) 1312 L(P3Q4): mov %rax, -0x23(%rdi) 1313 L(P3Q3): mov %rax, -0x1b(%rdi) 1314 L(P3Q2): mov %rax, -0x13(%rdi) 1315 L(P3Q1): mov %rax, -0xb(%rdi) 1316 L(P3Q0): mov %ax, -0x3(%rdi) 1317 mov %al, -0x1(%rdi) 1318 ret 1319 1320 .p2align 4 1321 L(P4Q9): mov %rax, -0x4c(%rdi) 1322 L(P4Q8): mov %rax, -0x44(%rdi) 1323 L(P4Q7): mov %rax, -0x3c(%rdi) 1324 L(P4Q6): mov %rax, -0x34(%rdi) 1325 L(P4Q5): mov %rax, -0x2c(%rdi) 1326 L(P4Q4): mov %rax, -0x24(%rdi) 1327 L(P4Q3): mov %rax, -0x1c(%rdi) 1328 L(P4Q2): mov %rax, -0x14(%rdi) 1329 L(P4Q1): mov %rax, -0xc(%rdi) 1330 L(P4Q0): mov %eax, -0x4(%rdi) 1331 ret 1332 1333 .p2align 4 1334 L(P5Q9): mov %rax, -0x4d(%rdi) 1335 L(P5Q8): mov %rax, -0x45(%rdi) 1336 L(P5Q7): mov %rax, -0x3d(%rdi) 1337 L(P5Q6): mov %rax, -0x35(%rdi) 1338 L(P5Q5): mov %rax, -0x2d(%rdi) 1339 L(P5Q4): mov %rax, -0x25(%rdi) 1340 L(P5Q3): mov %rax, -0x1d(%rdi) 1341 L(P5Q2): mov %rax, -0x15(%rdi) 1342 L(P5Q1): mov %rax, -0xd(%rdi) 1343 L(P5Q0): mov %eax, -0x5(%rdi) 1344 mov %al, -0x1(%rdi) 1345 ret 1346 1347 .p2align 4 1348 L(P6Q9): mov %rax, -0x4e(%rdi) 1349 L(P6Q8): mov %rax, -0x46(%rdi) 1350 L(P6Q7): mov %rax, -0x3e(%rdi) 1351 L(P6Q6): mov %rax, -0x36(%rdi) 1352 L(P6Q5): mov %rax, -0x2e(%rdi) 1353 L(P6Q4): mov %rax, -0x26(%rdi) 1354 L(P6Q3): mov %rax, -0x1e(%rdi) 1355 L(P6Q2): mov %rax, -0x16(%rdi) 1356 L(P6Q1): mov %rax, -0xe(%rdi) 1357 L(P6Q0): mov %eax, -0x6(%rdi) 1358 mov %ax, -0x2(%rdi) 1359 ret 1360 1361 .p2align 4 1362 L(P7Q9): mov %rax, -0x4f(%rdi) 1363 L(P7Q8): mov %rax, -0x47(%rdi) 1364 L(P7Q7): mov %rax, -0x3f(%rdi) 1365 L(P7Q6): mov %rax, -0x37(%rdi) 1366 L(P7Q5): mov %rax, -0x2f(%rdi) 1367 L(P7Q4): mov %rax, -0x27(%rdi) 1368 L(P7Q3): mov %rax, -0x1f(%rdi) 1369 L(P7Q2): mov %rax, -0x17(%rdi) 1370 L(P7Q1): mov %rax, -0xf(%rdi) 1371 L(P7Q0): mov %eax, -0x7(%rdi) 1372 mov %ax, -0x3(%rdi) 1373 mov %al, -0x1(%rdi) 1374 ret 1375 1376 /* 1377 * Align to a 16-byte boundary. Avoids penalties from unaligned stores 1378 * as well as from stores spanning cachelines. Note 16-byte alignment 1379 * is better in case where rep sstosq is used. 1380 */ 1381 .p2align 4 1382 L(ck_align): 1383 test $0xf, %rdi 1384 jz L(aligned_now) 1385 test $1, %rdi 1386 jz 2f 1387 mov %al, (%rdi) 1388 dec %rsi 1389 lea 1(%rdi),%rdi 1390 2: 1391 test $2, %rdi 1392 jz 4f 1393 mov %ax, (%rdi) 1394 sub $2, %rsi 1395 lea 2(%rdi),%rdi 1396 4: 1397 test $4, %rdi 1398 jz 8f 1399 mov %eax, (%rdi) 1400 sub $4, %rsi 1401 lea 4(%rdi),%rdi 1402 8: 1403 test $8, %rdi 1404 jz L(aligned_now) 1405 mov %rax, (%rdi) 1406 sub $8, %rsi 1407 lea 8(%rdi),%rdi 1408 1409 /* 1410 * For large sizes rep sstoq is fastest. 1411 * Transition point determined experimentally as measured on 1412 * Intel Xeon processors (incl. Nehalem) and AMD Opteron. 
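 *
 * Putting the pieces of bzero together, the overall strategy reads roughly
 * like the following sketch (constants as defined at the top of this file;
 * illustrative pseudo-C, not a literal implementation):
 *
 *	if (count < 0x50) {
 *		table-dispatched unrolled stores;	// L(setPxQx)
 *	} else {
 *		align dst to 16 bytes, shrinking count;
 *		if (count > BZERO_USE_REP)
 *			rep sstoq, then table-dispatch count % 8;
 *		else
 *			64-byte store loop, then table-dispatch the tail;
 *	}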
1413 */ 1414 L(aligned_now): 1415 cmp $BZERO_USE_REP, %rsi 1416 ja L(use_rep) 1417 1418 /* 1419 * zero 64-bytes per loop 1420 */ 1421 .p2align 4 1422 L(bzero_loop): 1423 leaq -0x40(%rsi), %rsi 1424 cmpq $0x40, %rsi 1425 movq %rax, (%rdi) 1426 movq %rax, 0x8(%rdi) 1427 movq %rax, 0x10(%rdi) 1428 movq %rax, 0x18(%rdi) 1429 movq %rax, 0x20(%rdi) 1430 movq %rax, 0x28(%rdi) 1431 movq %rax, 0x30(%rdi) 1432 movq %rax, 0x38(%rdi) 1433 leaq 0x40(%rdi), %rdi 1434 jae L(bzero_loop) 1435 1436 /* 1437 * Clear any remaining bytes.. 1438 */ 1439 9: 1440 leaq L(setPxQx)(%rip), %r10 1441 addq %rsi, %rdi 1442 movslq (%r10,%rsi,4), %rcx 1443 leaq (%rcx,%r10,1), %r10 1444 jmpq *%r10 1445 1446 /* 1447 * Use rep sstoq. Clear any remainder via unrolled code 1448 */ 1449 .p2align 4 1450 L(use_rep): 1451 movq %rsi, %rcx /* get size in bytes */ 1452 shrq $3, %rcx /* count of 8-byte words to zero */ 1453 rep 1454 sstoq /* %rcx = words to clear (%rax=0) */ 1455 andq $7, %rsi /* remaining bytes */ 1456 jnz 9b 1457 ret 1458 #undef L 1459 SET_SIZE(bzero_altentry) 1460 SET_SIZE(bzero) 1461 1462 #elif defined(__i386) 1463 1464 #define ARG_ADDR 4 1465 #define ARG_COUNT 8 1466 1467 ENTRY(bzero) 1468 #ifdef DEBUG 1469 movl postbootkernelbase, %eax 1470 cmpl %eax, ARG_ADDR(%esp) 1471 jnb 0f 1472 pushl %ebp 1473 movl %esp, %ebp 1474 pushl $.bzero_panic_msg 1475 call panic 1476 0: 1477 #endif 1478 do_zero: 1479 movl %edi, %edx 1480 movl ARG_COUNT(%esp), %ecx 1481 movl ARG_ADDR(%esp), %edi 1482 shrl $2, %ecx 1483 xorl %eax, %eax 1484 rep 1485 sstol 1486 movl ARG_COUNT(%esp), %ecx 1487 andl $3, %ecx 1488 rep 1489 sstob 1490 movl %edx, %edi 1491 ret 1492 SET_SIZE(bzero) 1493 1494 #undef ARG_ADDR 1495 #undef ARG_COUNT 1496 1497 #endif /* __i386 */ 1498 #endif /* __lint */ 1499 1500 /* 1501 * Transfer data to and from user space - 1502 * Note that these routines can cause faults 1503 * It is assumed that the kernel has nothing at 1504 * less than KERNELBASE in the virtual address space. 1505 * 1506 * Note that copyin(9F) and copyout(9F) are part of the 1507 * DDI/DKI which specifies that they return '-1' on "errors." 1508 * 1509 * Sigh. 1510 * 1511 * So there's two extremely similar routines - xcopyin_nta() and 1512 * xcopyout_nta() which return the errno that we've faithfully computed. 1513 * This allows other callers (e.g. uiomove(9F)) to work correctly. 1514 * Given that these are used pretty heavily, we expand the calling 1515 * sequences inline for all flavours (rather than making wrappers). 1516 */ 1517 1518 /* 1519 * Copy user data to kernel space. 
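 *
 * In C terms, the amd64 copyin below behaves approximately as this sketch
 * (illustrative only; the lofault/SMAP bookkeeping and the argument
 * save/reload around the fallback are elided):
 *
 *	int
 *	copyin(const void *uaddr, void *kaddr, size_t count)
 *	{
 *		kthread_t *t = curthread;
 *
 *		if ((uintptr_t)uaddr < kernelbase &&
 *		    lofault_protected_bcopy(uaddr, kaddr, count) == 0)
 *			return (0);
 *		// bad address, or we took an unresolvable fault
 *		if (t->t_copyops != NULL)
 *			return (t->t_copyops->cp_copyin(uaddr, kaddr, count));
 *		return (-1);	// per the DDI, not an errno
 *	}
 *
 * where lofault_protected_bcopy() is just a placeholder name for the shared
 * do_copy_fault path.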
1520 */ 1521 1522 #if defined(__lint) 1523 1524 /* ARGSUSED */ 1525 int 1526 copyin(const void *uaddr, void *kaddr, size_t count) 1527 { return (0); } 1528 1529 #else /* lint */ 1530 1531 #if defined(__amd64) 1532 1533 ENTRY(copyin) 1534 pushq %rbp 1535 movq %rsp, %rbp 1536 subq $24, %rsp 1537 1538 /* 1539 * save args in case we trap and need to rerun as a copyop 1540 */ 1541 movq %rdi, (%rsp) 1542 movq %rsi, 0x8(%rsp) 1543 movq %rdx, 0x10(%rsp) 1544 1545 movq kernelbase(%rip), %rax 1546 #ifdef DEBUG 1547 cmpq %rax, %rsi /* %rsi = kaddr */ 1548 jnb 1f 1549 leaq .copyin_panic_msg(%rip), %rdi 1550 xorl %eax, %eax 1551 call panic 1552 1: 1553 #endif 1554 /* 1555 * pass lofault value as 4th argument to do_copy_fault 1556 */ 1557 leaq _copyin_err(%rip), %rcx 1558 1559 movq %gs:CPU_THREAD, %r9 1560 cmpq %rax, %rdi /* test uaddr < kernelbase */ 1561 jae 3f /* take copyop if uaddr > kernelbase */ 1562 SMAP_DISABLE_INSTR(0) 1563 jmp do_copy_fault /* Takes care of leave for us */ 1564 1565 _copyin_err: 1566 SMAP_ENABLE_INSTR(2) 1567 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 1568 addq $8, %rsp /* pop bcopy_altentry call ret addr */ 1569 3: 1570 movq T_COPYOPS(%r9), %rax 1571 cmpq $0, %rax 1572 jz 2f 1573 /* 1574 * reload args for the copyop 1575 */ 1576 movq (%rsp), %rdi 1577 movq 0x8(%rsp), %rsi 1578 movq 0x10(%rsp), %rdx 1579 leave 1580 jmp *CP_COPYIN(%rax) 1581 1582 2: movl $-1, %eax 1583 leave 1584 ret 1585 SET_SIZE(copyin) 1586 1587 #elif defined(__i386) 1588 1589 #define ARG_UADDR 4 1590 #define ARG_KADDR 8 1591 1592 ENTRY(copyin) 1593 movl kernelbase, %ecx 1594 #ifdef DEBUG 1595 cmpl %ecx, ARG_KADDR(%esp) 1596 jnb 1f 1597 pushl %ebp 1598 movl %esp, %ebp 1599 pushl $.copyin_panic_msg 1600 call panic 1601 1: 1602 #endif 1603 lea _copyin_err, %eax 1604 1605 movl %gs:CPU_THREAD, %edx 1606 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */ 1607 jb do_copy_fault 1608 jmp 3f 1609 1610 _copyin_err: 1611 popl %ecx 1612 popl %edi 1613 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */ 1614 popl %esi 1615 popl %ebp 1616 3: 1617 movl T_COPYOPS(%edx), %eax 1618 cmpl $0, %eax 1619 jz 2f 1620 jmp *CP_COPYIN(%eax) 1621 1622 2: movl $-1, %eax 1623 ret 1624 SET_SIZE(copyin) 1625 1626 #undef ARG_UADDR 1627 #undef ARG_KADDR 1628 1629 #endif /* __i386 */ 1630 #endif /* __lint */ 1631 1632 #if defined(__lint) 1633 1634 /* ARGSUSED */ 1635 int 1636 xcopyin_nta(const void *uaddr, void *kaddr, size_t count, int copy_cached) 1637 { return (0); } 1638 1639 #else /* __lint */ 1640 1641 #if defined(__amd64) 1642 1643 ENTRY(xcopyin_nta) 1644 pushq %rbp 1645 movq %rsp, %rbp 1646 subq $24, %rsp 1647 1648 /* 1649 * save args in case we trap and need to rerun as a copyop 1650 * %rcx is consumed in this routine so we don't need to save 1651 * it. 1652 */ 1653 movq %rdi, (%rsp) 1654 movq %rsi, 0x8(%rsp) 1655 movq %rdx, 0x10(%rsp) 1656 1657 movq kernelbase(%rip), %rax 1658 #ifdef DEBUG 1659 cmpq %rax, %rsi /* %rsi = kaddr */ 1660 jnb 1f 1661 leaq .xcopyin_panic_msg(%rip), %rdi 1662 xorl %eax, %eax 1663 call panic 1664 1: 1665 #endif 1666 movq %gs:CPU_THREAD, %r9 1667 cmpq %rax, %rdi /* test uaddr < kernelbase */ 1668 jae 4f 1669 cmpq $0, %rcx /* No non-temporal access? 
*/ 1670 /* 1671 * pass lofault value as 4th argument to do_copy_fault 1672 */ 1673 leaq _xcopyin_err(%rip), %rcx /* doesn't set rflags */ 1674 jnz 6f /* use regular access */ 1675 /* 1676 * Make sure cnt is >= XCOPY_MIN_SIZE bytes 1677 */ 1678 cmpq $XCOPY_MIN_SIZE, %rdx 1679 jae 5f 1680 6: 1681 SMAP_DISABLE_INSTR(1) 1682 jmp do_copy_fault 1683 1684 /* 1685 * Make sure src and dst are NTA_ALIGN_SIZE aligned, 1686 * count is COUNT_ALIGN_SIZE aligned. 1687 */ 1688 5: 1689 movq %rdi, %r10 1690 orq %rsi, %r10 1691 andq $NTA_ALIGN_MASK, %r10 1692 orq %rdx, %r10 1693 andq $COUNT_ALIGN_MASK, %r10 1694 jnz 6b 1695 leaq _xcopyin_nta_err(%rip), %rcx /* doesn't set rflags */ 1696 SMAP_DISABLE_INSTR(2) 1697 jmp do_copy_fault_nta /* use non-temporal access */ 1698 1699 4: 1700 movl $EFAULT, %eax 1701 jmp 3f 1702 1703 /* 1704 * A fault during do_copy_fault or do_copy_fault_nta is 1705 * indicated through an errno value in %rax and we iret from the 1706 * trap handler to here. 1707 */ 1708 _xcopyin_err: 1709 addq $8, %rsp /* pop bcopy_altentry call ret addr */ 1710 _xcopyin_nta_err: 1711 SMAP_ENABLE_INSTR(3) 1712 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 1713 3: 1714 movq T_COPYOPS(%r9), %r8 1715 cmpq $0, %r8 1716 jz 2f 1717 1718 /* 1719 * reload args for the copyop 1720 */ 1721 movq (%rsp), %rdi 1722 movq 0x8(%rsp), %rsi 1723 movq 0x10(%rsp), %rdx 1724 leave 1725 jmp *CP_XCOPYIN(%r8) 1726 1727 2: leave 1728 ret 1729 SET_SIZE(xcopyin_nta) 1730 1731 #elif defined(__i386) 1732 1733 #define ARG_UADDR 4 1734 #define ARG_KADDR 8 1735 #define ARG_COUNT 12 1736 #define ARG_CACHED 16 1737 1738 .globl use_sse_copy 1739 1740 ENTRY(xcopyin_nta) 1741 movl kernelbase, %ecx 1742 lea _xcopyin_err, %eax 1743 movl %gs:CPU_THREAD, %edx 1744 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */ 1745 jae 4f 1746 1747 cmpl $0, use_sse_copy /* no sse support */ 1748 jz do_copy_fault 1749 1750 cmpl $0, ARG_CACHED(%esp) /* copy_cached hint set? */ 1751 jnz do_copy_fault 1752 1753 /* 1754 * Make sure cnt is >= XCOPY_MIN_SIZE bytes 1755 */ 1756 cmpl $XCOPY_MIN_SIZE, ARG_COUNT(%esp) 1757 jb do_copy_fault 1758 1759 /* 1760 * Make sure src and dst are NTA_ALIGN_SIZE aligned, 1761 * count is COUNT_ALIGN_SIZE aligned. 1762 */ 1763 movl ARG_UADDR(%esp), %ecx 1764 orl ARG_KADDR(%esp), %ecx 1765 andl $NTA_ALIGN_MASK, %ecx 1766 orl ARG_COUNT(%esp), %ecx 1767 andl $COUNT_ALIGN_MASK, %ecx 1768 jnz do_copy_fault 1769 1770 jmp do_copy_fault_nta /* use regular access */ 1771 1772 4: 1773 movl $EFAULT, %eax 1774 jmp 3f 1775 1776 /* 1777 * A fault during do_copy_fault or do_copy_fault_nta is 1778 * indicated through an errno value in %eax and we iret from the 1779 * trap handler to here. 1780 */ 1781 _xcopyin_err: 1782 popl %ecx 1783 popl %edi 1784 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */ 1785 popl %esi 1786 popl %ebp 1787 3: 1788 cmpl $0, T_COPYOPS(%edx) 1789 jz 2f 1790 movl T_COPYOPS(%edx), %eax 1791 jmp *CP_XCOPYIN(%eax) 1792 1793 2: rep; ret /* use 2 byte return instruction when branch target */ 1794 /* AMD Software Optimization Guide - Section 6.2 */ 1795 SET_SIZE(xcopyin_nta) 1796 1797 #undef ARG_UADDR 1798 #undef ARG_KADDR 1799 #undef ARG_COUNT 1800 #undef ARG_CACHED 1801 1802 #endif /* __i386 */ 1803 #endif /* __lint */ 1804 1805 /* 1806 * Copy kernel data to user space. 
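 *
 * copyout below mirrors copyin with the kernel/user roles of the two
 * pointers swapped, again returning -1 (not an errno) on failure. The
 * xcopyout_nta/xcopyin_nta variants return an errno instead and only take
 * the non-temporal path when it is likely to pay off; the eligibility test
 * is roughly (illustrative sketch of the checks done below):
 *
 *	use_nta = (copy_cached == 0 &&
 *	    count >= XCOPY_MIN_SIZE &&
 *	    ((src | dst) & NTA_ALIGN_MASK) == 0 &&
 *	    (count & COUNT_ALIGN_MASK) == 0);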
1807 */ 1808 1809 #if defined(__lint) 1810 1811 /* ARGSUSED */ 1812 int 1813 copyout(const void *kaddr, void *uaddr, size_t count) 1814 { return (0); } 1815 1816 #else /* __lint */ 1817 1818 #if defined(__amd64) 1819 1820 ENTRY(copyout) 1821 pushq %rbp 1822 movq %rsp, %rbp 1823 subq $24, %rsp 1824 1825 /* 1826 * save args in case we trap and need to rerun as a copyop 1827 */ 1828 movq %rdi, (%rsp) 1829 movq %rsi, 0x8(%rsp) 1830 movq %rdx, 0x10(%rsp) 1831 1832 movq kernelbase(%rip), %rax 1833 #ifdef DEBUG 1834 cmpq %rax, %rdi /* %rdi = kaddr */ 1835 jnb 1f 1836 leaq .copyout_panic_msg(%rip), %rdi 1837 xorl %eax, %eax 1838 call panic 1839 1: 1840 #endif 1841 /* 1842 * pass lofault value as 4th argument to do_copy_fault 1843 */ 1844 leaq _copyout_err(%rip), %rcx 1845 1846 movq %gs:CPU_THREAD, %r9 1847 cmpq %rax, %rsi /* test uaddr < kernelbase */ 1848 jae 3f /* take copyop if uaddr > kernelbase */ 1849 SMAP_DISABLE_INSTR(3) 1850 jmp do_copy_fault /* Calls leave for us */ 1851 1852 _copyout_err: 1853 SMAP_ENABLE_INSTR(4) 1854 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 1855 addq $8, %rsp /* pop bcopy_altentry call ret addr */ 1856 3: 1857 movq T_COPYOPS(%r9), %rax 1858 cmpq $0, %rax 1859 jz 2f 1860 1861 /* 1862 * reload args for the copyop 1863 */ 1864 movq (%rsp), %rdi 1865 movq 0x8(%rsp), %rsi 1866 movq 0x10(%rsp), %rdx 1867 leave 1868 jmp *CP_COPYOUT(%rax) 1869 1870 2: movl $-1, %eax 1871 leave 1872 ret 1873 SET_SIZE(copyout) 1874 1875 #elif defined(__i386) 1876 1877 #define ARG_KADDR 4 1878 #define ARG_UADDR 8 1879 1880 ENTRY(copyout) 1881 movl kernelbase, %ecx 1882 #ifdef DEBUG 1883 cmpl %ecx, ARG_KADDR(%esp) 1884 jnb 1f 1885 pushl %ebp 1886 movl %esp, %ebp 1887 pushl $.copyout_panic_msg 1888 call panic 1889 1: 1890 #endif 1891 lea _copyout_err, %eax 1892 movl %gs:CPU_THREAD, %edx 1893 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */ 1894 jb do_copy_fault 1895 jmp 3f 1896 1897 _copyout_err: 1898 popl %ecx 1899 popl %edi 1900 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */ 1901 popl %esi 1902 popl %ebp 1903 3: 1904 movl T_COPYOPS(%edx), %eax 1905 cmpl $0, %eax 1906 jz 2f 1907 jmp *CP_COPYOUT(%eax) 1908 1909 2: movl $-1, %eax 1910 ret 1911 SET_SIZE(copyout) 1912 1913 #undef ARG_UADDR 1914 #undef ARG_KADDR 1915 1916 #endif /* __i386 */ 1917 #endif /* __lint */ 1918 1919 #if defined(__lint) 1920 1921 /* ARGSUSED */ 1922 int 1923 xcopyout_nta(const void *kaddr, void *uaddr, size_t count, int copy_cached) 1924 { return (0); } 1925 1926 #else /* __lint */ 1927 1928 #if defined(__amd64) 1929 1930 ENTRY(xcopyout_nta) 1931 pushq %rbp 1932 movq %rsp, %rbp 1933 subq $24, %rsp 1934 1935 /* 1936 * save args in case we trap and need to rerun as a copyop 1937 */ 1938 movq %rdi, (%rsp) 1939 movq %rsi, 0x8(%rsp) 1940 movq %rdx, 0x10(%rsp) 1941 1942 movq kernelbase(%rip), %rax 1943 #ifdef DEBUG 1944 cmpq %rax, %rdi /* %rdi = kaddr */ 1945 jnb 1f 1946 leaq .xcopyout_panic_msg(%rip), %rdi 1947 xorl %eax, %eax 1948 call panic 1949 1: 1950 #endif 1951 movq %gs:CPU_THREAD, %r9 1952 cmpq %rax, %rsi /* test uaddr < kernelbase */ 1953 jae 4f 1954 1955 cmpq $0, %rcx /* No non-temporal access? 
*/ 1956 /* 1957 * pass lofault value as 4th argument to do_copy_fault 1958 */ 1959 leaq _xcopyout_err(%rip), %rcx 1960 jnz 6f 1961 /* 1962 * Make sure cnt is >= XCOPY_MIN_SIZE bytes 1963 */ 1964 cmpq $XCOPY_MIN_SIZE, %rdx 1965 jae 5f 1966 6: 1967 SMAP_DISABLE_INSTR(4) 1968 jmp do_copy_fault 1969 1970 /* 1971 * Make sure src and dst are NTA_ALIGN_SIZE aligned, 1972 * count is COUNT_ALIGN_SIZE aligned. 1973 */ 1974 5: 1975 movq %rdi, %r10 1976 orq %rsi, %r10 1977 andq $NTA_ALIGN_MASK, %r10 1978 orq %rdx, %r10 1979 andq $COUNT_ALIGN_MASK, %r10 1980 jnz 6b 1981 leaq _xcopyout_nta_err(%rip), %rcx 1982 SMAP_DISABLE_INSTR(5) 1983 call do_copy_fault_nta 1984 SMAP_ENABLE_INSTR(5) 1985 ret 1986 1987 4: 1988 movl $EFAULT, %eax 1989 jmp 3f 1990 1991 /* 1992 * A fault during do_copy_fault or do_copy_fault_nta is 1993 * indicated through an errno value in %rax and we iret from the 1994 * trap handler to here. 1995 */ 1996 _xcopyout_err: 1997 addq $8, %rsp /* pop bcopy_altentry call ret addr */ 1998 _xcopyout_nta_err: 1999 SMAP_ENABLE_INSTR(6) 2000 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 2001 3: 2002 movq T_COPYOPS(%r9), %r8 2003 cmpq $0, %r8 2004 jz 2f 2005 2006 /* 2007 * reload args for the copyop 2008 */ 2009 movq (%rsp), %rdi 2010 movq 0x8(%rsp), %rsi 2011 movq 0x10(%rsp), %rdx 2012 leave 2013 jmp *CP_XCOPYOUT(%r8) 2014 2015 2: leave 2016 ret 2017 SET_SIZE(xcopyout_nta) 2018 2019 #elif defined(__i386) 2020 2021 #define ARG_KADDR 4 2022 #define ARG_UADDR 8 2023 #define ARG_COUNT 12 2024 #define ARG_CACHED 16 2025 2026 ENTRY(xcopyout_nta) 2027 movl kernelbase, %ecx 2028 lea _xcopyout_err, %eax 2029 movl %gs:CPU_THREAD, %edx 2030 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */ 2031 jae 4f 2032 2033 cmpl $0, use_sse_copy /* no sse support */ 2034 jz do_copy_fault 2035 2036 cmpl $0, ARG_CACHED(%esp) /* copy_cached hint set? */ 2037 jnz do_copy_fault 2038 2039 /* 2040 * Make sure cnt is >= XCOPY_MIN_SIZE bytes 2041 */ 2042 cmpl $XCOPY_MIN_SIZE, %edx 2043 jb do_copy_fault 2044 2045 /* 2046 * Make sure src and dst are NTA_ALIGN_SIZE aligned, 2047 * count is COUNT_ALIGN_SIZE aligned. 2048 */ 2049 movl ARG_UADDR(%esp), %ecx 2050 orl ARG_KADDR(%esp), %ecx 2051 andl $NTA_ALIGN_MASK, %ecx 2052 orl ARG_COUNT(%esp), %ecx 2053 andl $COUNT_ALIGN_MASK, %ecx 2054 jnz do_copy_fault 2055 jmp do_copy_fault_nta 2056 2057 4: 2058 movl $EFAULT, %eax 2059 jmp 3f 2060 2061 /* 2062 * A fault during do_copy_fault or do_copy_fault_nta is 2063 * indicated through an errno value in %eax and we iret from the 2064 * trap handler to here. 2065 */ 2066 _xcopyout_err: 2067 / restore the original lofault 2068 popl %ecx 2069 popl %edi 2070 movl %ecx, T_LOFAULT(%edx) / original lofault 2071 popl %esi 2072 popl %ebp 2073 3: 2074 cmpl $0, T_COPYOPS(%edx) 2075 jz 2f 2076 movl T_COPYOPS(%edx), %eax 2077 jmp *CP_XCOPYOUT(%eax) 2078 2079 2: rep; ret /* use 2 byte return instruction when branch target */ 2080 /* AMD Software Optimization Guide - Section 6.2 */ 2081 SET_SIZE(xcopyout_nta) 2082 2083 #undef ARG_UADDR 2084 #undef ARG_KADDR 2085 #undef ARG_COUNT 2086 #undef ARG_CACHED 2087 2088 #endif /* __i386 */ 2089 #endif /* __lint */ 2090 2091 /* 2092 * Copy a null terminated string from one point to another in 2093 * the kernel address space. 
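 *
 * In C terms, the routine below behaves roughly like this sketch
 * (illustrative only; note that the length, when requested, is stored on
 * the ENAMETOOLONG path as well, and it includes the terminating NUL):
 *
 *	int
 *	copystr(const char *from, char *to, size_t maxlen, size_t *lencopied)
 *	{
 *		size_t left = maxlen;
 *		int err = ENAMETOOLONG;		// also the maxlen == 0 result
 *
 *		while (left != 0) {
 *			left--;
 *			if ((*to++ = *from++) == '\0') {
 *				err = 0;
 *				break;
 *			}
 *		}
 *		if (lencopied != NULL)
 *			*lencopied = maxlen - left;
 *		return (err);
 *	}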
2094 */ 2095 2096 #if defined(__lint) 2097 2098 /* ARGSUSED */ 2099 int 2100 copystr(const char *from, char *to, size_t maxlength, size_t *lencopied) 2101 { return (0); } 2102 2103 #else /* __lint */ 2104 2105 #if defined(__amd64) 2106 2107 ENTRY(copystr) 2108 pushq %rbp 2109 movq %rsp, %rbp 2110 #ifdef DEBUG 2111 movq kernelbase(%rip), %rax 2112 cmpq %rax, %rdi /* %rdi = from */ 2113 jb 0f 2114 cmpq %rax, %rsi /* %rsi = to */ 2115 jnb 1f 2116 0: leaq .copystr_panic_msg(%rip), %rdi 2117 xorl %eax, %eax 2118 call panic 2119 1: 2120 #endif 2121 movq %gs:CPU_THREAD, %r9 2122 movq T_LOFAULT(%r9), %r8 /* pass current lofault value as */ 2123 /* 5th argument to do_copystr */ 2124 xorl %r10d,%r10d /* pass smap restore need in %r10d */ 2125 /* as a non-ABI 6th arg */ 2126 do_copystr: 2127 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ 2128 movq T_LOFAULT(%r9), %r11 /* save the current lofault */ 2129 movq %r8, T_LOFAULT(%r9) /* new lofault */ 2130 2131 movq %rdx, %r8 /* save maxlength */ 2132 2133 cmpq $0, %rdx /* %rdx = maxlength */ 2134 je copystr_enametoolong /* maxlength == 0 */ 2135 2136 copystr_loop: 2137 decq %r8 2138 movb (%rdi), %al 2139 incq %rdi 2140 movb %al, (%rsi) 2141 incq %rsi 2142 cmpb $0, %al 2143 je copystr_null /* null char */ 2144 cmpq $0, %r8 2145 jne copystr_loop 2146 2147 copystr_enametoolong: 2148 movl $ENAMETOOLONG, %eax 2149 jmp copystr_out 2150 2151 copystr_null: 2152 xorl %eax, %eax /* no error */ 2153 2154 copystr_out: 2155 cmpq $0, %rcx /* want length? */ 2156 je copystr_smap /* no */ 2157 subq %r8, %rdx /* compute length and store it */ 2158 movq %rdx, (%rcx) 2159 2160 copystr_smap: 2161 cmpl $0, %r10d 2162 jz copystr_done 2163 SMAP_ENABLE_INSTR(7) 2164 2165 copystr_done: 2166 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ 2167 leave 2168 ret 2169 SET_SIZE(copystr) 2170 2171 #elif defined(__i386) 2172 2173 #define ARG_FROM 8 2174 #define ARG_TO 12 2175 #define ARG_MAXLEN 16 2176 #define ARG_LENCOPIED 20 2177 2178 ENTRY(copystr) 2179 #ifdef DEBUG 2180 pushl %ebp 2181 movl %esp, %ebp 2182 movl kernelbase, %eax 2183 cmpl %eax, ARG_FROM(%esp) 2184 jb 0f 2185 cmpl %eax, ARG_TO(%esp) 2186 jnb 1f 2187 0: pushl $.copystr_panic_msg 2188 call panic 2189 1: popl %ebp 2190 #endif 2191 /* get the current lofault address */ 2192 movl %gs:CPU_THREAD, %eax 2193 movl T_LOFAULT(%eax), %eax 2194 do_copystr: 2195 pushl %ebp /* setup stack frame */ 2196 movl %esp, %ebp 2197 pushl %ebx /* save registers */ 2198 pushl %edi 2199 2200 movl %gs:CPU_THREAD, %ebx 2201 movl T_LOFAULT(%ebx), %edi 2202 pushl %edi /* save the current lofault */ 2203 movl %eax, T_LOFAULT(%ebx) /* new lofault */ 2204 2205 movl ARG_MAXLEN(%ebp), %ecx 2206 cmpl $0, %ecx 2207 je copystr_enametoolong /* maxlength == 0 */ 2208 2209 movl ARG_FROM(%ebp), %ebx /* source address */ 2210 movl ARG_TO(%ebp), %edx /* destination address */ 2211 2212 copystr_loop: 2213 decl %ecx 2214 movb (%ebx), %al 2215 incl %ebx 2216 movb %al, (%edx) 2217 incl %edx 2218 cmpb $0, %al 2219 je copystr_null /* null char */ 2220 cmpl $0, %ecx 2221 jne copystr_loop 2222 2223 copystr_enametoolong: 2224 movl $ENAMETOOLONG, %eax 2225 jmp copystr_out 2226 2227 copystr_null: 2228 xorl %eax, %eax /* no error */ 2229 2230 copystr_out: 2231 cmpl $0, ARG_LENCOPIED(%ebp) /* want length? 
*/ 2232 je copystr_done /* no */ 2233 movl ARG_MAXLEN(%ebp), %edx 2234 subl %ecx, %edx /* compute length and store it */ 2235 movl ARG_LENCOPIED(%ebp), %ecx 2236 movl %edx, (%ecx) 2237 2238 copystr_done: 2239 popl %edi 2240 movl %gs:CPU_THREAD, %ebx 2241 movl %edi, T_LOFAULT(%ebx) /* restore the original lofault */ 2242 2243 popl %edi 2244 popl %ebx 2245 popl %ebp 2246 ret 2247 SET_SIZE(copystr) 2248 2249 #undef ARG_FROM 2250 #undef ARG_TO 2251 #undef ARG_MAXLEN 2252 #undef ARG_LENCOPIED 2253 2254 #endif /* __i386 */ 2255 #endif /* __lint */ 2256 2257 /* 2258 * Copy a null terminated string from the user address space into 2259 * the kernel address space. 2260 */ 2261 2262 #if defined(__lint) 2263 2264 /* ARGSUSED */ 2265 int 2266 copyinstr(const char *uaddr, char *kaddr, size_t maxlength, 2267 size_t *lencopied) 2268 { return (0); } 2269 2270 #else /* __lint */ 2271 2272 #if defined(__amd64) 2273 2274 ENTRY(copyinstr) 2275 pushq %rbp 2276 movq %rsp, %rbp 2277 subq $32, %rsp 2278 2279 /* 2280 * save args in case we trap and need to rerun as a copyop 2281 */ 2282 movq %rdi, (%rsp) 2283 movq %rsi, 0x8(%rsp) 2284 movq %rdx, 0x10(%rsp) 2285 movq %rcx, 0x18(%rsp) 2286 2287 movq kernelbase(%rip), %rax 2288 #ifdef DEBUG 2289 cmpq %rax, %rsi /* %rsi = kaddr */ 2290 jnb 1f 2291 leaq .copyinstr_panic_msg(%rip), %rdi 2292 xorl %eax, %eax 2293 call panic 2294 1: 2295 #endif 2296 /* 2297 * pass lofault value as 5th argument to do_copystr 2298 * do_copystr expects whether or not we need smap in %r10d 2299 */ 2300 leaq _copyinstr_error(%rip), %r8 2301 movl $1, %r10d 2302 2303 cmpq %rax, %rdi /* test uaddr < kernelbase */ 2304 jae 4f 2305 SMAP_DISABLE_INSTR(6) 2306 jmp do_copystr 2307 4: 2308 movq %gs:CPU_THREAD, %r9 2309 jmp 3f 2310 2311 _copyinstr_error: 2312 SMAP_ENABLE_INSTR(8) 2313 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 2314 3: 2315 movq T_COPYOPS(%r9), %rax 2316 cmpq $0, %rax 2317 jz 2f 2318 2319 /* 2320 * reload args for the copyop 2321 */ 2322 movq (%rsp), %rdi 2323 movq 0x8(%rsp), %rsi 2324 movq 0x10(%rsp), %rdx 2325 movq 0x18(%rsp), %rcx 2326 leave 2327 jmp *CP_COPYINSTR(%rax) 2328 2329 2: movl $EFAULT, %eax /* return EFAULT */ 2330 leave 2331 ret 2332 SET_SIZE(copyinstr) 2333 2334 #elif defined(__i386) 2335 2336 #define ARG_UADDR 4 2337 #define ARG_KADDR 8 2338 2339 ENTRY(copyinstr) 2340 movl kernelbase, %ecx 2341 #ifdef DEBUG 2342 cmpl %ecx, ARG_KADDR(%esp) 2343 jnb 1f 2344 pushl %ebp 2345 movl %esp, %ebp 2346 pushl $.copyinstr_panic_msg 2347 call panic 2348 1: 2349 #endif 2350 lea _copyinstr_error, %eax 2351 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */ 2352 jb do_copystr 2353 movl %gs:CPU_THREAD, %edx 2354 jmp 3f 2355 2356 _copyinstr_error: 2357 popl %edi 2358 movl %gs:CPU_THREAD, %edx 2359 movl %edi, T_LOFAULT(%edx) /* original lofault */ 2360 2361 popl %edi 2362 popl %ebx 2363 popl %ebp 2364 3: 2365 movl T_COPYOPS(%edx), %eax 2366 cmpl $0, %eax 2367 jz 2f 2368 jmp *CP_COPYINSTR(%eax) 2369 2370 2: movl $EFAULT, %eax /* return EFAULT */ 2371 ret 2372 SET_SIZE(copyinstr) 2373 2374 #undef ARG_UADDR 2375 #undef ARG_KADDR 2376 2377 #endif /* __i386 */ 2378 #endif /* __lint */ 2379 2380 /* 2381 * Copy a null terminated string from the kernel 2382 * address space to the user address space. 
2383 */ 2384 2385 #if defined(__lint) 2386 2387 /* ARGSUSED */ 2388 int 2389 copyoutstr(const char *kaddr, char *uaddr, size_t maxlength, 2390 size_t *lencopied) 2391 { return (0); } 2392 2393 #else /* __lint */ 2394 2395 #if defined(__amd64) 2396 2397 ENTRY(copyoutstr) 2398 pushq %rbp 2399 movq %rsp, %rbp 2400 subq $32, %rsp 2401 2402 /* 2403 * save args in case we trap and need to rerun as a copyop 2404 */ 2405 movq %rdi, (%rsp) 2406 movq %rsi, 0x8(%rsp) 2407 movq %rdx, 0x10(%rsp) 2408 movq %rcx, 0x18(%rsp) 2409 2410 movq kernelbase(%rip), %rax 2411 #ifdef DEBUG 2412 cmpq %rax, %rdi /* %rdi = kaddr */ 2413 jnb 1f 2414 leaq .copyoutstr_panic_msg(%rip), %rdi 2415 jmp call_panic /* setup stack and call panic */ 2416 1: 2417 #endif 2418 /* 2419 * pass lofault value as 5th argument to do_copystr 2420 * pass one as 6th argument to do_copystr in %r10d 2421 */ 2422 leaq _copyoutstr_error(%rip), %r8 2423 movl $1, %r10d 2424 2425 cmpq %rax, %rsi /* test uaddr < kernelbase */ 2426 jae 4f 2427 SMAP_DISABLE_INSTR(7) 2428 jmp do_copystr 2429 4: 2430 movq %gs:CPU_THREAD, %r9 2431 jmp 3f 2432 2433 _copyoutstr_error: 2434 SMAP_ENABLE_INSTR(9) 2435 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ 2436 3: 2437 movq T_COPYOPS(%r9), %rax 2438 cmpq $0, %rax 2439 jz 2f 2440 2441 /* 2442 * reload args for the copyop 2443 */ 2444 movq (%rsp), %rdi 2445 movq 0x8(%rsp), %rsi 2446 movq 0x10(%rsp), %rdx 2447 movq 0x18(%rsp), %rcx 2448 leave 2449 jmp *CP_COPYOUTSTR(%rax) 2450 2451 2: movl $EFAULT, %eax /* return EFAULT */ 2452 leave 2453 ret 2454 SET_SIZE(copyoutstr) 2455 2456 #elif defined(__i386) 2457 2458 #define ARG_KADDR 4 2459 #define ARG_UADDR 8 2460 2461 ENTRY(copyoutstr) 2462 movl kernelbase, %ecx 2463 #ifdef DEBUG 2464 cmpl %ecx, ARG_KADDR(%esp) 2465 jnb 1f 2466 pushl %ebp 2467 movl %esp, %ebp 2468 pushl $.copyoutstr_panic_msg 2469 call panic 2470 1: 2471 #endif 2472 lea _copyoutstr_error, %eax 2473 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */ 2474 jb do_copystr 2475 movl %gs:CPU_THREAD, %edx 2476 jmp 3f 2477 2478 _copyoutstr_error: 2479 popl %edi 2480 movl %gs:CPU_THREAD, %edx 2481 movl %edi, T_LOFAULT(%edx) /* restore the original lofault */ 2482 2483 popl %edi 2484 popl %ebx 2485 popl %ebp 2486 3: 2487 movl T_COPYOPS(%edx), %eax 2488 cmpl $0, %eax 2489 jz 2f 2490 jmp *CP_COPYOUTSTR(%eax) 2491 2492 2: movl $EFAULT, %eax /* return EFAULT */ 2493 ret 2494 SET_SIZE(copyoutstr) 2495 2496 #undef ARG_KADDR 2497 #undef ARG_UADDR 2498 2499 #endif /* __i386 */ 2500 #endif /* __lint */ 2501 2502 /* 2503 * Since all of the fuword() variants are so similar, we have a macro to spit 2504 * them out. This allows us to create DTrace-unobservable functions easily. 2505 */ 2506 2507 #if defined(__lint) 2508 2509 #if defined(__amd64) 2510 2511 /* ARGSUSED */ 2512 int 2513 fuword64(const void *addr, uint64_t *dst) 2514 { return (0); } 2515 2516 #endif 2517 2518 /* ARGSUSED */ 2519 int 2520 fuword32(const void *addr, uint32_t *dst) 2521 { return (0); } 2522 2523 /* ARGSUSED */ 2524 int 2525 fuword16(const void *addr, uint16_t *dst) 2526 { return (0); } 2527 2528 /* ARGSUSED */ 2529 int 2530 fuword8(const void *addr, uint8_t *dst) 2531 { return (0); } 2532 2533 #else /* __lint */ 2534 2535 #if defined(__amd64) 2536 2537 /* 2538 * Note that we don't save and reload the arguments here 2539 * because their values are not altered in the copy path. 2540 * Additionally, when successful, the smap_enable jmp will 2541 * actually return us to our original caller. 
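 *
 * Each fuwordN() fetches an N-bit quantity from the supplied user address
 * and stores it at the kernel address given as the second argument,
 * returning 0 on success and -1 if the access faults (or the address lies
 * at or above kernelbase) and no copyops vector is installed.
 *
 * Minimal C sketch of a caller (illustrative only; uaddr is a
 * hypothetical user pointer):
 *
 *	uint32_t val;
 *
 *	if (fuword32(uaddr, &val) == -1)
 *		return (EFAULT);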
2542 */ 2543 2544 #define FUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \ 2545 ENTRY(NAME) \ 2546 movq %gs:CPU_THREAD, %r9; \ 2547 cmpq kernelbase(%rip), %rdi; \ 2548 jae 1f; \ 2549 leaq _flt_/**/NAME, %rdx; \ 2550 movq %rdx, T_LOFAULT(%r9); \ 2551 SMAP_DISABLE_INSTR(DISNUM) \ 2552 INSTR (%rdi), REG; \ 2553 movq $0, T_LOFAULT(%r9); \ 2554 INSTR REG, (%rsi); \ 2555 xorl %eax, %eax; \ 2556 SMAP_ENABLE_INSTR(EN1) \ 2557 ret; \ 2558 _flt_/**/NAME: \ 2559 SMAP_ENABLE_INSTR(EN2) \ 2560 movq $0, T_LOFAULT(%r9); \ 2561 1: \ 2562 movq T_COPYOPS(%r9), %rax; \ 2563 cmpq $0, %rax; \ 2564 jz 2f; \ 2565 jmp *COPYOP(%rax); \ 2566 2: \ 2567 movl $-1, %eax; \ 2568 ret; \ 2569 SET_SIZE(NAME) 2570 2571 FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11) 2572 FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13) 2573 FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15) 2574 FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17) 2575 2576 #elif defined(__i386) 2577 2578 #define FUWORD(NAME, INSTR, REG, COPYOP) \ 2579 ENTRY(NAME) \ 2580 movl %gs:CPU_THREAD, %ecx; \ 2581 movl kernelbase, %eax; \ 2582 cmpl %eax, 4(%esp); \ 2583 jae 1f; \ 2584 lea _flt_/**/NAME, %edx; \ 2585 movl %edx, T_LOFAULT(%ecx); \ 2586 movl 4(%esp), %eax; \ 2587 movl 8(%esp), %edx; \ 2588 INSTR (%eax), REG; \ 2589 movl $0, T_LOFAULT(%ecx); \ 2590 INSTR REG, (%edx); \ 2591 xorl %eax, %eax; \ 2592 ret; \ 2593 _flt_/**/NAME: \ 2594 movl $0, T_LOFAULT(%ecx); \ 2595 1: \ 2596 movl T_COPYOPS(%ecx), %eax; \ 2597 cmpl $0, %eax; \ 2598 jz 2f; \ 2599 jmp *COPYOP(%eax); \ 2600 2: \ 2601 movl $-1, %eax; \ 2602 ret; \ 2603 SET_SIZE(NAME) 2604 2605 FUWORD(fuword32, movl, %eax, CP_FUWORD32) 2606 FUWORD(fuword16, movw, %ax, CP_FUWORD16) 2607 FUWORD(fuword8, movb, %al, CP_FUWORD8) 2608 2609 #endif /* __i386 */ 2610 2611 #undef FUWORD 2612 2613 #endif /* __lint */ 2614 2615 /* 2616 * Set user word. 2617 */ 2618 2619 #if defined(__lint) 2620 2621 #if defined(__amd64) 2622 2623 /* ARGSUSED */ 2624 int 2625 suword64(void *addr, uint64_t value) 2626 { return (0); } 2627 2628 #endif 2629 2630 /* ARGSUSED */ 2631 int 2632 suword32(void *addr, uint32_t value) 2633 { return (0); } 2634 2635 /* ARGSUSED */ 2636 int 2637 suword16(void *addr, uint16_t value) 2638 { return (0); } 2639 2640 /* ARGSUSED */ 2641 int 2642 suword8(void *addr, uint8_t value) 2643 { return (0); } 2644 2645 #else /* lint */ 2646 2647 #if defined(__amd64) 2648 2649 /* 2650 * Note that we don't save and reload the arguments here 2651 * because their values are not altered in the copy path. 
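 *
 * Each suwordN() stores the N-bit value given as the second argument at
 * the supplied user address, returning 0 on success and -1 if the access
 * faults (or the address lies at or above kernelbase) and no copyops
 * vector is installed.
 *
 * Minimal C sketch of a caller (illustrative only; uaddr is a
 * hypothetical user pointer):
 *
 *	if (suword32(uaddr, 0) == -1)
 *		return (EFAULT);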
2652 */ 2653 2654 #define SUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \ 2655 ENTRY(NAME) \ 2656 movq %gs:CPU_THREAD, %r9; \ 2657 cmpq kernelbase(%rip), %rdi; \ 2658 jae 1f; \ 2659 leaq _flt_/**/NAME, %rdx; \ 2660 SMAP_DISABLE_INSTR(DISNUM) \ 2661 movq %rdx, T_LOFAULT(%r9); \ 2662 INSTR REG, (%rdi); \ 2663 movq $0, T_LOFAULT(%r9); \ 2664 xorl %eax, %eax; \ 2665 SMAP_ENABLE_INSTR(EN1) \ 2666 ret; \ 2667 _flt_/**/NAME: \ 2668 SMAP_ENABLE_INSTR(EN2) \ 2669 movq $0, T_LOFAULT(%r9); \ 2670 1: \ 2671 movq T_COPYOPS(%r9), %rax; \ 2672 cmpq $0, %rax; \ 2673 jz 3f; \ 2674 jmp *COPYOP(%rax); \ 2675 3: \ 2676 movl $-1, %eax; \ 2677 ret; \ 2678 SET_SIZE(NAME) 2679 2680 SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19) 2681 SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21) 2682 SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23) 2683 SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25) 2684 2685 #elif defined(__i386) 2686 2687 #define SUWORD(NAME, INSTR, REG, COPYOP) \ 2688 ENTRY(NAME) \ 2689 movl %gs:CPU_THREAD, %ecx; \ 2690 movl kernelbase, %eax; \ 2691 cmpl %eax, 4(%esp); \ 2692 jae 1f; \ 2693 lea _flt_/**/NAME, %edx; \ 2694 movl %edx, T_LOFAULT(%ecx); \ 2695 movl 4(%esp), %eax; \ 2696 movl 8(%esp), %edx; \ 2697 INSTR REG, (%eax); \ 2698 movl $0, T_LOFAULT(%ecx); \ 2699 xorl %eax, %eax; \ 2700 ret; \ 2701 _flt_/**/NAME: \ 2702 movl $0, T_LOFAULT(%ecx); \ 2703 1: \ 2704 movl T_COPYOPS(%ecx), %eax; \ 2705 cmpl $0, %eax; \ 2706 jz 3f; \ 2707 movl COPYOP(%eax), %ecx; \ 2708 jmp *%ecx; \ 2709 3: \ 2710 movl $-1, %eax; \ 2711 ret; \ 2712 SET_SIZE(NAME) 2713 2714 SUWORD(suword32, movl, %edx, CP_SUWORD32) 2715 SUWORD(suword16, movw, %dx, CP_SUWORD16) 2716 SUWORD(suword8, movb, %dl, CP_SUWORD8) 2717 2718 #endif /* __i386 */ 2719 2720 #undef SUWORD 2721 2722 #endif /* __lint */ 2723 2724 #if defined(__lint) 2725 2726 #if defined(__amd64) 2727 2728 /*ARGSUSED*/ 2729 void 2730 fuword64_noerr(const void *addr, uint64_t *dst) 2731 {} 2732 2733 #endif 2734 2735 /*ARGSUSED*/ 2736 void 2737 fuword32_noerr(const void *addr, uint32_t *dst) 2738 {} 2739 2740 /*ARGSUSED*/ 2741 void 2742 fuword8_noerr(const void *addr, uint8_t *dst) 2743 {} 2744 2745 /*ARGSUSED*/ 2746 void 2747 fuword16_noerr(const void *addr, uint16_t *dst) 2748 {} 2749 2750 #else /* __lint */ 2751 2752 #if defined(__amd64) 2753 2754 #define FUWORD_NOERR(NAME, INSTR, REG) \ 2755 ENTRY(NAME) \ 2756 cmpq kernelbase(%rip), %rdi; \ 2757 cmovnbq kernelbase(%rip), %rdi; \ 2758 INSTR (%rdi), REG; \ 2759 INSTR REG, (%rsi); \ 2760 ret; \ 2761 SET_SIZE(NAME) 2762 2763 FUWORD_NOERR(fuword64_noerr, movq, %rax) 2764 FUWORD_NOERR(fuword32_noerr, movl, %eax) 2765 FUWORD_NOERR(fuword16_noerr, movw, %ax) 2766 FUWORD_NOERR(fuword8_noerr, movb, %al) 2767 2768 #elif defined(__i386) 2769 2770 #define FUWORD_NOERR(NAME, INSTR, REG) \ 2771 ENTRY(NAME) \ 2772 movl 4(%esp), %eax; \ 2773 cmpl kernelbase, %eax; \ 2774 jb 1f; \ 2775 movl kernelbase, %eax; \ 2776 1: movl 8(%esp), %edx; \ 2777 INSTR (%eax), REG; \ 2778 INSTR REG, (%edx); \ 2779 ret; \ 2780 SET_SIZE(NAME) 2781 2782 FUWORD_NOERR(fuword32_noerr, movl, %ecx) 2783 FUWORD_NOERR(fuword16_noerr, movw, %cx) 2784 FUWORD_NOERR(fuword8_noerr, movb, %cl) 2785 2786 #endif /* __i386 */ 2787 2788 #undef FUWORD_NOERR 2789 2790 #endif /* __lint */ 2791 2792 #if defined(__lint) 2793 2794 #if defined(__amd64) 2795 2796 /*ARGSUSED*/ 2797 void 2798 suword64_noerr(void *addr, uint64_t value) 2799 {} 2800 2801 #endif 2802 2803 /*ARGSUSED*/ 2804 void 2805 suword32_noerr(void *addr, uint32_t value) 2806 {} 2807 2808 /*ARGSUSED*/ 2809 void 
2810 suword16_noerr(void *addr, uint16_t value) 2811 {} 2812 2813 /*ARGSUSED*/ 2814 void 2815 suword8_noerr(void *addr, uint8_t value) 2816 {} 2817 2818 #else /* lint */ 2819 2820 #if defined(__amd64) 2821 2822 #define SUWORD_NOERR(NAME, INSTR, REG) \ 2823 ENTRY(NAME) \ 2824 cmpq kernelbase(%rip), %rdi; \ 2825 cmovnbq kernelbase(%rip), %rdi; \ 2826 INSTR REG, (%rdi); \ 2827 ret; \ 2828 SET_SIZE(NAME) 2829 2830 SUWORD_NOERR(suword64_noerr, movq, %rsi) 2831 SUWORD_NOERR(suword32_noerr, movl, %esi) 2832 SUWORD_NOERR(suword16_noerr, movw, %si) 2833 SUWORD_NOERR(suword8_noerr, movb, %sil) 2834 2835 #elif defined(__i386) 2836 2837 #define SUWORD_NOERR(NAME, INSTR, REG) \ 2838 ENTRY(NAME) \ 2839 movl 4(%esp), %eax; \ 2840 cmpl kernelbase, %eax; \ 2841 jb 1f; \ 2842 movl kernelbase, %eax; \ 2843 1: \ 2844 movl 8(%esp), %edx; \ 2845 INSTR REG, (%eax); \ 2846 ret; \ 2847 SET_SIZE(NAME) 2848 2849 SUWORD_NOERR(suword32_noerr, movl, %edx) 2850 SUWORD_NOERR(suword16_noerr, movw, %dx) 2851 SUWORD_NOERR(suword8_noerr, movb, %dl) 2852 2853 #endif /* __i386 */ 2854 2855 #undef SUWORD_NOERR 2856 2857 #endif /* lint */ 2858 2859 2860 #if defined(__lint) 2861 2862 /*ARGSUSED*/ 2863 int 2864 subyte(void *addr, uchar_t value) 2865 { return (0); } 2866 2867 /*ARGSUSED*/ 2868 void 2869 subyte_noerr(void *addr, uchar_t value) 2870 {} 2871 2872 /*ARGSUSED*/ 2873 int 2874 fulword(const void *addr, ulong_t *valuep) 2875 { return (0); } 2876 2877 /*ARGSUSED*/ 2878 void 2879 fulword_noerr(const void *addr, ulong_t *valuep) 2880 {} 2881 2882 /*ARGSUSED*/ 2883 int 2884 sulword(void *addr, ulong_t valuep) 2885 { return (0); } 2886 2887 /*ARGSUSED*/ 2888 void 2889 sulword_noerr(void *addr, ulong_t valuep) 2890 {} 2891 2892 #else 2893 2894 .weak subyte 2895 subyte=suword8 2896 .weak subyte_noerr 2897 subyte_noerr=suword8_noerr 2898 2899 #if defined(__amd64) 2900 2901 .weak fulword 2902 fulword=fuword64 2903 .weak fulword_noerr 2904 fulword_noerr=fuword64_noerr 2905 .weak sulword 2906 sulword=suword64 2907 .weak sulword_noerr 2908 sulword_noerr=suword64_noerr 2909 2910 #elif defined(__i386) 2911 2912 .weak fulword 2913 fulword=fuword32 2914 .weak fulword_noerr 2915 fulword_noerr=fuword32_noerr 2916 .weak sulword 2917 sulword=suword32 2918 .weak sulword_noerr 2919 sulword_noerr=suword32_noerr 2920 2921 #endif /* __i386 */ 2922 2923 #endif /* __lint */ 2924 2925 #if defined(__lint) 2926 2927 /* 2928 * Copy a block of storage - must not overlap (from + len <= to). 
2929 * No fault handler installed (to be called under on_fault()) 2930 */ 2931 2932 /* ARGSUSED */ 2933 void 2934 copyout_noerr(const void *kfrom, void *uto, size_t count) 2935 {} 2936 2937 /* ARGSUSED */ 2938 void 2939 copyin_noerr(const void *ufrom, void *kto, size_t count) 2940 {} 2941 2942 /* 2943 * Zero a block of storage in user space 2944 */ 2945 2946 /* ARGSUSED */ 2947 void 2948 uzero(void *addr, size_t count) 2949 {} 2950 2951 /* 2952 * Copy a block of storage in user space 2953 */ 2954 2955 /* ARGSUSED */ 2956 void 2957 ucopy(const void *ufrom, void *uto, size_t ulength) 2958 {} 2959 2960 /* 2961 * Copy a string in user space 2962 */ 2963 2964 /* ARGSUSED */ 2965 void 2966 ucopystr(const char *ufrom, char *uto, size_t umaxlength, size_t *lencopied) 2967 {} 2968 2969 #else /* __lint */ 2970 2971 #if defined(__amd64) 2972 2973 ENTRY(copyin_noerr) 2974 movq kernelbase(%rip), %rax 2975 #ifdef DEBUG 2976 cmpq %rax, %rsi /* %rsi = kto */ 2977 jae 1f 2978 leaq .cpyin_ne_pmsg(%rip), %rdi 2979 jmp call_panic /* setup stack and call panic */ 2980 1: 2981 #endif 2982 cmpq %rax, %rdi /* ufrom < kernelbase */ 2983 jb do_copy 2984 movq %rax, %rdi /* force fault at kernelbase */ 2985 jmp do_copy 2986 SET_SIZE(copyin_noerr) 2987 2988 ENTRY(copyout_noerr) 2989 movq kernelbase(%rip), %rax 2990 #ifdef DEBUG 2991 cmpq %rax, %rdi /* %rdi = kfrom */ 2992 jae 1f 2993 leaq .cpyout_ne_pmsg(%rip), %rdi 2994 jmp call_panic /* setup stack and call panic */ 2995 1: 2996 #endif 2997 cmpq %rax, %rsi /* uto < kernelbase */ 2998 jb do_copy 2999 movq %rax, %rsi /* force fault at kernelbase */ 3000 jmp do_copy 3001 SET_SIZE(copyout_noerr) 3002 3003 ENTRY(uzero) 3004 movq kernelbase(%rip), %rax 3005 cmpq %rax, %rdi 3006 jb do_zero 3007 movq %rax, %rdi /* force fault at kernelbase */ 3008 jmp do_zero 3009 SET_SIZE(uzero) 3010 3011 ENTRY(ucopy) 3012 movq kernelbase(%rip), %rax 3013 cmpq %rax, %rdi 3014 cmovaeq %rax, %rdi /* force fault at kernelbase */ 3015 cmpq %rax, %rsi 3016 cmovaeq %rax, %rsi /* force fault at kernelbase */ 3017 jmp do_copy 3018 SET_SIZE(ucopy) 3019 3020 /* 3021 * Note that the frame pointer is required here because do_copystr expects 3022 * to be able to pop it off!
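 *
 * Like the other u*/*_noerr entry points above, ucopystr() installs no
 * fault handler of its own; it runs under whatever handler the caller
 * established with on_fault().  Out-of-range user addresses are clamped
 * to kernelbase so that the access faults into that handler rather than
 * quietly touching kernel memory.
 *
 * Minimal C sketch of the expected calling pattern (illustrative only;
 * ufrom and uto are hypothetical user pointers):
 *
 *	label_t ljb;
 *	size_t len;
 *
 *	if (on_fault(&ljb)) {
 *		no_fault();
 *		return (EFAULT);
 *	}
 *	ucopystr(ufrom, uto, MAXNAMELEN, &len);
 *	no_fault();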
3023 */ 3024 ENTRY(ucopystr) 3025 pushq %rbp 3026 movq %rsp, %rbp 3027 movq kernelbase(%rip), %rax 3028 cmpq %rax, %rdi 3029 cmovaeq %rax, %rdi /* force fault at kernelbase */ 3030 cmpq %rax, %rsi 3031 cmovaeq %rax, %rsi /* force fault at kernelbase */ 3032 /* do_copystr expects lofault address in %r8 */ 3033 /* do_copystr expects whether or not we need smap in %r10 */ 3034 xorl %r10d, %r10d 3035 movq %gs:CPU_THREAD, %r8 3036 movq T_LOFAULT(%r8), %r8 3037 jmp do_copystr 3038 SET_SIZE(ucopystr) 3039 3040 #elif defined(__i386) 3041 3042 ENTRY(copyin_noerr) 3043 movl kernelbase, %eax 3044 #ifdef DEBUG 3045 cmpl %eax, 8(%esp) 3046 jae 1f 3047 pushl $.cpyin_ne_pmsg 3048 call panic 3049 1: 3050 #endif 3051 cmpl %eax, 4(%esp) 3052 jb do_copy 3053 movl %eax, 4(%esp) /* force fault at kernelbase */ 3054 jmp do_copy 3055 SET_SIZE(copyin_noerr) 3056 3057 ENTRY(copyout_noerr) 3058 movl kernelbase, %eax 3059 #ifdef DEBUG 3060 cmpl %eax, 4(%esp) 3061 jae 1f 3062 pushl $.cpyout_ne_pmsg 3063 call panic 3064 1: 3065 #endif 3066 cmpl %eax, 8(%esp) 3067 jb do_copy 3068 movl %eax, 8(%esp) /* force fault at kernelbase */ 3069 jmp do_copy 3070 SET_SIZE(copyout_noerr) 3071 3072 ENTRY(uzero) 3073 movl kernelbase, %eax 3074 cmpl %eax, 4(%esp) 3075 jb do_zero 3076 movl %eax, 4(%esp) /* force fault at kernelbase */ 3077 jmp do_zero 3078 SET_SIZE(uzero) 3079 3080 ENTRY(ucopy) 3081 movl kernelbase, %eax 3082 cmpl %eax, 4(%esp) 3083 jb 1f 3084 movl %eax, 4(%esp) /* force fault at kernelbase */ 3085 1: 3086 cmpl %eax, 8(%esp) 3087 jb do_copy 3088 movl %eax, 8(%esp) /* force fault at kernelbase */ 3089 jmp do_copy 3090 SET_SIZE(ucopy) 3091 3092 ENTRY(ucopystr) 3093 movl kernelbase, %eax 3094 cmpl %eax, 4(%esp) 3095 jb 1f 3096 movl %eax, 4(%esp) /* force fault at kernelbase */ 3097 1: 3098 cmpl %eax, 8(%esp) 3099 jb 2f 3100 movl %eax, 8(%esp) /* force fault at kernelbase */ 3101 2: 3102 /* do_copystr expects the lofault address in %eax */ 3103 movl %gs:CPU_THREAD, %eax 3104 movl T_LOFAULT(%eax), %eax 3105 jmp do_copystr 3106 SET_SIZE(ucopystr) 3107 3108 #endif /* __i386 */ 3109 3110 #ifdef DEBUG 3111 .data 3112 .kcopy_panic_msg: 3113 .string "kcopy: arguments below kernelbase" 3114 .bcopy_panic_msg: 3115 .string "bcopy: arguments below kernelbase" 3116 .kzero_panic_msg: 3117 .string "kzero: arguments below kernelbase" 3118 .bzero_panic_msg: 3119 .string "bzero: arguments below kernelbase" 3120 .copyin_panic_msg: 3121 .string "copyin: kaddr argument below kernelbase" 3122 .xcopyin_panic_msg: 3123 .string "xcopyin: kaddr argument below kernelbase" 3124 .copyout_panic_msg: 3125 .string "copyout: kaddr argument below kernelbase" 3126 .xcopyout_panic_msg: 3127 .string "xcopyout: kaddr argument below kernelbase" 3128 .copystr_panic_msg: 3129 .string "copystr: arguments in user space" 3130 .copyinstr_panic_msg: 3131 .string "copyinstr: kaddr argument not in kernel address space" 3132 .copyoutstr_panic_msg: 3133 .string "copyoutstr: kaddr argument not in kernel address space" 3134 .cpyin_ne_pmsg: 3135 .string "copyin_noerr: argument not in kernel address space" 3136 .cpyout_ne_pmsg: 3137 .string "copyout_noerr: argument not in kernel address space" 3138 #endif 3139 3140 #endif /* __lint */ 3141 3142 /* 3143 * These functions are used for SMAP, supervisor mode access protection. They 3144 * are hotpatched to become real instructions when the system starts up which is 3145 * done in mlsetup() as a part of enabling the other CR4 related features. 
3146 * 3147 * Generally speaking, smap_disable() is a stac instruction and smap_enable() is a 3148 * clac instruction. It's safe to call these any number of times, and in fact, 3149 * out of paranoia, the kernel will likely call them at several points. 3150 */ 3151 3152 #if defined(__lint) 3153 3154 void 3155 smap_enable(void) 3156 {} 3157 3158 void 3159 smap_disable(void) 3160 {} 3161 3162 #else 3163 3164 #if defined (__amd64) || defined(__i386) 3165 ENTRY(smap_disable) 3166 nop 3167 nop 3168 nop 3169 ret 3170 SET_SIZE(smap_disable) 3171 3172 ENTRY(smap_enable) 3173 nop 3174 nop 3175 nop 3176 ret 3177 SET_SIZE(smap_enable) 3178 3179 #endif /* __amd64 || __i386 */ 3180 3181 #endif /* __lint */ 3182 3183 #ifndef __lint 3184 3185 .data 3186 .align 4 3187 .globl _smap_enable_patch_count 3188 .type _smap_enable_patch_count,@object 3189 .size _smap_enable_patch_count, 4 3190 _smap_enable_patch_count: 3191 .long SMAP_ENABLE_COUNT 3192 3193 .globl _smap_disable_patch_count 3194 .type _smap_disable_patch_count,@object 3195 .size _smap_disable_patch_count, 4 3196 _smap_disable_patch_count: 3197 .long SMAP_DISABLE_COUNT 3198 3199 #endif /* __lint */
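
/*
 * The three nops in smap_disable() and smap_enable() reserve exactly the
 * space needed for the hotpatched instruction: stac and clac are both
 * three-byte opcodes (0f 01 cb and 0f 01 ca respectively).  Once patched,
 * the routines behave as in the following sketch (illustrative only; the
 * actual bytes are written by the startup hotpatch code):
 *
 *	ENTRY(smap_disable)
 *		stac
 *		ret
 *	SET_SIZE(smap_disable)
 *
 *	ENTRY(smap_enable)
 *		clac
 *		ret
 *	SET_SIZE(smap_enable)
 */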