/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2009, Intel Corporation
 * All rights reserved.
 */

/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T		*/
/*	  All Rights Reserved						*/

/*	Copyright (c) 1987, 1988 Microsoft Corporation			*/
/*	  All Rights Reserved						*/

/*
 * Copyright 2019 Joyent, Inc.
 */

#include <sys/errno.h>
#include <sys/asm_linkage.h>

#if defined(__lint)
#include <sys/types.h>
#include <sys/systm.h>
#else	/* __lint */
#include "assym.h"
#endif	/* __lint */

#define	KCOPY_MIN_SIZE	128	/* Must be >= 16 bytes */
#define	XCOPY_MIN_SIZE	128	/* Must be >= 16 bytes */
/*
 * Non-temporal access (NTA) alignment requirement
 */
#define	NTA_ALIGN_SIZE	4	/* Must be at least 4-byte aligned */
#define	NTA_ALIGN_MASK	_CONST(NTA_ALIGN_SIZE-1)
#define	COUNT_ALIGN_SIZE	16	/* Must be at least 16-byte aligned */
#define	COUNT_ALIGN_MASK	_CONST(COUNT_ALIGN_SIZE-1)

/*
 * With the introduction of Broadwell, Intel has introduced supervisor mode
 * access protection -- SMAP. SMAP forces the kernel to set certain bits to
 * enable access of user pages (AC in rflags, defined as PS_ACHK in
 * <sys/psw.h>). One of the challenges is that many of the userland copy
 * routines directly use the kernel ones. For example, copyin and copyout
 * simply go and jump to the do_copy_fault label and traditionally let those
 * deal with the return for them. In fact, changing that is a can of frame
 * pointers.
 *
 * Rules and Constraints:
 *
 * 1. For anything that's not in copy.s, we have it do explicit calls to the
 * smap related code. It usually is in a position where it is able to. This is
 * restricted to the following three places: DTrace, resume() in swtch.s and
 * on_fault/no_fault. If you want to add it somewhere else, we should be
 * thinking twice.
 *
 * 2. We try to toggle this at the smallest window possible. This means that
 * if we take a fault, or need to try to use a copyop in copyin() or
 * copyout() or any other function, we will always leave with SMAP enabled
 * (the kernel cannot access user pages).
 *
 * 3. None of the *_noerr() or ucopy/uzero routines should toggle SMAP. They
 * are explicitly only allowed to be called while in an on_fault()/no_fault()
 * handler, which already takes care of ensuring that SMAP is enabled and
 * disabled. Note this means that when under an on_fault()/no_fault() handler,
 * one must not call the non-*_noerr() routines.
 *
 * 4. The first thing we should do after coming out of an lofault handler is
 * to make sure that we call smap_enable() again to ensure that we are safely
 * protected, as more often than not, we will have disabled smap to get there.
 *
 * 5. The SMAP functions, smap_enable() and smap_disable(), may not touch any
 * registers beyond those used by the call and ret themselves. These routines
 * may be called from arbitrary contexts in copy.s where we have slightly more
 * special ABIs in place.
 *
 * 6. For any inline user of SMAP, the appropriate SMAP_ENABLE_INSTR and
 * SMAP_DISABLE_INSTR macro should be used (except for smap_enable() and
 * smap_disable()). If the number of these is changed, you must update the
 * constants SMAP_ENABLE_COUNT and SMAP_DISABLE_COUNT below.
 *
 * 7. Note, at this time SMAP is not implemented for the 32-bit kernel. There
 * is no known technical reason preventing it from being enabled.
 *
 * 8. Generally this .s file is processed by a K&R style cpp. This means that
 * it really has a lot of feelings about whitespace. In particular, if you
 * have a macro FOO with the arguments FOO(1, 3), the second argument is in
 * fact ' 3'.
 *
 * 9. The smap_enable() and smap_disable() functions should not generally be
 * called. They exist such that DTrace and on_trap() may use them, that's it.
 *
 * 10. In general, the kernel has its own value for rflags that gets used.
 * This is maintained in a few different places which vary based on how the
 * thread comes into existence and whether it's a user thread. In general,
 * when the kernel takes a trap, it will always set rflags to a known set of
 * flags, mainly as part of ENABLE_INTR_FLAGS and F_OFF and F_ON. These ensure
 * that PS_ACHK is cleared for us. In addition, when using the sysenter
 * instruction, we mask off PS_ACHK via the AMD_SFMASK MSR. See
 * init_cpu_syscall() for where that gets masked off.
 */

/*
 * The optimal 64-bit bcopy and kcopy for modern x86 processors uses
 * "rep smovq" for large sizes. Performance data shows that many calls to
 * bcopy/kcopy/bzero/kzero operate on small buffers. For the best performance
 * on these small sizes, unrolled code is used. For medium sizes, loops
 * writing 64 bytes per iteration are used. Transition points were determined
 * experimentally.
 */
#define	BZERO_USE_REP	(1024)
#define	BCOPY_DFLT_REP	(128)
#define	BCOPY_NHM_REP	(768)

/*
 * Copy a block of storage, returning an error code if `from' or
 * `to' takes a kernel pagefault which cannot be resolved.
 * Returns errno value on pagefault error, 0 if all ok
 */

/*
 * I'm sorry about these macros, but copy.s is unsurprisingly sensitive to
 * additional call instructions.
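 *
 * The SMAP_ENABLE_INSTR and SMAP_DISABLE_INSTR macros below emit a labeled
 * three-byte nop pad at each use. As an illustrative sketch only (the actual
 * boot-time patching is done outside this file, and only when the CPU
 * advertises SMAP), each pad is later overwritten with the matching
 * three-byte instruction:
 *
 *	// enable pads:  clac -- clear AC, i.e. SMAP protection back on
 *	// disable pads: stac -- set AC, i.e. allow access to user pages
 *	static const uint8_t smap_clac[3] = { 0x0f, 0x01, 0xca };
 *	static const uint8_t smap_stac[3] = { 0x0f, 0x01, 0xcb };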
145 */ 146 #if defined(__amd64) 147 #define SMAP_DISABLE_COUNT 16 148 #define SMAP_ENABLE_COUNT 26 149 #elif defined(__i386) 150 #define SMAP_DISABLE_COUNT 0 151 #define SMAP_ENABLE_COUNT 0 152 #endif 153 154 #define SMAP_DISABLE_INSTR(ITER) \ 155 .globl _smap_disable_patch_/**/ITER; \ 156 _smap_disable_patch_/**/ITER/**/:; \ 157 nop; nop; nop; 158 159 #define SMAP_ENABLE_INSTR(ITER) \ 160 .globl _smap_enable_patch_/**/ITER; \ 161 _smap_enable_patch_/**/ITER/**/:; \ 162 nop; nop; nop; 163 164 #if defined(__lint) 165 166 /* ARGSUSED */ 167 int 168 kcopy(const void *from, void *to, size_t count) 169 { return (0); } 170 171 #else /* __lint */ 172 173 .globl kernelbase 174 .globl postbootkernelbase 175 176 #if defined(__amd64) 177 178 ENTRY(kcopy) 179 pushq %rbp 180 movq %rsp, %rbp 181 #ifdef DEBUG 182 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */ 183 jb 0f 184 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */ 185 jnb 1f 186 0: leaq .kcopy_panic_msg(%rip), %rdi 187 xorl %eax, %eax 188 call panic 189 1: 190 #endif 191 /* 192 * pass lofault value as 4th argument to do_copy_fault 193 */ 194 leaq _kcopy_copyerr(%rip), %rcx 195 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ 196 197 do_copy_fault: 198 movq T_LOFAULT(%r9), %r11 /* save the current lofault */ 199 movq %rcx, T_LOFAULT(%r9) /* new lofault */ 200 call bcopy_altentry 201 xorl %eax, %eax /* return 0 (success) */ 202 SMAP_ENABLE_INSTR(0) 203 204 /* 205 * A fault during do_copy_fault is indicated through an errno value 206 * in %rax and we iretq from the trap handler to here. 207 */ 208 _kcopy_copyerr: 209 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 210 leave 211 ret 212 SET_SIZE(kcopy) 213 214 #elif defined(__i386) 215 216 #define ARG_FROM 8 217 #define ARG_TO 12 218 #define ARG_COUNT 16 219 220 ENTRY(kcopy) 221 #ifdef DEBUG 222 pushl %ebp 223 movl %esp, %ebp 224 movl postbootkernelbase, %eax 225 cmpl %eax, ARG_FROM(%ebp) 226 jb 0f 227 cmpl %eax, ARG_TO(%ebp) 228 jnb 1f 229 0: pushl $.kcopy_panic_msg 230 call panic 231 1: popl %ebp 232 #endif 233 lea _kcopy_copyerr, %eax /* lofault value */ 234 movl %gs:CPU_THREAD, %edx 235 236 do_copy_fault: 237 pushl %ebp 238 movl %esp, %ebp /* setup stack frame */ 239 pushl %esi 240 pushl %edi /* save registers */ 241 242 movl T_LOFAULT(%edx), %edi 243 pushl %edi /* save the current lofault */ 244 movl %eax, T_LOFAULT(%edx) /* new lofault */ 245 246 movl ARG_COUNT(%ebp), %ecx 247 movl ARG_FROM(%ebp), %esi 248 movl ARG_TO(%ebp), %edi 249 shrl $2, %ecx /* word count */ 250 rep 251 smovl 252 movl ARG_COUNT(%ebp), %ecx 253 andl $3, %ecx /* bytes left over */ 254 rep 255 smovb 256 xorl %eax, %eax 257 258 /* 259 * A fault during do_copy_fault is indicated through an errno value 260 * in %eax and we iret from the trap handler to here. 261 */ 262 _kcopy_copyerr: 263 popl %ecx 264 popl %edi 265 movl %ecx, T_LOFAULT(%edx) /* restore the original lofault */ 266 popl %esi 267 popl %ebp 268 ret 269 SET_SIZE(kcopy) 270 271 #undef ARG_FROM 272 #undef ARG_TO 273 #undef ARG_COUNT 274 275 #endif /* __i386 */ 276 #endif /* __lint */ 277 278 #if defined(__lint) 279 280 /* 281 * Copy a block of storage. Similar to kcopy but uses non-temporal 282 * instructions. 283 */ 284 285 /* ARGSUSED */ 286 int 287 kcopy_nta(const void *from, void *to, size_t count, int copy_cached) 288 { return (0); } 289 290 #else /* __lint */ 291 292 #if defined(__amd64) 293 294 #define COPY_LOOP_INIT(src, dst, cnt) \ 295 addq cnt, src; \ 296 addq cnt, dst; \ 297 shrq $3, cnt; \ 298 neg cnt 299 300 /* Copy 16 bytes per loop. 
Uses %rax and %r8 */ 301 #define COPY_LOOP_BODY(src, dst, cnt) \ 302 prefetchnta 0x100(src, cnt, 8); \ 303 movq (src, cnt, 8), %rax; \ 304 movq 0x8(src, cnt, 8), %r8; \ 305 movnti %rax, (dst, cnt, 8); \ 306 movnti %r8, 0x8(dst, cnt, 8); \ 307 addq $2, cnt 308 309 ENTRY(kcopy_nta) 310 pushq %rbp 311 movq %rsp, %rbp 312 #ifdef DEBUG 313 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */ 314 jb 0f 315 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */ 316 jnb 1f 317 0: leaq .kcopy_panic_msg(%rip), %rdi 318 xorl %eax, %eax 319 call panic 320 1: 321 #endif 322 323 movq %gs:CPU_THREAD, %r9 324 cmpq $0, %rcx /* No non-temporal access? */ 325 /* 326 * pass lofault value as 4th argument to do_copy_fault 327 */ 328 leaq _kcopy_nta_copyerr(%rip), %rcx /* doesn't set rflags */ 329 jnz do_copy_fault /* use regular access */ 330 /* 331 * Make sure cnt is >= KCOPY_MIN_SIZE 332 */ 333 cmpq $KCOPY_MIN_SIZE, %rdx 334 jb do_copy_fault 335 336 /* 337 * Make sure src and dst are NTA_ALIGN_SIZE aligned, 338 * count is COUNT_ALIGN_SIZE aligned. 339 */ 340 movq %rdi, %r10 341 orq %rsi, %r10 342 andq $NTA_ALIGN_MASK, %r10 343 orq %rdx, %r10 344 andq $COUNT_ALIGN_MASK, %r10 345 jnz do_copy_fault 346 347 ALTENTRY(do_copy_fault_nta) 348 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ 349 movq T_LOFAULT(%r9), %r11 /* save the current lofault */ 350 movq %rcx, T_LOFAULT(%r9) /* new lofault */ 351 352 /* 353 * COPY_LOOP_BODY uses %rax and %r8 354 */ 355 COPY_LOOP_INIT(%rdi, %rsi, %rdx) 356 2: COPY_LOOP_BODY(%rdi, %rsi, %rdx) 357 jnz 2b 358 359 mfence 360 xorl %eax, %eax /* return 0 (success) */ 361 SMAP_ENABLE_INSTR(1) 362 363 _kcopy_nta_copyerr: 364 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 365 leave 366 ret 367 SET_SIZE(do_copy_fault_nta) 368 SET_SIZE(kcopy_nta) 369 370 #elif defined(__i386) 371 372 #define ARG_FROM 8 373 #define ARG_TO 12 374 #define ARG_COUNT 16 375 376 #define COPY_LOOP_INIT(src, dst, cnt) \ 377 addl cnt, src; \ 378 addl cnt, dst; \ 379 shrl $3, cnt; \ 380 neg cnt 381 382 #define COPY_LOOP_BODY(src, dst, cnt) \ 383 prefetchnta 0x100(src, cnt, 8); \ 384 movl (src, cnt, 8), %esi; \ 385 movnti %esi, (dst, cnt, 8); \ 386 movl 0x4(src, cnt, 8), %esi; \ 387 movnti %esi, 0x4(dst, cnt, 8); \ 388 movl 0x8(src, cnt, 8), %esi; \ 389 movnti %esi, 0x8(dst, cnt, 8); \ 390 movl 0xc(src, cnt, 8), %esi; \ 391 movnti %esi, 0xc(dst, cnt, 8); \ 392 addl $2, cnt 393 394 /* 395 * kcopy_nta is not implemented for 32-bit as no performance 396 * improvement was shown. We simply jump directly to kcopy 397 * and discard the 4 arguments. 
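 *
 * In C terms the 32-bit entry point behaves like the sketch below
 * (illustrative only; the real entry is the one-instruction stub that
 * follows):
 *
 *	int
 *	kcopy_nta(const void *from, void *to, size_t count, int copy_cached)
 *	{
 *		// copy_cached is ignored; there is no 32-bit NTA path
 *		return (kcopy(from, to, count));
 *	}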
398 */ 399 ENTRY(kcopy_nta) 400 jmp kcopy 401 402 lea _kcopy_nta_copyerr, %eax /* lofault value */ 403 ALTENTRY(do_copy_fault_nta) 404 pushl %ebp 405 movl %esp, %ebp /* setup stack frame */ 406 pushl %esi 407 pushl %edi 408 409 movl %gs:CPU_THREAD, %edx 410 movl T_LOFAULT(%edx), %edi 411 pushl %edi /* save the current lofault */ 412 movl %eax, T_LOFAULT(%edx) /* new lofault */ 413 414 /* COPY_LOOP_BODY needs to use %esi */ 415 movl ARG_COUNT(%ebp), %ecx 416 movl ARG_FROM(%ebp), %edi 417 movl ARG_TO(%ebp), %eax 418 COPY_LOOP_INIT(%edi, %eax, %ecx) 419 1: COPY_LOOP_BODY(%edi, %eax, %ecx) 420 jnz 1b 421 mfence 422 423 xorl %eax, %eax 424 _kcopy_nta_copyerr: 425 popl %ecx 426 popl %edi 427 movl %ecx, T_LOFAULT(%edx) /* restore the original lofault */ 428 popl %esi 429 leave 430 ret 431 SET_SIZE(do_copy_fault_nta) 432 SET_SIZE(kcopy_nta) 433 434 #undef ARG_FROM 435 #undef ARG_TO 436 #undef ARG_COUNT 437 438 #endif /* __i386 */ 439 #endif /* __lint */ 440 441 #if defined(__lint) 442 443 /* ARGSUSED */ 444 void 445 bcopy(const void *from, void *to, size_t count) 446 {} 447 448 #else /* __lint */ 449 450 #if defined(__amd64) 451 452 ENTRY(bcopy) 453 #ifdef DEBUG 454 orq %rdx, %rdx /* %rdx = count */ 455 jz 1f 456 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */ 457 jb 0f 458 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */ 459 jnb 1f 460 0: leaq .bcopy_panic_msg(%rip), %rdi 461 jmp call_panic /* setup stack and call panic */ 462 1: 463 #endif 464 /* 465 * bcopy_altentry() is called from kcopy, i.e., do_copy_fault. 466 * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy 467 * uses these registers in future they must be saved and restored. 468 */ 469 ALTENTRY(bcopy_altentry) 470 do_copy: 471 #define L(s) .bcopy/**/s 472 cmpq $0x50, %rdx /* 80 */ 473 jae bcopy_ck_size 474 475 /* 476 * Performance data shows many caller's copy small buffers. So for 477 * best perf for these sizes unrolled code is used. Store data without 478 * worrying about alignment. 
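 *
 * The jump table that follows picks an unrolled sequence copying Q quadwords
 * plus P trailing bytes (count = P + 8 * Q, count < 80), addressed back from
 * the end of both buffers. A rough C model of the effect (illustrative only,
 * not the real code path):
 *
 *	static void
 *	small_copy(const uint8_t *from, uint8_t *to, size_t cnt)
 *	{
 *		size_t q = cnt >> 3, p = cnt & 7;	// cnt < 80
 *
 *		while (q-- != 0) {			// Q quadword moves
 *			*(uint64_t *)to = *(const uint64_t *)from;
 *			from += 8;
 *			to += 8;
 *		}
 *		while (p-- != 0)			// P trailing bytes
 *			*to++ = *from++;
 *	}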
479 */ 480 leaq L(fwdPxQx)(%rip), %r10 481 addq %rdx, %rdi 482 addq %rdx, %rsi 483 movslq (%r10,%rdx,4), %rcx 484 leaq (%rcx,%r10,1), %r10 485 INDIRECT_JMP_REG(r10) 486 487 .p2align 4 488 L(fwdPxQx): 489 .int L(P0Q0)-L(fwdPxQx) /* 0 */ 490 .int L(P1Q0)-L(fwdPxQx) 491 .int L(P2Q0)-L(fwdPxQx) 492 .int L(P3Q0)-L(fwdPxQx) 493 .int L(P4Q0)-L(fwdPxQx) 494 .int L(P5Q0)-L(fwdPxQx) 495 .int L(P6Q0)-L(fwdPxQx) 496 .int L(P7Q0)-L(fwdPxQx) 497 498 .int L(P0Q1)-L(fwdPxQx) /* 8 */ 499 .int L(P1Q1)-L(fwdPxQx) 500 .int L(P2Q1)-L(fwdPxQx) 501 .int L(P3Q1)-L(fwdPxQx) 502 .int L(P4Q1)-L(fwdPxQx) 503 .int L(P5Q1)-L(fwdPxQx) 504 .int L(P6Q1)-L(fwdPxQx) 505 .int L(P7Q1)-L(fwdPxQx) 506 507 .int L(P0Q2)-L(fwdPxQx) /* 16 */ 508 .int L(P1Q2)-L(fwdPxQx) 509 .int L(P2Q2)-L(fwdPxQx) 510 .int L(P3Q2)-L(fwdPxQx) 511 .int L(P4Q2)-L(fwdPxQx) 512 .int L(P5Q2)-L(fwdPxQx) 513 .int L(P6Q2)-L(fwdPxQx) 514 .int L(P7Q2)-L(fwdPxQx) 515 516 .int L(P0Q3)-L(fwdPxQx) /* 24 */ 517 .int L(P1Q3)-L(fwdPxQx) 518 .int L(P2Q3)-L(fwdPxQx) 519 .int L(P3Q3)-L(fwdPxQx) 520 .int L(P4Q3)-L(fwdPxQx) 521 .int L(P5Q3)-L(fwdPxQx) 522 .int L(P6Q3)-L(fwdPxQx) 523 .int L(P7Q3)-L(fwdPxQx) 524 525 .int L(P0Q4)-L(fwdPxQx) /* 32 */ 526 .int L(P1Q4)-L(fwdPxQx) 527 .int L(P2Q4)-L(fwdPxQx) 528 .int L(P3Q4)-L(fwdPxQx) 529 .int L(P4Q4)-L(fwdPxQx) 530 .int L(P5Q4)-L(fwdPxQx) 531 .int L(P6Q4)-L(fwdPxQx) 532 .int L(P7Q4)-L(fwdPxQx) 533 534 .int L(P0Q5)-L(fwdPxQx) /* 40 */ 535 .int L(P1Q5)-L(fwdPxQx) 536 .int L(P2Q5)-L(fwdPxQx) 537 .int L(P3Q5)-L(fwdPxQx) 538 .int L(P4Q5)-L(fwdPxQx) 539 .int L(P5Q5)-L(fwdPxQx) 540 .int L(P6Q5)-L(fwdPxQx) 541 .int L(P7Q5)-L(fwdPxQx) 542 543 .int L(P0Q6)-L(fwdPxQx) /* 48 */ 544 .int L(P1Q6)-L(fwdPxQx) 545 .int L(P2Q6)-L(fwdPxQx) 546 .int L(P3Q6)-L(fwdPxQx) 547 .int L(P4Q6)-L(fwdPxQx) 548 .int L(P5Q6)-L(fwdPxQx) 549 .int L(P6Q6)-L(fwdPxQx) 550 .int L(P7Q6)-L(fwdPxQx) 551 552 .int L(P0Q7)-L(fwdPxQx) /* 56 */ 553 .int L(P1Q7)-L(fwdPxQx) 554 .int L(P2Q7)-L(fwdPxQx) 555 .int L(P3Q7)-L(fwdPxQx) 556 .int L(P4Q7)-L(fwdPxQx) 557 .int L(P5Q7)-L(fwdPxQx) 558 .int L(P6Q7)-L(fwdPxQx) 559 .int L(P7Q7)-L(fwdPxQx) 560 561 .int L(P0Q8)-L(fwdPxQx) /* 64 */ 562 .int L(P1Q8)-L(fwdPxQx) 563 .int L(P2Q8)-L(fwdPxQx) 564 .int L(P3Q8)-L(fwdPxQx) 565 .int L(P4Q8)-L(fwdPxQx) 566 .int L(P5Q8)-L(fwdPxQx) 567 .int L(P6Q8)-L(fwdPxQx) 568 .int L(P7Q8)-L(fwdPxQx) 569 570 .int L(P0Q9)-L(fwdPxQx) /* 72 */ 571 .int L(P1Q9)-L(fwdPxQx) 572 .int L(P2Q9)-L(fwdPxQx) 573 .int L(P3Q9)-L(fwdPxQx) 574 .int L(P4Q9)-L(fwdPxQx) 575 .int L(P5Q9)-L(fwdPxQx) 576 .int L(P6Q9)-L(fwdPxQx) 577 .int L(P7Q9)-L(fwdPxQx) /* 79 */ 578 579 .p2align 4 580 L(P0Q9): 581 mov -0x48(%rdi), %rcx 582 mov %rcx, -0x48(%rsi) 583 L(P0Q8): 584 mov -0x40(%rdi), %r10 585 mov %r10, -0x40(%rsi) 586 L(P0Q7): 587 mov -0x38(%rdi), %r8 588 mov %r8, -0x38(%rsi) 589 L(P0Q6): 590 mov -0x30(%rdi), %rcx 591 mov %rcx, -0x30(%rsi) 592 L(P0Q5): 593 mov -0x28(%rdi), %r10 594 mov %r10, -0x28(%rsi) 595 L(P0Q4): 596 mov -0x20(%rdi), %r8 597 mov %r8, -0x20(%rsi) 598 L(P0Q3): 599 mov -0x18(%rdi), %rcx 600 mov %rcx, -0x18(%rsi) 601 L(P0Q2): 602 mov -0x10(%rdi), %r10 603 mov %r10, -0x10(%rsi) 604 L(P0Q1): 605 mov -0x8(%rdi), %r8 606 mov %r8, -0x8(%rsi) 607 L(P0Q0): 608 ret 609 610 .p2align 4 611 L(P1Q9): 612 mov -0x49(%rdi), %r8 613 mov %r8, -0x49(%rsi) 614 L(P1Q8): 615 mov -0x41(%rdi), %rcx 616 mov %rcx, -0x41(%rsi) 617 L(P1Q7): 618 mov -0x39(%rdi), %r10 619 mov %r10, -0x39(%rsi) 620 L(P1Q6): 621 mov -0x31(%rdi), %r8 622 mov %r8, -0x31(%rsi) 623 L(P1Q5): 624 mov -0x29(%rdi), %rcx 625 mov %rcx, -0x29(%rsi) 626 L(P1Q4): 627 mov 
-0x21(%rdi), %r10 628 mov %r10, -0x21(%rsi) 629 L(P1Q3): 630 mov -0x19(%rdi), %r8 631 mov %r8, -0x19(%rsi) 632 L(P1Q2): 633 mov -0x11(%rdi), %rcx 634 mov %rcx, -0x11(%rsi) 635 L(P1Q1): 636 mov -0x9(%rdi), %r10 637 mov %r10, -0x9(%rsi) 638 L(P1Q0): 639 movzbq -0x1(%rdi), %r8 640 mov %r8b, -0x1(%rsi) 641 ret 642 643 .p2align 4 644 L(P2Q9): 645 mov -0x4a(%rdi), %r8 646 mov %r8, -0x4a(%rsi) 647 L(P2Q8): 648 mov -0x42(%rdi), %rcx 649 mov %rcx, -0x42(%rsi) 650 L(P2Q7): 651 mov -0x3a(%rdi), %r10 652 mov %r10, -0x3a(%rsi) 653 L(P2Q6): 654 mov -0x32(%rdi), %r8 655 mov %r8, -0x32(%rsi) 656 L(P2Q5): 657 mov -0x2a(%rdi), %rcx 658 mov %rcx, -0x2a(%rsi) 659 L(P2Q4): 660 mov -0x22(%rdi), %r10 661 mov %r10, -0x22(%rsi) 662 L(P2Q3): 663 mov -0x1a(%rdi), %r8 664 mov %r8, -0x1a(%rsi) 665 L(P2Q2): 666 mov -0x12(%rdi), %rcx 667 mov %rcx, -0x12(%rsi) 668 L(P2Q1): 669 mov -0xa(%rdi), %r10 670 mov %r10, -0xa(%rsi) 671 L(P2Q0): 672 movzwq -0x2(%rdi), %r8 673 mov %r8w, -0x2(%rsi) 674 ret 675 676 .p2align 4 677 L(P3Q9): 678 mov -0x4b(%rdi), %r8 679 mov %r8, -0x4b(%rsi) 680 L(P3Q8): 681 mov -0x43(%rdi), %rcx 682 mov %rcx, -0x43(%rsi) 683 L(P3Q7): 684 mov -0x3b(%rdi), %r10 685 mov %r10, -0x3b(%rsi) 686 L(P3Q6): 687 mov -0x33(%rdi), %r8 688 mov %r8, -0x33(%rsi) 689 L(P3Q5): 690 mov -0x2b(%rdi), %rcx 691 mov %rcx, -0x2b(%rsi) 692 L(P3Q4): 693 mov -0x23(%rdi), %r10 694 mov %r10, -0x23(%rsi) 695 L(P3Q3): 696 mov -0x1b(%rdi), %r8 697 mov %r8, -0x1b(%rsi) 698 L(P3Q2): 699 mov -0x13(%rdi), %rcx 700 mov %rcx, -0x13(%rsi) 701 L(P3Q1): 702 mov -0xb(%rdi), %r10 703 mov %r10, -0xb(%rsi) 704 /* 705 * These trailing loads/stores have to do all their loads 1st, 706 * then do the stores. 707 */ 708 L(P3Q0): 709 movzwq -0x3(%rdi), %r8 710 movzbq -0x1(%rdi), %r10 711 mov %r8w, -0x3(%rsi) 712 mov %r10b, -0x1(%rsi) 713 ret 714 715 .p2align 4 716 L(P4Q9): 717 mov -0x4c(%rdi), %r8 718 mov %r8, -0x4c(%rsi) 719 L(P4Q8): 720 mov -0x44(%rdi), %rcx 721 mov %rcx, -0x44(%rsi) 722 L(P4Q7): 723 mov -0x3c(%rdi), %r10 724 mov %r10, -0x3c(%rsi) 725 L(P4Q6): 726 mov -0x34(%rdi), %r8 727 mov %r8, -0x34(%rsi) 728 L(P4Q5): 729 mov -0x2c(%rdi), %rcx 730 mov %rcx, -0x2c(%rsi) 731 L(P4Q4): 732 mov -0x24(%rdi), %r10 733 mov %r10, -0x24(%rsi) 734 L(P4Q3): 735 mov -0x1c(%rdi), %r8 736 mov %r8, -0x1c(%rsi) 737 L(P4Q2): 738 mov -0x14(%rdi), %rcx 739 mov %rcx, -0x14(%rsi) 740 L(P4Q1): 741 mov -0xc(%rdi), %r10 742 mov %r10, -0xc(%rsi) 743 L(P4Q0): 744 mov -0x4(%rdi), %r8d 745 mov %r8d, -0x4(%rsi) 746 ret 747 748 .p2align 4 749 L(P5Q9): 750 mov -0x4d(%rdi), %r8 751 mov %r8, -0x4d(%rsi) 752 L(P5Q8): 753 mov -0x45(%rdi), %rcx 754 mov %rcx, -0x45(%rsi) 755 L(P5Q7): 756 mov -0x3d(%rdi), %r10 757 mov %r10, -0x3d(%rsi) 758 L(P5Q6): 759 mov -0x35(%rdi), %r8 760 mov %r8, -0x35(%rsi) 761 L(P5Q5): 762 mov -0x2d(%rdi), %rcx 763 mov %rcx, -0x2d(%rsi) 764 L(P5Q4): 765 mov -0x25(%rdi), %r10 766 mov %r10, -0x25(%rsi) 767 L(P5Q3): 768 mov -0x1d(%rdi), %r8 769 mov %r8, -0x1d(%rsi) 770 L(P5Q2): 771 mov -0x15(%rdi), %rcx 772 mov %rcx, -0x15(%rsi) 773 L(P5Q1): 774 mov -0xd(%rdi), %r10 775 mov %r10, -0xd(%rsi) 776 L(P5Q0): 777 mov -0x5(%rdi), %r8d 778 movzbq -0x1(%rdi), %r10 779 mov %r8d, -0x5(%rsi) 780 mov %r10b, -0x1(%rsi) 781 ret 782 783 .p2align 4 784 L(P6Q9): 785 mov -0x4e(%rdi), %r8 786 mov %r8, -0x4e(%rsi) 787 L(P6Q8): 788 mov -0x46(%rdi), %rcx 789 mov %rcx, -0x46(%rsi) 790 L(P6Q7): 791 mov -0x3e(%rdi), %r10 792 mov %r10, -0x3e(%rsi) 793 L(P6Q6): 794 mov -0x36(%rdi), %r8 795 mov %r8, -0x36(%rsi) 796 L(P6Q5): 797 mov -0x2e(%rdi), %rcx 798 mov %rcx, -0x2e(%rsi) 799 L(P6Q4): 800 mov 
-0x26(%rdi), %r10 801 mov %r10, -0x26(%rsi) 802 L(P6Q3): 803 mov -0x1e(%rdi), %r8 804 mov %r8, -0x1e(%rsi) 805 L(P6Q2): 806 mov -0x16(%rdi), %rcx 807 mov %rcx, -0x16(%rsi) 808 L(P6Q1): 809 mov -0xe(%rdi), %r10 810 mov %r10, -0xe(%rsi) 811 L(P6Q0): 812 mov -0x6(%rdi), %r8d 813 movzwq -0x2(%rdi), %r10 814 mov %r8d, -0x6(%rsi) 815 mov %r10w, -0x2(%rsi) 816 ret 817 818 .p2align 4 819 L(P7Q9): 820 mov -0x4f(%rdi), %r8 821 mov %r8, -0x4f(%rsi) 822 L(P7Q8): 823 mov -0x47(%rdi), %rcx 824 mov %rcx, -0x47(%rsi) 825 L(P7Q7): 826 mov -0x3f(%rdi), %r10 827 mov %r10, -0x3f(%rsi) 828 L(P7Q6): 829 mov -0x37(%rdi), %r8 830 mov %r8, -0x37(%rsi) 831 L(P7Q5): 832 mov -0x2f(%rdi), %rcx 833 mov %rcx, -0x2f(%rsi) 834 L(P7Q4): 835 mov -0x27(%rdi), %r10 836 mov %r10, -0x27(%rsi) 837 L(P7Q3): 838 mov -0x1f(%rdi), %r8 839 mov %r8, -0x1f(%rsi) 840 L(P7Q2): 841 mov -0x17(%rdi), %rcx 842 mov %rcx, -0x17(%rsi) 843 L(P7Q1): 844 mov -0xf(%rdi), %r10 845 mov %r10, -0xf(%rsi) 846 L(P7Q0): 847 mov -0x7(%rdi), %r8d 848 movzwq -0x3(%rdi), %r10 849 movzbq -0x1(%rdi), %rcx 850 mov %r8d, -0x7(%rsi) 851 mov %r10w, -0x3(%rsi) 852 mov %cl, -0x1(%rsi) 853 ret 854 855 /* 856 * For large sizes rep smovq is fastest. 857 * Transition point determined experimentally as measured on 858 * Intel Xeon processors (incl. Nehalem and previous generations) and 859 * AMD Opteron. The transition value is patched at boot time to avoid 860 * memory reference hit. 861 */ 862 .globl bcopy_patch_start 863 bcopy_patch_start: 864 cmpq $BCOPY_NHM_REP, %rdx 865 .globl bcopy_patch_end 866 bcopy_patch_end: 867 868 .p2align 4 869 ALTENTRY(bcopy_ck_size) 870 871 cmpq $BCOPY_DFLT_REP, %rdx 872 jae L(use_rep) 873 874 /* 875 * Align to a 8-byte boundary. Avoids penalties from unaligned stores 876 * as well as from stores spanning cachelines. 877 */ 878 test $0x7, %rsi 879 jz L(aligned_loop) 880 test $0x1, %rsi 881 jz 2f 882 movzbq (%rdi), %r8 883 dec %rdx 884 inc %rdi 885 mov %r8b, (%rsi) 886 inc %rsi 887 2: 888 test $0x2, %rsi 889 jz 4f 890 movzwq (%rdi), %r8 891 sub $0x2, %rdx 892 add $0x2, %rdi 893 mov %r8w, (%rsi) 894 add $0x2, %rsi 895 4: 896 test $0x4, %rsi 897 jz L(aligned_loop) 898 mov (%rdi), %r8d 899 sub $0x4, %rdx 900 add $0x4, %rdi 901 mov %r8d, (%rsi) 902 add $0x4, %rsi 903 904 /* 905 * Copy 64-bytes per loop 906 */ 907 .p2align 4 908 L(aligned_loop): 909 mov (%rdi), %r8 910 mov 0x8(%rdi), %r10 911 lea -0x40(%rdx), %rdx 912 mov %r8, (%rsi) 913 mov %r10, 0x8(%rsi) 914 mov 0x10(%rdi), %rcx 915 mov 0x18(%rdi), %r8 916 mov %rcx, 0x10(%rsi) 917 mov %r8, 0x18(%rsi) 918 919 cmp $0x40, %rdx 920 mov 0x20(%rdi), %r10 921 mov 0x28(%rdi), %rcx 922 mov %r10, 0x20(%rsi) 923 mov %rcx, 0x28(%rsi) 924 mov 0x30(%rdi), %r8 925 mov 0x38(%rdi), %r10 926 lea 0x40(%rdi), %rdi 927 mov %r8, 0x30(%rsi) 928 mov %r10, 0x38(%rsi) 929 lea 0x40(%rsi), %rsi 930 jae L(aligned_loop) 931 932 /* 933 * Copy remaining bytes (0-63) 934 */ 935 L(do_remainder): 936 leaq L(fwdPxQx)(%rip), %r10 937 addq %rdx, %rdi 938 addq %rdx, %rsi 939 movslq (%r10,%rdx,4), %rcx 940 leaq (%rcx,%r10,1), %r10 941 INDIRECT_JMP_REG(r10) 942 943 /* 944 * Use rep smovq. 
Clear remainder via unrolled code 945 */ 946 .p2align 4 947 L(use_rep): 948 xchgq %rdi, %rsi /* %rsi = source, %rdi = destination */ 949 movq %rdx, %rcx /* %rcx = count */ 950 shrq $3, %rcx /* 8-byte word count */ 951 rep 952 smovq 953 954 xchgq %rsi, %rdi /* %rdi = src, %rsi = destination */ 955 andq $7, %rdx /* remainder */ 956 jnz L(do_remainder) 957 ret 958 #undef L 959 SET_SIZE(bcopy_ck_size) 960 961 #ifdef DEBUG 962 /* 963 * Setup frame on the run-time stack. The end of the input argument 964 * area must be aligned on a 16 byte boundary. The stack pointer %rsp, 965 * always points to the end of the latest allocated stack frame. 966 * panic(const char *format, ...) is a varargs function. When a 967 * function taking variable arguments is called, %rax must be set 968 * to eight times the number of floating point parameters passed 969 * to the function in SSE registers. 970 */ 971 call_panic: 972 pushq %rbp /* align stack properly */ 973 movq %rsp, %rbp 974 xorl %eax, %eax /* no variable arguments */ 975 call panic /* %rdi = format string */ 976 #endif 977 SET_SIZE(bcopy_altentry) 978 SET_SIZE(bcopy) 979 980 #elif defined(__i386) 981 982 #define ARG_FROM 4 983 #define ARG_TO 8 984 #define ARG_COUNT 12 985 986 ENTRY(bcopy) 987 #ifdef DEBUG 988 movl ARG_COUNT(%esp), %eax 989 orl %eax, %eax 990 jz 1f 991 movl postbootkernelbase, %eax 992 cmpl %eax, ARG_FROM(%esp) 993 jb 0f 994 cmpl %eax, ARG_TO(%esp) 995 jnb 1f 996 0: pushl %ebp 997 movl %esp, %ebp 998 pushl $.bcopy_panic_msg 999 call panic 1000 1: 1001 #endif 1002 do_copy: 1003 movl %esi, %eax /* save registers */ 1004 movl %edi, %edx 1005 movl ARG_COUNT(%esp), %ecx 1006 movl ARG_FROM(%esp), %esi 1007 movl ARG_TO(%esp), %edi 1008 1009 shrl $2, %ecx /* word count */ 1010 rep 1011 smovl 1012 movl ARG_COUNT(%esp), %ecx 1013 andl $3, %ecx /* bytes left over */ 1014 rep 1015 smovb 1016 movl %eax, %esi /* restore registers */ 1017 movl %edx, %edi 1018 ret 1019 SET_SIZE(bcopy) 1020 1021 #undef ARG_COUNT 1022 #undef ARG_FROM 1023 #undef ARG_TO 1024 1025 #endif /* __i386 */ 1026 #endif /* __lint */ 1027 1028 1029 /* 1030 * Zero a block of storage, returning an error code if we 1031 * take a kernel pagefault which cannot be resolved. 1032 * Returns errno value on pagefault error, 0 if all ok 1033 */ 1034 1035 #if defined(__lint) 1036 1037 /* ARGSUSED */ 1038 int 1039 kzero(void *addr, size_t count) 1040 { return (0); } 1041 1042 #else /* __lint */ 1043 1044 #if defined(__amd64) 1045 1046 ENTRY(kzero) 1047 #ifdef DEBUG 1048 cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */ 1049 jnb 0f 1050 leaq .kzero_panic_msg(%rip), %rdi 1051 jmp call_panic /* setup stack and call panic */ 1052 0: 1053 #endif 1054 /* 1055 * pass lofault value as 3rd argument for fault return 1056 */ 1057 leaq _kzeroerr(%rip), %rdx 1058 1059 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ 1060 movq T_LOFAULT(%r9), %r11 /* save the current lofault */ 1061 movq %rdx, T_LOFAULT(%r9) /* new lofault */ 1062 call bzero_altentry 1063 xorl %eax, %eax 1064 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ 1065 ret 1066 /* 1067 * A fault during bzero is indicated through an errno value 1068 * in %rax when we iretq to here. 
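 *
 * The visible contract is the usual kzero() one (hypothetical caller, shown
 * for illustration only):
 *
 *	int err = kzero(addr, len);	// 0 on success, or the errno (e.g.
 *	if (err != 0)			// EFAULT) if the pagefault could
 *		return (err);		// not be resolved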
1069 */ 1070 _kzeroerr: 1071 addq $8, %rsp /* pop bzero_altentry call ret addr */ 1072 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ 1073 ret 1074 SET_SIZE(kzero) 1075 1076 #elif defined(__i386) 1077 1078 #define ARG_ADDR 8 1079 #define ARG_COUNT 12 1080 1081 ENTRY(kzero) 1082 #ifdef DEBUG 1083 pushl %ebp 1084 movl %esp, %ebp 1085 movl postbootkernelbase, %eax 1086 cmpl %eax, ARG_ADDR(%ebp) 1087 jnb 0f 1088 pushl $.kzero_panic_msg 1089 call panic 1090 0: popl %ebp 1091 #endif 1092 lea _kzeroerr, %eax /* kzeroerr is lofault value */ 1093 1094 pushl %ebp /* save stack base */ 1095 movl %esp, %ebp /* set new stack base */ 1096 pushl %edi /* save %edi */ 1097 1098 mov %gs:CPU_THREAD, %edx 1099 movl T_LOFAULT(%edx), %edi 1100 pushl %edi /* save the current lofault */ 1101 movl %eax, T_LOFAULT(%edx) /* new lofault */ 1102 1103 movl ARG_COUNT(%ebp), %ecx /* get size in bytes */ 1104 movl ARG_ADDR(%ebp), %edi /* %edi <- address of bytes to clear */ 1105 shrl $2, %ecx /* Count of double words to zero */ 1106 xorl %eax, %eax /* sstol val */ 1107 rep 1108 sstol /* %ecx contains words to clear (%eax=0) */ 1109 1110 movl ARG_COUNT(%ebp), %ecx /* get size in bytes */ 1111 andl $3, %ecx /* do mod 4 */ 1112 rep 1113 sstob /* %ecx contains residual bytes to clear */ 1114 1115 /* 1116 * A fault during kzero is indicated through an errno value 1117 * in %eax when we iret to here. 1118 */ 1119 _kzeroerr: 1120 popl %edi 1121 movl %edi, T_LOFAULT(%edx) /* restore the original lofault */ 1122 popl %edi 1123 popl %ebp 1124 ret 1125 SET_SIZE(kzero) 1126 1127 #undef ARG_ADDR 1128 #undef ARG_COUNT 1129 1130 #endif /* __i386 */ 1131 #endif /* __lint */ 1132 1133 /* 1134 * Zero a block of storage. 1135 */ 1136 1137 #if defined(__lint) 1138 1139 /* ARGSUSED */ 1140 void 1141 bzero(void *addr, size_t count) 1142 {} 1143 1144 #else /* __lint */ 1145 1146 #if defined(__amd64) 1147 1148 ENTRY(bzero) 1149 #ifdef DEBUG 1150 cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */ 1151 jnb 0f 1152 leaq .bzero_panic_msg(%rip), %rdi 1153 jmp call_panic /* setup stack and call panic */ 1154 0: 1155 #endif 1156 ALTENTRY(bzero_altentry) 1157 do_zero: 1158 #define L(s) .bzero/**/s 1159 xorl %eax, %eax 1160 1161 cmpq $0x50, %rsi /* 80 */ 1162 jae L(ck_align) 1163 1164 /* 1165 * Performance data shows many caller's are zeroing small buffers. So 1166 * for best perf for these sizes unrolled code is used. Store zeros 1167 * without worrying about alignment. 
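 *
 * The table below mirrors the bcopy small-copy table above: entry `cnt'
 * stores Q zero quadwords followed by P trailing zero bytes
 * (cnt = P + 8 * Q), working back from the end of the buffer. For example,
 * cnt == 11 is roughly (illustrative only, addr taken as a uint8_t pointer):
 *
 *	*(uint64_t *)(addr + 11 - 11) = 0;	// Q = 1 quadword
 *	*(uint16_t *)(addr + 11 - 3) = 0;	// P = 3 trailing bytes
 *	*(uint8_t *)(addr + 11 - 1) = 0;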
1168 */ 1169 leaq L(setPxQx)(%rip), %r10 1170 addq %rsi, %rdi 1171 movslq (%r10,%rsi,4), %rcx 1172 leaq (%rcx,%r10,1), %r10 1173 INDIRECT_JMP_REG(r10) 1174 1175 .p2align 4 1176 L(setPxQx): 1177 .int L(P0Q0)-L(setPxQx) /* 0 */ 1178 .int L(P1Q0)-L(setPxQx) 1179 .int L(P2Q0)-L(setPxQx) 1180 .int L(P3Q0)-L(setPxQx) 1181 .int L(P4Q0)-L(setPxQx) 1182 .int L(P5Q0)-L(setPxQx) 1183 .int L(P6Q0)-L(setPxQx) 1184 .int L(P7Q0)-L(setPxQx) 1185 1186 .int L(P0Q1)-L(setPxQx) /* 8 */ 1187 .int L(P1Q1)-L(setPxQx) 1188 .int L(P2Q1)-L(setPxQx) 1189 .int L(P3Q1)-L(setPxQx) 1190 .int L(P4Q1)-L(setPxQx) 1191 .int L(P5Q1)-L(setPxQx) 1192 .int L(P6Q1)-L(setPxQx) 1193 .int L(P7Q1)-L(setPxQx) 1194 1195 .int L(P0Q2)-L(setPxQx) /* 16 */ 1196 .int L(P1Q2)-L(setPxQx) 1197 .int L(P2Q2)-L(setPxQx) 1198 .int L(P3Q2)-L(setPxQx) 1199 .int L(P4Q2)-L(setPxQx) 1200 .int L(P5Q2)-L(setPxQx) 1201 .int L(P6Q2)-L(setPxQx) 1202 .int L(P7Q2)-L(setPxQx) 1203 1204 .int L(P0Q3)-L(setPxQx) /* 24 */ 1205 .int L(P1Q3)-L(setPxQx) 1206 .int L(P2Q3)-L(setPxQx) 1207 .int L(P3Q3)-L(setPxQx) 1208 .int L(P4Q3)-L(setPxQx) 1209 .int L(P5Q3)-L(setPxQx) 1210 .int L(P6Q3)-L(setPxQx) 1211 .int L(P7Q3)-L(setPxQx) 1212 1213 .int L(P0Q4)-L(setPxQx) /* 32 */ 1214 .int L(P1Q4)-L(setPxQx) 1215 .int L(P2Q4)-L(setPxQx) 1216 .int L(P3Q4)-L(setPxQx) 1217 .int L(P4Q4)-L(setPxQx) 1218 .int L(P5Q4)-L(setPxQx) 1219 .int L(P6Q4)-L(setPxQx) 1220 .int L(P7Q4)-L(setPxQx) 1221 1222 .int L(P0Q5)-L(setPxQx) /* 40 */ 1223 .int L(P1Q5)-L(setPxQx) 1224 .int L(P2Q5)-L(setPxQx) 1225 .int L(P3Q5)-L(setPxQx) 1226 .int L(P4Q5)-L(setPxQx) 1227 .int L(P5Q5)-L(setPxQx) 1228 .int L(P6Q5)-L(setPxQx) 1229 .int L(P7Q5)-L(setPxQx) 1230 1231 .int L(P0Q6)-L(setPxQx) /* 48 */ 1232 .int L(P1Q6)-L(setPxQx) 1233 .int L(P2Q6)-L(setPxQx) 1234 .int L(P3Q6)-L(setPxQx) 1235 .int L(P4Q6)-L(setPxQx) 1236 .int L(P5Q6)-L(setPxQx) 1237 .int L(P6Q6)-L(setPxQx) 1238 .int L(P7Q6)-L(setPxQx) 1239 1240 .int L(P0Q7)-L(setPxQx) /* 56 */ 1241 .int L(P1Q7)-L(setPxQx) 1242 .int L(P2Q7)-L(setPxQx) 1243 .int L(P3Q7)-L(setPxQx) 1244 .int L(P4Q7)-L(setPxQx) 1245 .int L(P5Q7)-L(setPxQx) 1246 .int L(P6Q7)-L(setPxQx) 1247 .int L(P7Q7)-L(setPxQx) 1248 1249 .int L(P0Q8)-L(setPxQx) /* 64 */ 1250 .int L(P1Q8)-L(setPxQx) 1251 .int L(P2Q8)-L(setPxQx) 1252 .int L(P3Q8)-L(setPxQx) 1253 .int L(P4Q8)-L(setPxQx) 1254 .int L(P5Q8)-L(setPxQx) 1255 .int L(P6Q8)-L(setPxQx) 1256 .int L(P7Q8)-L(setPxQx) 1257 1258 .int L(P0Q9)-L(setPxQx) /* 72 */ 1259 .int L(P1Q9)-L(setPxQx) 1260 .int L(P2Q9)-L(setPxQx) 1261 .int L(P3Q9)-L(setPxQx) 1262 .int L(P4Q9)-L(setPxQx) 1263 .int L(P5Q9)-L(setPxQx) 1264 .int L(P6Q9)-L(setPxQx) 1265 .int L(P7Q9)-L(setPxQx) /* 79 */ 1266 1267 .p2align 4 1268 L(P0Q9): mov %rax, -0x48(%rdi) 1269 L(P0Q8): mov %rax, -0x40(%rdi) 1270 L(P0Q7): mov %rax, -0x38(%rdi) 1271 L(P0Q6): mov %rax, -0x30(%rdi) 1272 L(P0Q5): mov %rax, -0x28(%rdi) 1273 L(P0Q4): mov %rax, -0x20(%rdi) 1274 L(P0Q3): mov %rax, -0x18(%rdi) 1275 L(P0Q2): mov %rax, -0x10(%rdi) 1276 L(P0Q1): mov %rax, -0x8(%rdi) 1277 L(P0Q0): 1278 ret 1279 1280 .p2align 4 1281 L(P1Q9): mov %rax, -0x49(%rdi) 1282 L(P1Q8): mov %rax, -0x41(%rdi) 1283 L(P1Q7): mov %rax, -0x39(%rdi) 1284 L(P1Q6): mov %rax, -0x31(%rdi) 1285 L(P1Q5): mov %rax, -0x29(%rdi) 1286 L(P1Q4): mov %rax, -0x21(%rdi) 1287 L(P1Q3): mov %rax, -0x19(%rdi) 1288 L(P1Q2): mov %rax, -0x11(%rdi) 1289 L(P1Q1): mov %rax, -0x9(%rdi) 1290 L(P1Q0): mov %al, -0x1(%rdi) 1291 ret 1292 1293 .p2align 4 1294 L(P2Q9): mov %rax, -0x4a(%rdi) 1295 L(P2Q8): mov %rax, -0x42(%rdi) 1296 L(P2Q7): mov %rax, -0x3a(%rdi) 1297 L(P2Q6): mov 
%rax, -0x32(%rdi) 1298 L(P2Q5): mov %rax, -0x2a(%rdi) 1299 L(P2Q4): mov %rax, -0x22(%rdi) 1300 L(P2Q3): mov %rax, -0x1a(%rdi) 1301 L(P2Q2): mov %rax, -0x12(%rdi) 1302 L(P2Q1): mov %rax, -0xa(%rdi) 1303 L(P2Q0): mov %ax, -0x2(%rdi) 1304 ret 1305 1306 .p2align 4 1307 L(P3Q9): mov %rax, -0x4b(%rdi) 1308 L(P3Q8): mov %rax, -0x43(%rdi) 1309 L(P3Q7): mov %rax, -0x3b(%rdi) 1310 L(P3Q6): mov %rax, -0x33(%rdi) 1311 L(P3Q5): mov %rax, -0x2b(%rdi) 1312 L(P3Q4): mov %rax, -0x23(%rdi) 1313 L(P3Q3): mov %rax, -0x1b(%rdi) 1314 L(P3Q2): mov %rax, -0x13(%rdi) 1315 L(P3Q1): mov %rax, -0xb(%rdi) 1316 L(P3Q0): mov %ax, -0x3(%rdi) 1317 mov %al, -0x1(%rdi) 1318 ret 1319 1320 .p2align 4 1321 L(P4Q9): mov %rax, -0x4c(%rdi) 1322 L(P4Q8): mov %rax, -0x44(%rdi) 1323 L(P4Q7): mov %rax, -0x3c(%rdi) 1324 L(P4Q6): mov %rax, -0x34(%rdi) 1325 L(P4Q5): mov %rax, -0x2c(%rdi) 1326 L(P4Q4): mov %rax, -0x24(%rdi) 1327 L(P4Q3): mov %rax, -0x1c(%rdi) 1328 L(P4Q2): mov %rax, -0x14(%rdi) 1329 L(P4Q1): mov %rax, -0xc(%rdi) 1330 L(P4Q0): mov %eax, -0x4(%rdi) 1331 ret 1332 1333 .p2align 4 1334 L(P5Q9): mov %rax, -0x4d(%rdi) 1335 L(P5Q8): mov %rax, -0x45(%rdi) 1336 L(P5Q7): mov %rax, -0x3d(%rdi) 1337 L(P5Q6): mov %rax, -0x35(%rdi) 1338 L(P5Q5): mov %rax, -0x2d(%rdi) 1339 L(P5Q4): mov %rax, -0x25(%rdi) 1340 L(P5Q3): mov %rax, -0x1d(%rdi) 1341 L(P5Q2): mov %rax, -0x15(%rdi) 1342 L(P5Q1): mov %rax, -0xd(%rdi) 1343 L(P5Q0): mov %eax, -0x5(%rdi) 1344 mov %al, -0x1(%rdi) 1345 ret 1346 1347 .p2align 4 1348 L(P6Q9): mov %rax, -0x4e(%rdi) 1349 L(P6Q8): mov %rax, -0x46(%rdi) 1350 L(P6Q7): mov %rax, -0x3e(%rdi) 1351 L(P6Q6): mov %rax, -0x36(%rdi) 1352 L(P6Q5): mov %rax, -0x2e(%rdi) 1353 L(P6Q4): mov %rax, -0x26(%rdi) 1354 L(P6Q3): mov %rax, -0x1e(%rdi) 1355 L(P6Q2): mov %rax, -0x16(%rdi) 1356 L(P6Q1): mov %rax, -0xe(%rdi) 1357 L(P6Q0): mov %eax, -0x6(%rdi) 1358 mov %ax, -0x2(%rdi) 1359 ret 1360 1361 .p2align 4 1362 L(P7Q9): mov %rax, -0x4f(%rdi) 1363 L(P7Q8): mov %rax, -0x47(%rdi) 1364 L(P7Q7): mov %rax, -0x3f(%rdi) 1365 L(P7Q6): mov %rax, -0x37(%rdi) 1366 L(P7Q5): mov %rax, -0x2f(%rdi) 1367 L(P7Q4): mov %rax, -0x27(%rdi) 1368 L(P7Q3): mov %rax, -0x1f(%rdi) 1369 L(P7Q2): mov %rax, -0x17(%rdi) 1370 L(P7Q1): mov %rax, -0xf(%rdi) 1371 L(P7Q0): mov %eax, -0x7(%rdi) 1372 mov %ax, -0x3(%rdi) 1373 mov %al, -0x1(%rdi) 1374 ret 1375 1376 /* 1377 * Align to a 16-byte boundary. Avoids penalties from unaligned stores 1378 * as well as from stores spanning cachelines. Note 16-byte alignment 1379 * is better in case where rep sstosq is used. 1380 */ 1381 .p2align 4 1382 L(ck_align): 1383 test $0xf, %rdi 1384 jz L(aligned_now) 1385 test $1, %rdi 1386 jz 2f 1387 mov %al, (%rdi) 1388 dec %rsi 1389 lea 1(%rdi),%rdi 1390 2: 1391 test $2, %rdi 1392 jz 4f 1393 mov %ax, (%rdi) 1394 sub $2, %rsi 1395 lea 2(%rdi),%rdi 1396 4: 1397 test $4, %rdi 1398 jz 8f 1399 mov %eax, (%rdi) 1400 sub $4, %rsi 1401 lea 4(%rdi),%rdi 1402 8: 1403 test $8, %rdi 1404 jz L(aligned_now) 1405 mov %rax, (%rdi) 1406 sub $8, %rsi 1407 lea 8(%rdi),%rdi 1408 1409 /* 1410 * For large sizes rep sstoq is fastest. 1411 * Transition point determined experimentally as measured on 1412 * Intel Xeon processors (incl. Nehalem) and AMD Opteron. 
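 *
 * A rough C model of the whole large-size path (illustrative only; the
 * residue is really handled by the unrolled table above):
 *
 *	static void
 *	bzero_large(uint8_t *p, size_t cnt)	// p already 16-byte aligned
 *	{
 *		size_t q;
 *		int i;
 *
 *		if (cnt > 1024) {			// BZERO_USE_REP
 *			for (q = cnt >> 3; q != 0; q--, p += 8)
 *				*(uint64_t *)p = 0;	// rep sstoq equivalent
 *			cnt &= 7;
 *		} else {
 *			for (; cnt >= 64; cnt -= 64, p += 64)
 *				for (i = 0; i < 8; i++)	// 64 bytes per loop
 *					((uint64_t *)p)[i] = 0;
 *		}
 *		while (cnt-- != 0)			// residue
 *			*p++ = 0;
 *	}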
1413 */ 1414 L(aligned_now): 1415 cmp $BZERO_USE_REP, %rsi 1416 ja L(use_rep) 1417 1418 /* 1419 * zero 64-bytes per loop 1420 */ 1421 .p2align 4 1422 L(bzero_loop): 1423 leaq -0x40(%rsi), %rsi 1424 cmpq $0x40, %rsi 1425 movq %rax, (%rdi) 1426 movq %rax, 0x8(%rdi) 1427 movq %rax, 0x10(%rdi) 1428 movq %rax, 0x18(%rdi) 1429 movq %rax, 0x20(%rdi) 1430 movq %rax, 0x28(%rdi) 1431 movq %rax, 0x30(%rdi) 1432 movq %rax, 0x38(%rdi) 1433 leaq 0x40(%rdi), %rdi 1434 jae L(bzero_loop) 1435 1436 /* 1437 * Clear any remaining bytes.. 1438 */ 1439 9: 1440 leaq L(setPxQx)(%rip), %r10 1441 addq %rsi, %rdi 1442 movslq (%r10,%rsi,4), %rcx 1443 leaq (%rcx,%r10,1), %r10 1444 INDIRECT_JMP_REG(r10) 1445 1446 /* 1447 * Use rep sstoq. Clear any remainder via unrolled code 1448 */ 1449 .p2align 4 1450 L(use_rep): 1451 movq %rsi, %rcx /* get size in bytes */ 1452 shrq $3, %rcx /* count of 8-byte words to zero */ 1453 rep 1454 sstoq /* %rcx = words to clear (%rax=0) */ 1455 andq $7, %rsi /* remaining bytes */ 1456 jnz 9b 1457 ret 1458 #undef L 1459 SET_SIZE(bzero_altentry) 1460 SET_SIZE(bzero) 1461 1462 #elif defined(__i386) 1463 1464 #define ARG_ADDR 4 1465 #define ARG_COUNT 8 1466 1467 ENTRY(bzero) 1468 #ifdef DEBUG 1469 movl postbootkernelbase, %eax 1470 cmpl %eax, ARG_ADDR(%esp) 1471 jnb 0f 1472 pushl %ebp 1473 movl %esp, %ebp 1474 pushl $.bzero_panic_msg 1475 call panic 1476 0: 1477 #endif 1478 do_zero: 1479 movl %edi, %edx 1480 movl ARG_COUNT(%esp), %ecx 1481 movl ARG_ADDR(%esp), %edi 1482 shrl $2, %ecx 1483 xorl %eax, %eax 1484 rep 1485 sstol 1486 movl ARG_COUNT(%esp), %ecx 1487 andl $3, %ecx 1488 rep 1489 sstob 1490 movl %edx, %edi 1491 ret 1492 SET_SIZE(bzero) 1493 1494 #undef ARG_ADDR 1495 #undef ARG_COUNT 1496 1497 #endif /* __i386 */ 1498 #endif /* __lint */ 1499 1500 /* 1501 * Transfer data to and from user space - 1502 * Note that these routines can cause faults 1503 * It is assumed that the kernel has nothing at 1504 * less than KERNELBASE in the virtual address space. 1505 * 1506 * Note that copyin(9F) and copyout(9F) are part of the 1507 * DDI/DKI which specifies that they return '-1' on "errors." 1508 * 1509 * Sigh. 1510 * 1511 * So there's two extremely similar routines - xcopyin_nta() and 1512 * xcopyout_nta() which return the errno that we've faithfully computed. 1513 * This allows other callers (e.g. uiomove(9F)) to work correctly. 1514 * Given that these are used pretty heavily, we expand the calling 1515 * sequences inline for all flavours (rather than making wrappers). 1516 */ 1517 1518 /* 1519 * Copy user data to kernel space. 
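 *
 * To recap the contract described above (hypothetical caller, for
 * illustration only):
 *
 *	// copyin(9F) returns -1 on any failure, not an errno
 *	if (copyin(uaddr, &args, sizeof (args)) != 0)
 *		return (EFAULT);
 *
 *	// xcopyin_nta() returns 0 or the errno it computed, so callers such
 *	// as uiomove(9F) can propagate the real error; the last argument is
 *	// the copy_cached hint
 *	error = xcopyin_nta(uaddr, kaddr, len, 0);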
1520 */ 1521 1522 #if defined(__lint) 1523 1524 /* ARGSUSED */ 1525 int 1526 copyin(const void *uaddr, void *kaddr, size_t count) 1527 { return (0); } 1528 1529 #else /* lint */ 1530 1531 #if defined(__amd64) 1532 1533 ENTRY(copyin) 1534 pushq %rbp 1535 movq %rsp, %rbp 1536 subq $24, %rsp 1537 1538 /* 1539 * save args in case we trap and need to rerun as a copyop 1540 */ 1541 movq %rdi, (%rsp) 1542 movq %rsi, 0x8(%rsp) 1543 movq %rdx, 0x10(%rsp) 1544 1545 movq kernelbase(%rip), %rax 1546 #ifdef DEBUG 1547 cmpq %rax, %rsi /* %rsi = kaddr */ 1548 jnb 1f 1549 leaq .copyin_panic_msg(%rip), %rdi 1550 xorl %eax, %eax 1551 call panic 1552 1: 1553 #endif 1554 /* 1555 * pass lofault value as 4th argument to do_copy_fault 1556 */ 1557 leaq _copyin_err(%rip), %rcx 1558 1559 movq %gs:CPU_THREAD, %r9 1560 cmpq %rax, %rdi /* test uaddr < kernelbase */ 1561 jae 3f /* take copyop if uaddr > kernelbase */ 1562 SMAP_DISABLE_INSTR(0) 1563 jmp do_copy_fault /* Takes care of leave for us */ 1564 1565 _copyin_err: 1566 SMAP_ENABLE_INSTR(2) 1567 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 1568 addq $8, %rsp /* pop bcopy_altentry call ret addr */ 1569 3: 1570 movq T_COPYOPS(%r9), %rax 1571 cmpq $0, %rax 1572 jz 2f 1573 /* 1574 * reload args for the copyop 1575 */ 1576 movq (%rsp), %rdi 1577 movq 0x8(%rsp), %rsi 1578 movq 0x10(%rsp), %rdx 1579 leave 1580 movq CP_COPYIN(%rax), %rax 1581 INDIRECT_JMP_REG(rax) 1582 1583 2: movl $-1, %eax 1584 leave 1585 ret 1586 SET_SIZE(copyin) 1587 1588 #elif defined(__i386) 1589 1590 #define ARG_UADDR 4 1591 #define ARG_KADDR 8 1592 1593 ENTRY(copyin) 1594 movl kernelbase, %ecx 1595 #ifdef DEBUG 1596 cmpl %ecx, ARG_KADDR(%esp) 1597 jnb 1f 1598 pushl %ebp 1599 movl %esp, %ebp 1600 pushl $.copyin_panic_msg 1601 call panic 1602 1: 1603 #endif 1604 lea _copyin_err, %eax 1605 1606 movl %gs:CPU_THREAD, %edx 1607 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */ 1608 jb do_copy_fault 1609 jmp 3f 1610 1611 _copyin_err: 1612 popl %ecx 1613 popl %edi 1614 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */ 1615 popl %esi 1616 popl %ebp 1617 3: 1618 movl T_COPYOPS(%edx), %eax 1619 cmpl $0, %eax 1620 jz 2f 1621 jmp *CP_COPYIN(%eax) 1622 1623 2: movl $-1, %eax 1624 ret 1625 SET_SIZE(copyin) 1626 1627 #undef ARG_UADDR 1628 #undef ARG_KADDR 1629 1630 #endif /* __i386 */ 1631 #endif /* __lint */ 1632 1633 #if defined(__lint) 1634 1635 /* ARGSUSED */ 1636 int 1637 xcopyin_nta(const void *uaddr, void *kaddr, size_t count, int copy_cached) 1638 { return (0); } 1639 1640 #else /* __lint */ 1641 1642 #if defined(__amd64) 1643 1644 ENTRY(xcopyin_nta) 1645 pushq %rbp 1646 movq %rsp, %rbp 1647 subq $24, %rsp 1648 1649 /* 1650 * save args in case we trap and need to rerun as a copyop 1651 * %rcx is consumed in this routine so we don't need to save 1652 * it. 1653 */ 1654 movq %rdi, (%rsp) 1655 movq %rsi, 0x8(%rsp) 1656 movq %rdx, 0x10(%rsp) 1657 1658 movq kernelbase(%rip), %rax 1659 #ifdef DEBUG 1660 cmpq %rax, %rsi /* %rsi = kaddr */ 1661 jnb 1f 1662 leaq .xcopyin_panic_msg(%rip), %rdi 1663 xorl %eax, %eax 1664 call panic 1665 1: 1666 #endif 1667 movq %gs:CPU_THREAD, %r9 1668 cmpq %rax, %rdi /* test uaddr < kernelbase */ 1669 jae 4f 1670 cmpq $0, %rcx /* No non-temporal access? 
*/ 1671 /* 1672 * pass lofault value as 4th argument to do_copy_fault 1673 */ 1674 leaq _xcopyin_err(%rip), %rcx /* doesn't set rflags */ 1675 jnz 6f /* use regular access */ 1676 /* 1677 * Make sure cnt is >= XCOPY_MIN_SIZE bytes 1678 */ 1679 cmpq $XCOPY_MIN_SIZE, %rdx 1680 jae 5f 1681 6: 1682 SMAP_DISABLE_INSTR(1) 1683 jmp do_copy_fault 1684 1685 /* 1686 * Make sure src and dst are NTA_ALIGN_SIZE aligned, 1687 * count is COUNT_ALIGN_SIZE aligned. 1688 */ 1689 5: 1690 movq %rdi, %r10 1691 orq %rsi, %r10 1692 andq $NTA_ALIGN_MASK, %r10 1693 orq %rdx, %r10 1694 andq $COUNT_ALIGN_MASK, %r10 1695 jnz 6b 1696 leaq _xcopyin_nta_err(%rip), %rcx /* doesn't set rflags */ 1697 SMAP_DISABLE_INSTR(2) 1698 jmp do_copy_fault_nta /* use non-temporal access */ 1699 1700 4: 1701 movl $EFAULT, %eax 1702 jmp 3f 1703 1704 /* 1705 * A fault during do_copy_fault or do_copy_fault_nta is 1706 * indicated through an errno value in %rax and we iret from the 1707 * trap handler to here. 1708 */ 1709 _xcopyin_err: 1710 addq $8, %rsp /* pop bcopy_altentry call ret addr */ 1711 _xcopyin_nta_err: 1712 SMAP_ENABLE_INSTR(3) 1713 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 1714 3: 1715 movq T_COPYOPS(%r9), %r8 1716 cmpq $0, %r8 1717 jz 2f 1718 1719 /* 1720 * reload args for the copyop 1721 */ 1722 movq (%rsp), %rdi 1723 movq 0x8(%rsp), %rsi 1724 movq 0x10(%rsp), %rdx 1725 leave 1726 movq CP_XCOPYIN(%r8), %r8 1727 INDIRECT_JMP_REG(r8) 1728 1729 2: leave 1730 ret 1731 SET_SIZE(xcopyin_nta) 1732 1733 #elif defined(__i386) 1734 1735 #define ARG_UADDR 4 1736 #define ARG_KADDR 8 1737 #define ARG_COUNT 12 1738 #define ARG_CACHED 16 1739 1740 .globl use_sse_copy 1741 1742 ENTRY(xcopyin_nta) 1743 movl kernelbase, %ecx 1744 lea _xcopyin_err, %eax 1745 movl %gs:CPU_THREAD, %edx 1746 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */ 1747 jae 4f 1748 1749 cmpl $0, use_sse_copy /* no sse support */ 1750 jz do_copy_fault 1751 1752 cmpl $0, ARG_CACHED(%esp) /* copy_cached hint set? */ 1753 jnz do_copy_fault 1754 1755 /* 1756 * Make sure cnt is >= XCOPY_MIN_SIZE bytes 1757 */ 1758 cmpl $XCOPY_MIN_SIZE, ARG_COUNT(%esp) 1759 jb do_copy_fault 1760 1761 /* 1762 * Make sure src and dst are NTA_ALIGN_SIZE aligned, 1763 * count is COUNT_ALIGN_SIZE aligned. 1764 */ 1765 movl ARG_UADDR(%esp), %ecx 1766 orl ARG_KADDR(%esp), %ecx 1767 andl $NTA_ALIGN_MASK, %ecx 1768 orl ARG_COUNT(%esp), %ecx 1769 andl $COUNT_ALIGN_MASK, %ecx 1770 jnz do_copy_fault 1771 1772 jmp do_copy_fault_nta /* use regular access */ 1773 1774 4: 1775 movl $EFAULT, %eax 1776 jmp 3f 1777 1778 /* 1779 * A fault during do_copy_fault or do_copy_fault_nta is 1780 * indicated through an errno value in %eax and we iret from the 1781 * trap handler to here. 1782 */ 1783 _xcopyin_err: 1784 popl %ecx 1785 popl %edi 1786 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */ 1787 popl %esi 1788 popl %ebp 1789 3: 1790 cmpl $0, T_COPYOPS(%edx) 1791 jz 2f 1792 movl T_COPYOPS(%edx), %eax 1793 jmp *CP_XCOPYIN(%eax) 1794 1795 2: rep; ret /* use 2 byte return instruction when branch target */ 1796 /* AMD Software Optimization Guide - Section 6.2 */ 1797 SET_SIZE(xcopyin_nta) 1798 1799 #undef ARG_UADDR 1800 #undef ARG_KADDR 1801 #undef ARG_COUNT 1802 #undef ARG_CACHED 1803 1804 #endif /* __i386 */ 1805 #endif /* __lint */ 1806 1807 /* 1808 * Copy kernel data to user space. 
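 *
 * The contract mirrors copyin above (hypothetical caller, for illustration
 * only):
 *
 *	// copyout(9F) also returns -1 on failure rather than an errno
 *	if (copyout(&res, uaddr, sizeof (res)) != 0)
 *		return (EFAULT);
 *
 *	// xcopyout_nta() returns 0 or the errno, like xcopyin_nta()
 *	error = xcopyout_nta(kaddr, uaddr, len, 0);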
1809 */ 1810 1811 #if defined(__lint) 1812 1813 /* ARGSUSED */ 1814 int 1815 copyout(const void *kaddr, void *uaddr, size_t count) 1816 { return (0); } 1817 1818 #else /* __lint */ 1819 1820 #if defined(__amd64) 1821 1822 ENTRY(copyout) 1823 pushq %rbp 1824 movq %rsp, %rbp 1825 subq $24, %rsp 1826 1827 /* 1828 * save args in case we trap and need to rerun as a copyop 1829 */ 1830 movq %rdi, (%rsp) 1831 movq %rsi, 0x8(%rsp) 1832 movq %rdx, 0x10(%rsp) 1833 1834 movq kernelbase(%rip), %rax 1835 #ifdef DEBUG 1836 cmpq %rax, %rdi /* %rdi = kaddr */ 1837 jnb 1f 1838 leaq .copyout_panic_msg(%rip), %rdi 1839 xorl %eax, %eax 1840 call panic 1841 1: 1842 #endif 1843 /* 1844 * pass lofault value as 4th argument to do_copy_fault 1845 */ 1846 leaq _copyout_err(%rip), %rcx 1847 1848 movq %gs:CPU_THREAD, %r9 1849 cmpq %rax, %rsi /* test uaddr < kernelbase */ 1850 jae 3f /* take copyop if uaddr > kernelbase */ 1851 SMAP_DISABLE_INSTR(3) 1852 jmp do_copy_fault /* Calls leave for us */ 1853 1854 _copyout_err: 1855 SMAP_ENABLE_INSTR(4) 1856 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 1857 addq $8, %rsp /* pop bcopy_altentry call ret addr */ 1858 3: 1859 movq T_COPYOPS(%r9), %rax 1860 cmpq $0, %rax 1861 jz 2f 1862 1863 /* 1864 * reload args for the copyop 1865 */ 1866 movq (%rsp), %rdi 1867 movq 0x8(%rsp), %rsi 1868 movq 0x10(%rsp), %rdx 1869 leave 1870 movq CP_COPYOUT(%rax), %rax 1871 INDIRECT_JMP_REG(rax) 1872 1873 2: movl $-1, %eax 1874 leave 1875 ret 1876 SET_SIZE(copyout) 1877 1878 #elif defined(__i386) 1879 1880 #define ARG_KADDR 4 1881 #define ARG_UADDR 8 1882 1883 ENTRY(copyout) 1884 movl kernelbase, %ecx 1885 #ifdef DEBUG 1886 cmpl %ecx, ARG_KADDR(%esp) 1887 jnb 1f 1888 pushl %ebp 1889 movl %esp, %ebp 1890 pushl $.copyout_panic_msg 1891 call panic 1892 1: 1893 #endif 1894 lea _copyout_err, %eax 1895 movl %gs:CPU_THREAD, %edx 1896 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */ 1897 jb do_copy_fault 1898 jmp 3f 1899 1900 _copyout_err: 1901 popl %ecx 1902 popl %edi 1903 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */ 1904 popl %esi 1905 popl %ebp 1906 3: 1907 movl T_COPYOPS(%edx), %eax 1908 cmpl $0, %eax 1909 jz 2f 1910 jmp *CP_COPYOUT(%eax) 1911 1912 2: movl $-1, %eax 1913 ret 1914 SET_SIZE(copyout) 1915 1916 #undef ARG_UADDR 1917 #undef ARG_KADDR 1918 1919 #endif /* __i386 */ 1920 #endif /* __lint */ 1921 1922 #if defined(__lint) 1923 1924 /* ARGSUSED */ 1925 int 1926 xcopyout_nta(const void *kaddr, void *uaddr, size_t count, int copy_cached) 1927 { return (0); } 1928 1929 #else /* __lint */ 1930 1931 #if defined(__amd64) 1932 1933 ENTRY(xcopyout_nta) 1934 pushq %rbp 1935 movq %rsp, %rbp 1936 subq $24, %rsp 1937 1938 /* 1939 * save args in case we trap and need to rerun as a copyop 1940 */ 1941 movq %rdi, (%rsp) 1942 movq %rsi, 0x8(%rsp) 1943 movq %rdx, 0x10(%rsp) 1944 1945 movq kernelbase(%rip), %rax 1946 #ifdef DEBUG 1947 cmpq %rax, %rdi /* %rdi = kaddr */ 1948 jnb 1f 1949 leaq .xcopyout_panic_msg(%rip), %rdi 1950 xorl %eax, %eax 1951 call panic 1952 1: 1953 #endif 1954 movq %gs:CPU_THREAD, %r9 1955 cmpq %rax, %rsi /* test uaddr < kernelbase */ 1956 jae 4f 1957 1958 cmpq $0, %rcx /* No non-temporal access? 
*/ 1959 /* 1960 * pass lofault value as 4th argument to do_copy_fault 1961 */ 1962 leaq _xcopyout_err(%rip), %rcx 1963 jnz 6f 1964 /* 1965 * Make sure cnt is >= XCOPY_MIN_SIZE bytes 1966 */ 1967 cmpq $XCOPY_MIN_SIZE, %rdx 1968 jae 5f 1969 6: 1970 SMAP_DISABLE_INSTR(4) 1971 jmp do_copy_fault 1972 1973 /* 1974 * Make sure src and dst are NTA_ALIGN_SIZE aligned, 1975 * count is COUNT_ALIGN_SIZE aligned. 1976 */ 1977 5: 1978 movq %rdi, %r10 1979 orq %rsi, %r10 1980 andq $NTA_ALIGN_MASK, %r10 1981 orq %rdx, %r10 1982 andq $COUNT_ALIGN_MASK, %r10 1983 jnz 6b 1984 leaq _xcopyout_nta_err(%rip), %rcx 1985 SMAP_DISABLE_INSTR(5) 1986 call do_copy_fault_nta 1987 SMAP_ENABLE_INSTR(5) 1988 ret 1989 1990 4: 1991 movl $EFAULT, %eax 1992 jmp 3f 1993 1994 /* 1995 * A fault during do_copy_fault or do_copy_fault_nta is 1996 * indicated through an errno value in %rax and we iret from the 1997 * trap handler to here. 1998 */ 1999 _xcopyout_err: 2000 addq $8, %rsp /* pop bcopy_altentry call ret addr */ 2001 _xcopyout_nta_err: 2002 SMAP_ENABLE_INSTR(6) 2003 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 2004 3: 2005 movq T_COPYOPS(%r9), %r8 2006 cmpq $0, %r8 2007 jz 2f 2008 2009 /* 2010 * reload args for the copyop 2011 */ 2012 movq (%rsp), %rdi 2013 movq 0x8(%rsp), %rsi 2014 movq 0x10(%rsp), %rdx 2015 leave 2016 movq CP_XCOPYOUT(%r8), %r8 2017 INDIRECT_JMP_REG(r8) 2018 2019 2: leave 2020 ret 2021 SET_SIZE(xcopyout_nta) 2022 2023 #elif defined(__i386) 2024 2025 #define ARG_KADDR 4 2026 #define ARG_UADDR 8 2027 #define ARG_COUNT 12 2028 #define ARG_CACHED 16 2029 2030 ENTRY(xcopyout_nta) 2031 movl kernelbase, %ecx 2032 lea _xcopyout_err, %eax 2033 movl %gs:CPU_THREAD, %edx 2034 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */ 2035 jae 4f 2036 2037 cmpl $0, use_sse_copy /* no sse support */ 2038 jz do_copy_fault 2039 2040 cmpl $0, ARG_CACHED(%esp) /* copy_cached hint set? */ 2041 jnz do_copy_fault 2042 2043 /* 2044 * Make sure cnt is >= XCOPY_MIN_SIZE bytes 2045 */ 2046 cmpl $XCOPY_MIN_SIZE, %edx 2047 jb do_copy_fault 2048 2049 /* 2050 * Make sure src and dst are NTA_ALIGN_SIZE aligned, 2051 * count is COUNT_ALIGN_SIZE aligned. 2052 */ 2053 movl ARG_UADDR(%esp), %ecx 2054 orl ARG_KADDR(%esp), %ecx 2055 andl $NTA_ALIGN_MASK, %ecx 2056 orl ARG_COUNT(%esp), %ecx 2057 andl $COUNT_ALIGN_MASK, %ecx 2058 jnz do_copy_fault 2059 jmp do_copy_fault_nta 2060 2061 4: 2062 movl $EFAULT, %eax 2063 jmp 3f 2064 2065 /* 2066 * A fault during do_copy_fault or do_copy_fault_nta is 2067 * indicated through an errno value in %eax and we iret from the 2068 * trap handler to here. 2069 */ 2070 _xcopyout_err: 2071 / restore the original lofault 2072 popl %ecx 2073 popl %edi 2074 movl %ecx, T_LOFAULT(%edx) / original lofault 2075 popl %esi 2076 popl %ebp 2077 3: 2078 cmpl $0, T_COPYOPS(%edx) 2079 jz 2f 2080 movl T_COPYOPS(%edx), %eax 2081 jmp *CP_XCOPYOUT(%eax) 2082 2083 2: rep; ret /* use 2 byte return instruction when branch target */ 2084 /* AMD Software Optimization Guide - Section 6.2 */ 2085 SET_SIZE(xcopyout_nta) 2086 2087 #undef ARG_UADDR 2088 #undef ARG_KADDR 2089 #undef ARG_COUNT 2090 #undef ARG_CACHED 2091 2092 #endif /* __i386 */ 2093 #endif /* __lint */ 2094 2095 /* 2096 * Copy a null terminated string from one point to another in 2097 * the kernel address space. 
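 *
 * Returns 0 on success or ENAMETOOLONG if `maxlength' is exhausted before
 * the terminating NUL is seen; when `lencopied' is non-NULL the number of
 * bytes copied (including the NUL on success) is stored through it. A
 * hypothetical caller, for illustration only:
 *
 *	size_t copied;
 *
 *	if (copystr(src, dst, MAXPATHLEN, &copied) != 0)
 *		return (ENAMETOOLONG);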
2098 */ 2099 2100 #if defined(__lint) 2101 2102 /* ARGSUSED */ 2103 int 2104 copystr(const char *from, char *to, size_t maxlength, size_t *lencopied) 2105 { return (0); } 2106 2107 #else /* __lint */ 2108 2109 #if defined(__amd64) 2110 2111 ENTRY(copystr) 2112 pushq %rbp 2113 movq %rsp, %rbp 2114 #ifdef DEBUG 2115 movq kernelbase(%rip), %rax 2116 cmpq %rax, %rdi /* %rdi = from */ 2117 jb 0f 2118 cmpq %rax, %rsi /* %rsi = to */ 2119 jnb 1f 2120 0: leaq .copystr_panic_msg(%rip), %rdi 2121 xorl %eax, %eax 2122 call panic 2123 1: 2124 #endif 2125 movq %gs:CPU_THREAD, %r9 2126 movq T_LOFAULT(%r9), %r8 /* pass current lofault value as */ 2127 /* 5th argument to do_copystr */ 2128 xorl %r10d,%r10d /* pass smap restore need in %r10d */ 2129 /* as a non-ABI 6th arg */ 2130 do_copystr: 2131 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ 2132 movq T_LOFAULT(%r9), %r11 /* save the current lofault */ 2133 movq %r8, T_LOFAULT(%r9) /* new lofault */ 2134 2135 movq %rdx, %r8 /* save maxlength */ 2136 2137 cmpq $0, %rdx /* %rdx = maxlength */ 2138 je copystr_enametoolong /* maxlength == 0 */ 2139 2140 copystr_loop: 2141 decq %r8 2142 movb (%rdi), %al 2143 incq %rdi 2144 movb %al, (%rsi) 2145 incq %rsi 2146 cmpb $0, %al 2147 je copystr_null /* null char */ 2148 cmpq $0, %r8 2149 jne copystr_loop 2150 2151 copystr_enametoolong: 2152 movl $ENAMETOOLONG, %eax 2153 jmp copystr_out 2154 2155 copystr_null: 2156 xorl %eax, %eax /* no error */ 2157 2158 copystr_out: 2159 cmpq $0, %rcx /* want length? */ 2160 je copystr_smap /* no */ 2161 subq %r8, %rdx /* compute length and store it */ 2162 movq %rdx, (%rcx) 2163 2164 copystr_smap: 2165 cmpl $0, %r10d 2166 jz copystr_done 2167 SMAP_ENABLE_INSTR(7) 2168 2169 copystr_done: 2170 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ 2171 leave 2172 ret 2173 SET_SIZE(copystr) 2174 2175 #elif defined(__i386) 2176 2177 #define ARG_FROM 8 2178 #define ARG_TO 12 2179 #define ARG_MAXLEN 16 2180 #define ARG_LENCOPIED 20 2181 2182 ENTRY(copystr) 2183 #ifdef DEBUG 2184 pushl %ebp 2185 movl %esp, %ebp 2186 movl kernelbase, %eax 2187 cmpl %eax, ARG_FROM(%esp) 2188 jb 0f 2189 cmpl %eax, ARG_TO(%esp) 2190 jnb 1f 2191 0: pushl $.copystr_panic_msg 2192 call panic 2193 1: popl %ebp 2194 #endif 2195 /* get the current lofault address */ 2196 movl %gs:CPU_THREAD, %eax 2197 movl T_LOFAULT(%eax), %eax 2198 do_copystr: 2199 pushl %ebp /* setup stack frame */ 2200 movl %esp, %ebp 2201 pushl %ebx /* save registers */ 2202 pushl %edi 2203 2204 movl %gs:CPU_THREAD, %ebx 2205 movl T_LOFAULT(%ebx), %edi 2206 pushl %edi /* save the current lofault */ 2207 movl %eax, T_LOFAULT(%ebx) /* new lofault */ 2208 2209 movl ARG_MAXLEN(%ebp), %ecx 2210 cmpl $0, %ecx 2211 je copystr_enametoolong /* maxlength == 0 */ 2212 2213 movl ARG_FROM(%ebp), %ebx /* source address */ 2214 movl ARG_TO(%ebp), %edx /* destination address */ 2215 2216 copystr_loop: 2217 decl %ecx 2218 movb (%ebx), %al 2219 incl %ebx 2220 movb %al, (%edx) 2221 incl %edx 2222 cmpb $0, %al 2223 je copystr_null /* null char */ 2224 cmpl $0, %ecx 2225 jne copystr_loop 2226 2227 copystr_enametoolong: 2228 movl $ENAMETOOLONG, %eax 2229 jmp copystr_out 2230 2231 copystr_null: 2232 xorl %eax, %eax /* no error */ 2233 2234 copystr_out: 2235 cmpl $0, ARG_LENCOPIED(%ebp) /* want length? 
*/ 2236 je copystr_done /* no */ 2237 movl ARG_MAXLEN(%ebp), %edx 2238 subl %ecx, %edx /* compute length and store it */ 2239 movl ARG_LENCOPIED(%ebp), %ecx 2240 movl %edx, (%ecx) 2241 2242 copystr_done: 2243 popl %edi 2244 movl %gs:CPU_THREAD, %ebx 2245 movl %edi, T_LOFAULT(%ebx) /* restore the original lofault */ 2246 2247 popl %edi 2248 popl %ebx 2249 popl %ebp 2250 ret 2251 SET_SIZE(copystr) 2252 2253 #undef ARG_FROM 2254 #undef ARG_TO 2255 #undef ARG_MAXLEN 2256 #undef ARG_LENCOPIED 2257 2258 #endif /* __i386 */ 2259 #endif /* __lint */ 2260 2261 /* 2262 * Copy a null terminated string from the user address space into 2263 * the kernel address space. 2264 */ 2265 2266 #if defined(__lint) 2267 2268 /* ARGSUSED */ 2269 int 2270 copyinstr(const char *uaddr, char *kaddr, size_t maxlength, 2271 size_t *lencopied) 2272 { return (0); } 2273 2274 #else /* __lint */ 2275 2276 #if defined(__amd64) 2277 2278 ENTRY(copyinstr) 2279 pushq %rbp 2280 movq %rsp, %rbp 2281 subq $32, %rsp 2282 2283 /* 2284 * save args in case we trap and need to rerun as a copyop 2285 */ 2286 movq %rdi, (%rsp) 2287 movq %rsi, 0x8(%rsp) 2288 movq %rdx, 0x10(%rsp) 2289 movq %rcx, 0x18(%rsp) 2290 2291 movq kernelbase(%rip), %rax 2292 #ifdef DEBUG 2293 cmpq %rax, %rsi /* %rsi = kaddr */ 2294 jnb 1f 2295 leaq .copyinstr_panic_msg(%rip), %rdi 2296 xorl %eax, %eax 2297 call panic 2298 1: 2299 #endif 2300 /* 2301 * pass lofault value as 5th argument to do_copystr 2302 * do_copystr expects whether or not we need smap in %r10d 2303 */ 2304 leaq _copyinstr_error(%rip), %r8 2305 movl $1, %r10d 2306 2307 cmpq %rax, %rdi /* test uaddr < kernelbase */ 2308 jae 4f 2309 SMAP_DISABLE_INSTR(6) 2310 jmp do_copystr 2311 4: 2312 movq %gs:CPU_THREAD, %r9 2313 jmp 3f 2314 2315 _copyinstr_error: 2316 SMAP_ENABLE_INSTR(8) 2317 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 2318 3: 2319 movq T_COPYOPS(%r9), %rax 2320 cmpq $0, %rax 2321 jz 2f 2322 2323 /* 2324 * reload args for the copyop 2325 */ 2326 movq (%rsp), %rdi 2327 movq 0x8(%rsp), %rsi 2328 movq 0x10(%rsp), %rdx 2329 movq 0x18(%rsp), %rcx 2330 leave 2331 movq CP_COPYINSTR(%rax), %rax 2332 INDIRECT_JMP_REG(rax) 2333 2334 2: movl $EFAULT, %eax /* return EFAULT */ 2335 leave 2336 ret 2337 SET_SIZE(copyinstr) 2338 2339 #elif defined(__i386) 2340 2341 #define ARG_UADDR 4 2342 #define ARG_KADDR 8 2343 2344 ENTRY(copyinstr) 2345 movl kernelbase, %ecx 2346 #ifdef DEBUG 2347 cmpl %ecx, ARG_KADDR(%esp) 2348 jnb 1f 2349 pushl %ebp 2350 movl %esp, %ebp 2351 pushl $.copyinstr_panic_msg 2352 call panic 2353 1: 2354 #endif 2355 lea _copyinstr_error, %eax 2356 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */ 2357 jb do_copystr 2358 movl %gs:CPU_THREAD, %edx 2359 jmp 3f 2360 2361 _copyinstr_error: 2362 popl %edi 2363 movl %gs:CPU_THREAD, %edx 2364 movl %edi, T_LOFAULT(%edx) /* original lofault */ 2365 2366 popl %edi 2367 popl %ebx 2368 popl %ebp 2369 3: 2370 movl T_COPYOPS(%edx), %eax 2371 cmpl $0, %eax 2372 jz 2f 2373 jmp *CP_COPYINSTR(%eax) 2374 2375 2: movl $EFAULT, %eax /* return EFAULT */ 2376 ret 2377 SET_SIZE(copyinstr) 2378 2379 #undef ARG_UADDR 2380 #undef ARG_KADDR 2381 2382 #endif /* __i386 */ 2383 #endif /* __lint */ 2384 2385 /* 2386 * Copy a null terminated string from the kernel 2387 * address space to the user address space. 
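 *
 * Returns 0 on success, ENAMETOOLONG if `maxlength' runs out first, or
 * EFAULT (possibly via the installed copyops) if the user address is bad.
 * A hypothetical caller, for illustration only:
 *
 *	size_t done;
 *	int err = copyoutstr(kstr, ustr, len, &done);
 *
 *	if (err != 0)
 *		return (err);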
2388 */ 2389 2390 #if defined(__lint) 2391 2392 /* ARGSUSED */ 2393 int 2394 copyoutstr(const char *kaddr, char *uaddr, size_t maxlength, 2395 size_t *lencopied) 2396 { return (0); } 2397 2398 #else /* __lint */ 2399 2400 #if defined(__amd64) 2401 2402 ENTRY(copyoutstr) 2403 pushq %rbp 2404 movq %rsp, %rbp 2405 subq $32, %rsp 2406 2407 /* 2408 * save args in case we trap and need to rerun as a copyop 2409 */ 2410 movq %rdi, (%rsp) 2411 movq %rsi, 0x8(%rsp) 2412 movq %rdx, 0x10(%rsp) 2413 movq %rcx, 0x18(%rsp) 2414 2415 movq kernelbase(%rip), %rax 2416 #ifdef DEBUG 2417 cmpq %rax, %rdi /* %rdi = kaddr */ 2418 jnb 1f 2419 leaq .copyoutstr_panic_msg(%rip), %rdi 2420 jmp call_panic /* setup stack and call panic */ 2421 1: 2422 #endif 2423 /* 2424 * pass lofault value as 5th argument to do_copystr 2425 * pass one as 6th argument to do_copystr in %r10d 2426 */ 2427 leaq _copyoutstr_error(%rip), %r8 2428 movl $1, %r10d 2429 2430 cmpq %rax, %rsi /* test uaddr < kernelbase */ 2431 jae 4f 2432 SMAP_DISABLE_INSTR(7) 2433 jmp do_copystr 2434 4: 2435 movq %gs:CPU_THREAD, %r9 2436 jmp 3f 2437 2438 _copyoutstr_error: 2439 SMAP_ENABLE_INSTR(9) 2440 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ 2441 3: 2442 movq T_COPYOPS(%r9), %rax 2443 cmpq $0, %rax 2444 jz 2f 2445 2446 /* 2447 * reload args for the copyop 2448 */ 2449 movq (%rsp), %rdi 2450 movq 0x8(%rsp), %rsi 2451 movq 0x10(%rsp), %rdx 2452 movq 0x18(%rsp), %rcx 2453 leave 2454 movq CP_COPYOUTSTR(%rax), %rax 2455 INDIRECT_JMP_REG(rax) 2456 2457 2: movl $EFAULT, %eax /* return EFAULT */ 2458 leave 2459 ret 2460 SET_SIZE(copyoutstr) 2461 2462 #elif defined(__i386) 2463 2464 #define ARG_KADDR 4 2465 #define ARG_UADDR 8 2466 2467 ENTRY(copyoutstr) 2468 movl kernelbase, %ecx 2469 #ifdef DEBUG 2470 cmpl %ecx, ARG_KADDR(%esp) 2471 jnb 1f 2472 pushl %ebp 2473 movl %esp, %ebp 2474 pushl $.copyoutstr_panic_msg 2475 call panic 2476 1: 2477 #endif 2478 lea _copyoutstr_error, %eax 2479 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */ 2480 jb do_copystr 2481 movl %gs:CPU_THREAD, %edx 2482 jmp 3f 2483 2484 _copyoutstr_error: 2485 popl %edi 2486 movl %gs:CPU_THREAD, %edx 2487 movl %edi, T_LOFAULT(%edx) /* restore the original lofault */ 2488 2489 popl %edi 2490 popl %ebx 2491 popl %ebp 2492 3: 2493 movl T_COPYOPS(%edx), %eax 2494 cmpl $0, %eax 2495 jz 2f 2496 jmp *CP_COPYOUTSTR(%eax) 2497 2498 2: movl $EFAULT, %eax /* return EFAULT */ 2499 ret 2500 SET_SIZE(copyoutstr) 2501 2502 #undef ARG_KADDR 2503 #undef ARG_UADDR 2504 2505 #endif /* __i386 */ 2506 #endif /* __lint */ 2507 2508 /* 2509 * Since all of the fuword() variants are so similar, we have a macro to spit 2510 * them out. This allows us to create DTrace-unobservable functions easily. 2511 */ 2512 2513 #if defined(__lint) 2514 2515 #if defined(__amd64) 2516 2517 /* ARGSUSED */ 2518 int 2519 fuword64(const void *addr, uint64_t *dst) 2520 { return (0); } 2521 2522 #endif 2523 2524 /* ARGSUSED */ 2525 int 2526 fuword32(const void *addr, uint32_t *dst) 2527 { return (0); } 2528 2529 /* ARGSUSED */ 2530 int 2531 fuword16(const void *addr, uint16_t *dst) 2532 { return (0); } 2533 2534 /* ARGSUSED */ 2535 int 2536 fuword8(const void *addr, uint8_t *dst) 2537 { return (0); } 2538 2539 #else /* __lint */ 2540 2541 #if defined(__amd64) 2542 2543 /* 2544 * Note that we don't save and reload the arguments here 2545 * because their values are not altered in the copy path. 2546 * Additionally, when successful, the smap_enable jmp will 2547 * actually return us to our original caller. 
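 *
 * For example (a hypothetical caller, not part of this file), fetching a
 * 32-bit word from a user-supplied address looks like:
 *
 *	uint32_t val;
 *
 *	if (fuword32(uaddr, &val) == -1)
 *		return (set_errno(EFAULT));
 *
 * On a fault, or when a kernel address is passed in, the request is
 * first handed to the thread's copyops vector if one is installed; only
 * when that is absent does the routine return -1.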
2548 */ 2549 2550 #define FUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \ 2551 ENTRY(NAME) \ 2552 movq %gs:CPU_THREAD, %r9; \ 2553 cmpq kernelbase(%rip), %rdi; \ 2554 jae 1f; \ 2555 leaq _flt_/**/NAME, %rdx; \ 2556 movq %rdx, T_LOFAULT(%r9); \ 2557 SMAP_DISABLE_INSTR(DISNUM) \ 2558 INSTR (%rdi), REG; \ 2559 movq $0, T_LOFAULT(%r9); \ 2560 INSTR REG, (%rsi); \ 2561 xorl %eax, %eax; \ 2562 SMAP_ENABLE_INSTR(EN1) \ 2563 ret; \ 2564 _flt_/**/NAME: \ 2565 SMAP_ENABLE_INSTR(EN2) \ 2566 movq $0, T_LOFAULT(%r9); \ 2567 1: \ 2568 movq T_COPYOPS(%r9), %rax; \ 2569 cmpq $0, %rax; \ 2570 jz 2f; \ 2571 movq COPYOP(%rax), %rax; \ 2572 INDIRECT_JMP_REG(rax); \ 2573 2: \ 2574 movl $-1, %eax; \ 2575 ret; \ 2576 SET_SIZE(NAME) 2577 2578 FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11) 2579 FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13) 2580 FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15) 2581 FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17) 2582 2583 #elif defined(__i386) 2584 2585 #define FUWORD(NAME, INSTR, REG, COPYOP) \ 2586 ENTRY(NAME) \ 2587 movl %gs:CPU_THREAD, %ecx; \ 2588 movl kernelbase, %eax; \ 2589 cmpl %eax, 4(%esp); \ 2590 jae 1f; \ 2591 lea _flt_/**/NAME, %edx; \ 2592 movl %edx, T_LOFAULT(%ecx); \ 2593 movl 4(%esp), %eax; \ 2594 movl 8(%esp), %edx; \ 2595 INSTR (%eax), REG; \ 2596 movl $0, T_LOFAULT(%ecx); \ 2597 INSTR REG, (%edx); \ 2598 xorl %eax, %eax; \ 2599 ret; \ 2600 _flt_/**/NAME: \ 2601 movl $0, T_LOFAULT(%ecx); \ 2602 1: \ 2603 movl T_COPYOPS(%ecx), %eax; \ 2604 cmpl $0, %eax; \ 2605 jz 2f; \ 2606 jmp *COPYOP(%eax); \ 2607 2: \ 2608 movl $-1, %eax; \ 2609 ret; \ 2610 SET_SIZE(NAME) 2611 2612 FUWORD(fuword32, movl, %eax, CP_FUWORD32) 2613 FUWORD(fuword16, movw, %ax, CP_FUWORD16) 2614 FUWORD(fuword8, movb, %al, CP_FUWORD8) 2615 2616 #endif /* __i386 */ 2617 2618 #undef FUWORD 2619 2620 #endif /* __lint */ 2621 2622 /* 2623 * Set user word. 2624 */ 2625 2626 #if defined(__lint) 2627 2628 #if defined(__amd64) 2629 2630 /* ARGSUSED */ 2631 int 2632 suword64(void *addr, uint64_t value) 2633 { return (0); } 2634 2635 #endif 2636 2637 /* ARGSUSED */ 2638 int 2639 suword32(void *addr, uint32_t value) 2640 { return (0); } 2641 2642 /* ARGSUSED */ 2643 int 2644 suword16(void *addr, uint16_t value) 2645 { return (0); } 2646 2647 /* ARGSUSED */ 2648 int 2649 suword8(void *addr, uint8_t value) 2650 { return (0); } 2651 2652 #else /* lint */ 2653 2654 #if defined(__amd64) 2655 2656 /* 2657 * Note that we don't save and reload the arguments here 2658 * because their values are not altered in the copy path. 
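 *
 * For example (a hypothetical caller, not part of this file), posting a
 * 32-bit status word back out to a user-supplied address looks like:
 *
 *	if (suword32(uaddr, status) == -1)
 *		return (set_errno(EFAULT));
 *
 * As with the fuwordN() routines, a fault or a kernel address is first
 * offered to the thread's copyops vector (if one is installed) before
 * -1 is returned.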
2659 */ 2660 2661 #define SUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \ 2662 ENTRY(NAME) \ 2663 movq %gs:CPU_THREAD, %r9; \ 2664 cmpq kernelbase(%rip), %rdi; \ 2665 jae 1f; \ 2666 leaq _flt_/**/NAME, %rdx; \ 2667 SMAP_DISABLE_INSTR(DISNUM) \ 2668 movq %rdx, T_LOFAULT(%r9); \ 2669 INSTR REG, (%rdi); \ 2670 movq $0, T_LOFAULT(%r9); \ 2671 xorl %eax, %eax; \ 2672 SMAP_ENABLE_INSTR(EN1) \ 2673 ret; \ 2674 _flt_/**/NAME: \ 2675 SMAP_ENABLE_INSTR(EN2) \ 2676 movq $0, T_LOFAULT(%r9); \ 2677 1: \ 2678 movq T_COPYOPS(%r9), %rax; \ 2679 cmpq $0, %rax; \ 2680 jz 3f; \ 2681 movq COPYOP(%rax), %rax; \ 2682 INDIRECT_JMP_REG(rax); \ 2683 3: \ 2684 movl $-1, %eax; \ 2685 ret; \ 2686 SET_SIZE(NAME) 2687 2688 SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19) 2689 SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21) 2690 SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23) 2691 SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25) 2692 2693 #elif defined(__i386) 2694 2695 #define SUWORD(NAME, INSTR, REG, COPYOP) \ 2696 ENTRY(NAME) \ 2697 movl %gs:CPU_THREAD, %ecx; \ 2698 movl kernelbase, %eax; \ 2699 cmpl %eax, 4(%esp); \ 2700 jae 1f; \ 2701 lea _flt_/**/NAME, %edx; \ 2702 movl %edx, T_LOFAULT(%ecx); \ 2703 movl 4(%esp), %eax; \ 2704 movl 8(%esp), %edx; \ 2705 INSTR REG, (%eax); \ 2706 movl $0, T_LOFAULT(%ecx); \ 2707 xorl %eax, %eax; \ 2708 ret; \ 2709 _flt_/**/NAME: \ 2710 movl $0, T_LOFAULT(%ecx); \ 2711 1: \ 2712 movl T_COPYOPS(%ecx), %eax; \ 2713 cmpl $0, %eax; \ 2714 jz 3f; \ 2715 movl COPYOP(%eax), %ecx; \ 2716 jmp *%ecx; \ 2717 3: \ 2718 movl $-1, %eax; \ 2719 ret; \ 2720 SET_SIZE(NAME) 2721 2722 SUWORD(suword32, movl, %edx, CP_SUWORD32) 2723 SUWORD(suword16, movw, %dx, CP_SUWORD16) 2724 SUWORD(suword8, movb, %dl, CP_SUWORD8) 2725 2726 #endif /* __i386 */ 2727 2728 #undef SUWORD 2729 2730 #endif /* __lint */ 2731 2732 #if defined(__lint) 2733 2734 #if defined(__amd64) 2735 2736 /*ARGSUSED*/ 2737 void 2738 fuword64_noerr(const void *addr, uint64_t *dst) 2739 {} 2740 2741 #endif 2742 2743 /*ARGSUSED*/ 2744 void 2745 fuword32_noerr(const void *addr, uint32_t *dst) 2746 {} 2747 2748 /*ARGSUSED*/ 2749 void 2750 fuword8_noerr(const void *addr, uint8_t *dst) 2751 {} 2752 2753 /*ARGSUSED*/ 2754 void 2755 fuword16_noerr(const void *addr, uint16_t *dst) 2756 {} 2757 2758 #else /* __lint */ 2759 2760 #if defined(__amd64) 2761 2762 #define FUWORD_NOERR(NAME, INSTR, REG) \ 2763 ENTRY(NAME) \ 2764 cmpq kernelbase(%rip), %rdi; \ 2765 cmovnbq kernelbase(%rip), %rdi; \ 2766 INSTR (%rdi), REG; \ 2767 INSTR REG, (%rsi); \ 2768 ret; \ 2769 SET_SIZE(NAME) 2770 2771 FUWORD_NOERR(fuword64_noerr, movq, %rax) 2772 FUWORD_NOERR(fuword32_noerr, movl, %eax) 2773 FUWORD_NOERR(fuword16_noerr, movw, %ax) 2774 FUWORD_NOERR(fuword8_noerr, movb, %al) 2775 2776 #elif defined(__i386) 2777 2778 #define FUWORD_NOERR(NAME, INSTR, REG) \ 2779 ENTRY(NAME) \ 2780 movl 4(%esp), %eax; \ 2781 cmpl kernelbase, %eax; \ 2782 jb 1f; \ 2783 movl kernelbase, %eax; \ 2784 1: movl 8(%esp), %edx; \ 2785 INSTR (%eax), REG; \ 2786 INSTR REG, (%edx); \ 2787 ret; \ 2788 SET_SIZE(NAME) 2789 2790 FUWORD_NOERR(fuword32_noerr, movl, %ecx) 2791 FUWORD_NOERR(fuword16_noerr, movw, %cx) 2792 FUWORD_NOERR(fuword8_noerr, movb, %cl) 2793 2794 #endif /* __i386 */ 2795 2796 #undef FUWORD_NOERR 2797 2798 #endif /* __lint */ 2799 2800 #if defined(__lint) 2801 2802 #if defined(__amd64) 2803 2804 /*ARGSUSED*/ 2805 void 2806 suword64_noerr(void *addr, uint64_t value) 2807 {} 2808 2809 #endif 2810 2811 /*ARGSUSED*/ 2812 void 2813 suword32_noerr(void *addr, uint32_t value) 2814 
{} 2815 2816 /*ARGSUSED*/ 2817 void 2818 suword16_noerr(void *addr, uint16_t value) 2819 {} 2820 2821 /*ARGSUSED*/ 2822 void 2823 suword8_noerr(void *addr, uint8_t value) 2824 {} 2825 2826 #else /* lint */ 2827 2828 #if defined(__amd64) 2829 2830 #define SUWORD_NOERR(NAME, INSTR, REG) \ 2831 ENTRY(NAME) \ 2832 cmpq kernelbase(%rip), %rdi; \ 2833 cmovnbq kernelbase(%rip), %rdi; \ 2834 INSTR REG, (%rdi); \ 2835 ret; \ 2836 SET_SIZE(NAME) 2837 2838 SUWORD_NOERR(suword64_noerr, movq, %rsi) 2839 SUWORD_NOERR(suword32_noerr, movl, %esi) 2840 SUWORD_NOERR(suword16_noerr, movw, %si) 2841 SUWORD_NOERR(suword8_noerr, movb, %sil) 2842 2843 #elif defined(__i386) 2844 2845 #define SUWORD_NOERR(NAME, INSTR, REG) \ 2846 ENTRY(NAME) \ 2847 movl 4(%esp), %eax; \ 2848 cmpl kernelbase, %eax; \ 2849 jb 1f; \ 2850 movl kernelbase, %eax; \ 2851 1: \ 2852 movl 8(%esp), %edx; \ 2853 INSTR REG, (%eax); \ 2854 ret; \ 2855 SET_SIZE(NAME) 2856 2857 SUWORD_NOERR(suword32_noerr, movl, %edx) 2858 SUWORD_NOERR(suword16_noerr, movw, %dx) 2859 SUWORD_NOERR(suword8_noerr, movb, %dl) 2860 2861 #endif /* __i386 */ 2862 2863 #undef SUWORD_NOERR 2864 2865 #endif /* lint */ 2866 2867 2868 #if defined(__lint) 2869 2870 /*ARGSUSED*/ 2871 int 2872 subyte(void *addr, uchar_t value) 2873 { return (0); } 2874 2875 /*ARGSUSED*/ 2876 void 2877 subyte_noerr(void *addr, uchar_t value) 2878 {} 2879 2880 /*ARGSUSED*/ 2881 int 2882 fulword(const void *addr, ulong_t *valuep) 2883 { return (0); } 2884 2885 /*ARGSUSED*/ 2886 void 2887 fulword_noerr(const void *addr, ulong_t *valuep) 2888 {} 2889 2890 /*ARGSUSED*/ 2891 int 2892 sulword(void *addr, ulong_t valuep) 2893 { return (0); } 2894 2895 /*ARGSUSED*/ 2896 void 2897 sulword_noerr(void *addr, ulong_t valuep) 2898 {} 2899 2900 #else 2901 2902 .weak subyte 2903 subyte=suword8 2904 .weak subyte_noerr 2905 subyte_noerr=suword8_noerr 2906 2907 #if defined(__amd64) 2908 2909 .weak fulword 2910 fulword=fuword64 2911 .weak fulword_noerr 2912 fulword_noerr=fuword64_noerr 2913 .weak sulword 2914 sulword=suword64 2915 .weak sulword_noerr 2916 sulword_noerr=suword64_noerr 2917 2918 #elif defined(__i386) 2919 2920 .weak fulword 2921 fulword=fuword32 2922 .weak fulword_noerr 2923 fulword_noerr=fuword32_noerr 2924 .weak sulword 2925 sulword=suword32 2926 .weak sulword_noerr 2927 sulword_noerr=suword32_noerr 2928 2929 #endif /* __i386 */ 2930 2931 #endif /* __lint */ 2932 2933 #if defined(__lint) 2934 2935 /* 2936 * Copy a block of storage - must not overlap (from + len <= to). 
2937  * No fault handler installed (to be called under on_fault())
2938  */
2939
2940 /* ARGSUSED */
2941 void
2942 copyout_noerr(const void *kfrom, void *uto, size_t count)
2943 {}
2944
2945 /* ARGSUSED */
2946 void
2947 copyin_noerr(const void *ufrom, void *kto, size_t count)
2948 {}
2949
2950 /*
2951  * Zero a block of storage in user space
2952  */
2953
2954 /* ARGSUSED */
2955 void
2956 uzero(void *addr, size_t count)
2957 {}
2958
2959 /*
2960  * copy a block of storage in user space
2961  */
2962
2963 /* ARGSUSED */
2964 void
2965 ucopy(const void *ufrom, void *uto, size_t ulength)
2966 {}
2967
2968 /*
2969  * copy a string in user space
2970  */
2971
2972 /* ARGSUSED */
2973 void
2974 ucopystr(const char *ufrom, char *uto, size_t umaxlength, size_t *lencopied)
2975 {}
2976
2977 #else /* __lint */
2978
2979 #if defined(__amd64)
2980
2981 	ENTRY(copyin_noerr)
2982 	movq	kernelbase(%rip), %rax
2983 #ifdef DEBUG
2984 	cmpq	%rax, %rsi		/* %rsi = kto */
2985 	jae	1f
2986 	leaq	.cpyin_ne_pmsg(%rip), %rdi
2987 	jmp	call_panic		/* setup stack and call panic */
2988 1:
2989 #endif
2990 	cmpq	%rax, %rdi		/* ufrom < kernelbase */
2991 	jb	do_copy
2992 	movq	%rax, %rdi		/* force fault at kernelbase */
2993 	jmp	do_copy
2994 	SET_SIZE(copyin_noerr)
2995
2996 	ENTRY(copyout_noerr)
2997 	movq	kernelbase(%rip), %rax
2998 #ifdef DEBUG
2999 	cmpq	%rax, %rdi		/* %rdi = kfrom */
3000 	jae	1f
3001 	leaq	.cpyout_ne_pmsg(%rip), %rdi
3002 	jmp	call_panic		/* setup stack and call panic */
3003 1:
3004 #endif
3005 	cmpq	%rax, %rsi		/* uto < kernelbase */
3006 	jb	do_copy
3007 	movq	%rax, %rsi		/* force fault at kernelbase */
3008 	jmp	do_copy
3009 	SET_SIZE(copyout_noerr)
3010
3011 	ENTRY(uzero)
3012 	movq	kernelbase(%rip), %rax
3013 	cmpq	%rax, %rdi
3014 	jb	do_zero
3015 	movq	%rax, %rdi	/* force fault at kernelbase */
3016 	jmp	do_zero
3017 	SET_SIZE(uzero)
3018
3019 	ENTRY(ucopy)
3020 	movq	kernelbase(%rip), %rax
3021 	cmpq	%rax, %rdi
3022 	cmovaeq	%rax, %rdi	/* force fault at kernelbase */
3023 	cmpq	%rax, %rsi
3024 	cmovaeq	%rax, %rsi	/* force fault at kernelbase */
3025 	jmp	do_copy
3026 	SET_SIZE(ucopy)
3027
3028 /*
3029  * Note, the frame pointer is required here because do_copystr expects
3030  * to be able to pop it off!
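 *
 * Like ucopy() and uzero() above, ucopystr() installs no fault handler
 * of its own; it simply reuses whatever lofault value is already in
 * place. A (hypothetical) caller is therefore expected to bracket it
 * with on_fault()/no_fault(), roughly:
 *
 *	label_t	lj;
 *	size_t	copied;
 *
 *	if (on_fault(&lj)) {
 *		no_fault();
 *		return (EFAULT);
 *	}
 *	ucopystr(ufrom, uto, maxlen, &copied);
 *	no_fault();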
3031 */ 3032 ENTRY(ucopystr) 3033 pushq %rbp 3034 movq %rsp, %rbp 3035 movq kernelbase(%rip), %rax 3036 cmpq %rax, %rdi 3037 cmovaeq %rax, %rdi /* force fault at kernelbase */ 3038 cmpq %rax, %rsi 3039 cmovaeq %rax, %rsi /* force fault at kernelbase */ 3040 /* do_copystr expects lofault address in %r8 */ 3041 /* do_copystr expects whether or not we need smap in %r10 */ 3042 xorl %r10d, %r10d 3043 movq %gs:CPU_THREAD, %r8 3044 movq T_LOFAULT(%r8), %r8 3045 jmp do_copystr 3046 SET_SIZE(ucopystr) 3047 3048 #elif defined(__i386) 3049 3050 ENTRY(copyin_noerr) 3051 movl kernelbase, %eax 3052 #ifdef DEBUG 3053 cmpl %eax, 8(%esp) 3054 jae 1f 3055 pushl $.cpyin_ne_pmsg 3056 call panic 3057 1: 3058 #endif 3059 cmpl %eax, 4(%esp) 3060 jb do_copy 3061 movl %eax, 4(%esp) /* force fault at kernelbase */ 3062 jmp do_copy 3063 SET_SIZE(copyin_noerr) 3064 3065 ENTRY(copyout_noerr) 3066 movl kernelbase, %eax 3067 #ifdef DEBUG 3068 cmpl %eax, 4(%esp) 3069 jae 1f 3070 pushl $.cpyout_ne_pmsg 3071 call panic 3072 1: 3073 #endif 3074 cmpl %eax, 8(%esp) 3075 jb do_copy 3076 movl %eax, 8(%esp) /* force fault at kernelbase */ 3077 jmp do_copy 3078 SET_SIZE(copyout_noerr) 3079 3080 ENTRY(uzero) 3081 movl kernelbase, %eax 3082 cmpl %eax, 4(%esp) 3083 jb do_zero 3084 movl %eax, 4(%esp) /* force fault at kernelbase */ 3085 jmp do_zero 3086 SET_SIZE(uzero) 3087 3088 ENTRY(ucopy) 3089 movl kernelbase, %eax 3090 cmpl %eax, 4(%esp) 3091 jb 1f 3092 movl %eax, 4(%esp) /* force fault at kernelbase */ 3093 1: 3094 cmpl %eax, 8(%esp) 3095 jb do_copy 3096 movl %eax, 8(%esp) /* force fault at kernelbase */ 3097 jmp do_copy 3098 SET_SIZE(ucopy) 3099 3100 ENTRY(ucopystr) 3101 movl kernelbase, %eax 3102 cmpl %eax, 4(%esp) 3103 jb 1f 3104 movl %eax, 4(%esp) /* force fault at kernelbase */ 3105 1: 3106 cmpl %eax, 8(%esp) 3107 jb 2f 3108 movl %eax, 8(%esp) /* force fault at kernelbase */ 3109 2: 3110 /* do_copystr expects the lofault address in %eax */ 3111 movl %gs:CPU_THREAD, %eax 3112 movl T_LOFAULT(%eax), %eax 3113 jmp do_copystr 3114 SET_SIZE(ucopystr) 3115 3116 #endif /* __i386 */ 3117 3118 #ifdef DEBUG 3119 .data 3120 .kcopy_panic_msg: 3121 .string "kcopy: arguments below kernelbase" 3122 .bcopy_panic_msg: 3123 .string "bcopy: arguments below kernelbase" 3124 .kzero_panic_msg: 3125 .string "kzero: arguments below kernelbase" 3126 .bzero_panic_msg: 3127 .string "bzero: arguments below kernelbase" 3128 .copyin_panic_msg: 3129 .string "copyin: kaddr argument below kernelbase" 3130 .xcopyin_panic_msg: 3131 .string "xcopyin: kaddr argument below kernelbase" 3132 .copyout_panic_msg: 3133 .string "copyout: kaddr argument below kernelbase" 3134 .xcopyout_panic_msg: 3135 .string "xcopyout: kaddr argument below kernelbase" 3136 .copystr_panic_msg: 3137 .string "copystr: arguments in user space" 3138 .copyinstr_panic_msg: 3139 .string "copyinstr: kaddr argument not in kernel address space" 3140 .copyoutstr_panic_msg: 3141 .string "copyoutstr: kaddr argument not in kernel address space" 3142 .cpyin_ne_pmsg: 3143 .string "copyin_noerr: argument not in kernel address space" 3144 .cpyout_ne_pmsg: 3145 .string "copyout_noerr: argument not in kernel address space" 3146 #endif 3147 3148 #endif /* __lint */ 3149 3150 /* 3151 * These functions are used for SMAP, supervisor mode access protection. They 3152 * are hotpatched to become real instructions when the system starts up which is 3153 * done in mlsetup() as a part of enabling the other CR4 related features. 
3154  *
3155  * Generally speaking, smap_disable() is a stac instruction and smap_enable() is a
3156  * clac instruction. It's safe to call these any number of times, and in fact,
3157  * out of paranoia, the kernel will likely call them at several points.
3158  */
3159
3160 #if defined(__lint)
3161
3162 void
3163 smap_enable(void)
3164 {}
3165
3166 void
3167 smap_disable(void)
3168 {}
3169
3170 #else
3171
3172 #if defined (__amd64) || defined(__i386)
3173 	ENTRY(smap_disable)
3174 	nop
3175 	nop
3176 	nop
3177 	ret
3178 	SET_SIZE(smap_disable)
3179
3180 	ENTRY(smap_enable)
3181 	nop
3182 	nop
3183 	nop
3184 	ret
3185 	SET_SIZE(smap_enable)
3186
3187 #endif /* __amd64 || __i386 */
3188
3189 #endif /* __lint */
3190
3191 #ifndef __lint
3192
3193 	.data
3194 	.align	4
3195 	.globl	_smap_enable_patch_count
3196 	.type	_smap_enable_patch_count,@object
3197 	.size	_smap_enable_patch_count, 4
3198 _smap_enable_patch_count:
3199 	.long	SMAP_ENABLE_COUNT
3200
3201 	.globl	_smap_disable_patch_count
3202 	.type	_smap_disable_patch_count,@object
3203 	.size	_smap_disable_patch_count, 4
3204 _smap_disable_patch_count:
3205 	.long	SMAP_DISABLE_COUNT
3206
3207 #endif /* __lint */
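/*
 * As an illustration only: the hotpatch replaces each 3-byte nop
 * sequence above with the CLAC (0x0f, 0x01, 0xca) or STAC
 * (0x0f, 0x01, 0xcb) encoding once SMAP support has been detected.
 * A simplified, hypothetical C sketch follows; patch_text() is a
 * stand-in for whatever text-patching primitive the startup code
 * actually uses:
 *
 *	extern void smap_enable(void), smap_disable(void);
 *
 *	static const uint8_t clac_insn[3] = { 0x0f, 0x01, 0xca };
 *	static const uint8_t stac_insn[3] = { 0x0f, 0x01, 0xcb };
 *
 *	if (is_x86_feature(x86_featureset, X86FSET_SMAP)) {
 *		patch_text((caddr_t)smap_enable, clac_insn, sizeof (clac_insn));
 *		patch_text((caddr_t)smap_disable, stac_insn, sizeof (stac_insn));
 *	}
 *
 * The inline SMAP_ENABLE_INSTR()/SMAP_DISABLE_INSTR() sites are patched
 * in the same fashion; _smap_enable_patch_count and
 * _smap_disable_patch_count presumably allow that code to check that
 * every such site has been accounted for.
 */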