1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright (c) 2009, Intel Corporation
28 * All rights reserved.
29 */
30
31 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
32 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
33 /* All Rights Reserved */
34
35 /* Copyright (c) 1987, 1988 Microsoft Corporation */
36 /* All Rights Reserved */
37
38 /*
39 * Copyright (c) 2018 Joyent, Inc.
40 */
41
42 #include <sys/errno.h>
43 #include <sys/asm_linkage.h>
44
45 #if defined(__lint)
46 #include <sys/types.h>
47 #include <sys/systm.h>
48 #else /* __lint */
49 #include "assym.h"
50 #endif /* __lint */
51
52 #define KCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */
53 #define XCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */
54 /*
 * Non-temporal access (NTA) alignment requirement
56 */
57 #define NTA_ALIGN_SIZE 4 /* Must be at least 4-byte aligned */
58 #define NTA_ALIGN_MASK _CONST(NTA_ALIGN_SIZE-1)
59 #define COUNT_ALIGN_SIZE 16 /* Must be at least 16-byte aligned */
60 #define COUNT_ALIGN_MASK _CONST(COUNT_ALIGN_SIZE-1)
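
/*
 * The two masks are used together below, e.g. in kcopy_nta:
 *
 *	r = (from | to) & NTA_ALIGN_MASK;
 *	r = (r | count) & COUNT_ALIGN_MASK;
 *
 * A zero result means both addresses are at least 4-byte aligned and the
 * count is a multiple of COUNT_ALIGN_SIZE, which is what the non-temporal
 * copy loops require.
 */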
61
62 /*
 * With the introduction of Broadwell, Intel has introduced supervisor mode
 * access protection -- SMAP. SMAP forces the kernel to set certain bits to
 * enable access of user pages (AC in rflags, defined as PS_ACHK in
 * <sys/psw.h>). One of the challenges is that the implementation of many of
 * the userland copy routines directly uses the kernel ones. For example,
 * copyin and copyout simply jump to the do_copy_fault label and traditionally
 * let that common code deal with the return for them. In fact, changing that
 * is a can of frame pointers.
71 *
72 * Rules and Constraints:
73 *
 * 1. For anything that's not in copy.s, we have it make explicit calls to the
 * SMAP related code. It is usually in a position where it is able to. This is
 * restricted to the following three places: DTrace, resume() in swtch.s and
 * on_fault/no_fault. If you want to add it somewhere else, we should be
 * thinking twice.
79 *
 * 2. We try to toggle SMAP for the smallest window possible. This means that
 * if we take a fault, or need to fall back to a copyop in copyin(), copyout(),
 * or any other function, we will always leave with SMAP enabled (the kernel
 * cannot access user pages).
84 *
85 * 3. None of the *_noerr() or ucopy/uzero routines should toggle SMAP. They are
86 * explicitly only allowed to be called while in an on_fault()/no_fault() handler,
87 * which already takes care of ensuring that SMAP is enabled and disabled. Note
88 * this means that when under an on_fault()/no_fault() handler, one must not
 * call the non-*_noerr() routines.
90 *
 * 4. The first thing we should do after coming out of a lofault handler is to
 * make sure that we call smap_enable() again to ensure that we are safely
 * protected, as more often than not, we will have disabled SMAP to get there.
94 *
 * 5. The SMAP functions, smap_enable() and smap_disable(), may not touch any
 * registers beyond those used by the call and ret themselves. These routines
 * may be called from arbitrary contexts in copy.s where we have slightly more
 * special ABIs in place.
99 *
100 * 6. For any inline user of SMAP, the appropriate SMAP_ENABLE_INSTR and
101 * SMAP_DISABLE_INSTR macro should be used (except for smap_enable() and
102 * smap_disable()). If the number of these is changed, you must update the
103 * constants SMAP_ENABLE_COUNT and SMAP_DISABLE_COUNT below.
104 *
105 * 7. Note, at this time SMAP is not implemented for the 32-bit kernel. There is
106 * no known technical reason preventing it from being enabled.
107 *
108 * 8. Generally this .s file is processed by a K&R style cpp. This means that it
109 * really has a lot of feelings about whitespace. In particular, if you have a
110 * macro FOO with the arguments FOO(1, 3), the second argument is in fact ' 3'.
111 *
112 * 9. The smap_enable and smap_disable functions should not generally be called.
113 * They exist such that DTrace and on_trap() may use them, that's it.
114 *
 * 10. In general, the kernel has its own value for rflags that gets used. This
 * is maintained in a few different places which vary based on how the thread
 * comes into existence and whether it's a user thread. In general, when the
 * kernel takes a trap, it always sets rflags to a known set of flags, mainly
 * as part of ENABLE_INTR_FLAGS and F_OFF and F_ON. These ensure that PS_ACHK
 * is cleared for us. In addition, when using the sysenter instruction, we mask
 * off PS_ACHK via the AMD_SFMASK MSR. See init_cpu_syscall() for where that
 * gets masked off.
123 */
124
125 /*
 * The optimal 64-bit bcopy and kcopy for modern x86 processors use
 * "rep smovq" for large sizes. Performance data shows that many calls to
 * bcopy/kcopy/bzero/kzero operate on small buffers, so for the best
 * performance at these small sizes unrolled code is used. For medium sizes,
 * loops that write 64 bytes per iteration are used. Transition points were
 * determined experimentally.
131 */
132 #define BZERO_USE_REP (1024)
133 #define BCOPY_DFLT_REP (128)
134 #define BCOPY_NHM_REP (768)
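
/*
 * Concretely, for the 64-bit bcopy below: counts under 80 bytes dispatch
 * through a jump table of unrolled moves, counts under the rep threshold
 * (BCOPY_DFLT_REP by default; the bcopy_patch_start image below exists so
 * that it can be raised to BCOPY_NHM_REP at boot) use the 64-byte-per-
 * iteration loop, and everything larger uses rep smovq.  bzero is
 * structured the same way, with BZERO_USE_REP as its rep threshold.
 */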
135
136 /*
137 * Copy a block of storage, returning an error code if `from' or
138 * `to' takes a kernel pagefault which cannot be resolved.
139 * Returns errno value on pagefault error, 0 if all ok
140 */
141
142 /*
143 * I'm sorry about these macros, but copy.s is unsurprisingly sensitive to
144 * additional call instructions.
145 */
146 #if defined(__amd64)
147 #define SMAP_DISABLE_COUNT 16
148 #define SMAP_ENABLE_COUNT 26
149 #elif defined(__i386)
150 #define SMAP_DISABLE_COUNT 0
151 #define SMAP_ENABLE_COUNT 0
152 #endif
153
154 #define SMAP_DISABLE_INSTR(ITER) \
155 .globl _smap_disable_patch_/**/ITER; \
156 _smap_disable_patch_/**/ITER/**/:; \
157 nop; nop; nop;
158
159 #define SMAP_ENABLE_INSTR(ITER) \
160 .globl _smap_enable_patch_/**/ITER; \
161 _smap_enable_patch_/**/ITER/**/:; \
162 nop; nop; nop;
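
/*
 * Each patch point is three single-byte nops, which matches the size of the
 * three-byte clac/stac encodings; when the processor supports SMAP the
 * startup code is expected to overwrite these sites with the real
 * instructions, otherwise they remain harmless nops.
 */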
163
164 #if defined(__lint)
165
166 /* ARGSUSED */
167 int
168 kcopy(const void *from, void *to, size_t count)
169 { return (0); }
170
171 #else /* __lint */
172
173 .globl kernelbase
174 .globl postbootkernelbase
175
176 #if defined(__amd64)
177
178 ENTRY(kcopy)
179 pushq %rbp
180 movq %rsp, %rbp
181 #ifdef DEBUG
182 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */
183 jb 0f
184 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */
185 jnb 1f
186 0: leaq .kcopy_panic_msg(%rip), %rdi
187 xorl %eax, %eax
188 call panic
189 1:
190 #endif
191 /*
192 * pass lofault value as 4th argument to do_copy_fault
193 */
194 leaq _kcopy_copyerr(%rip), %rcx
195 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
196
197 do_copy_fault:
198 movq T_LOFAULT(%r9), %r11 /* save the current lofault */
199 movq %rcx, T_LOFAULT(%r9) /* new lofault */
200 call bcopy_altentry
201 xorl %eax, %eax /* return 0 (success) */
202 SMAP_ENABLE_INSTR(0)
203
204 /*
205 * A fault during do_copy_fault is indicated through an errno value
206 * in %rax and we iretq from the trap handler to here.
207 */
208 _kcopy_copyerr:
209 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
210 leave
211 ret
212 SET_SIZE(kcopy)
213
214 #elif defined(__i386)
215
216 #define ARG_FROM 8
217 #define ARG_TO 12
218 #define ARG_COUNT 16
219
220 ENTRY(kcopy)
221 #ifdef DEBUG
222 pushl %ebp
223 movl %esp, %ebp
224 movl postbootkernelbase, %eax
225 cmpl %eax, ARG_FROM(%ebp)
226 jb 0f
227 cmpl %eax, ARG_TO(%ebp)
228 jnb 1f
229 0: pushl $.kcopy_panic_msg
230 call panic
231 1: popl %ebp
232 #endif
233 lea _kcopy_copyerr, %eax /* lofault value */
234 movl %gs:CPU_THREAD, %edx
235
236 do_copy_fault:
237 pushl %ebp
238 movl %esp, %ebp /* setup stack frame */
239 pushl %esi
240 pushl %edi /* save registers */
241
242 movl T_LOFAULT(%edx), %edi
243 pushl %edi /* save the current lofault */
244 movl %eax, T_LOFAULT(%edx) /* new lofault */
245
246 movl ARG_COUNT(%ebp), %ecx
247 movl ARG_FROM(%ebp), %esi
248 movl ARG_TO(%ebp), %edi
249 shrl $2, %ecx /* word count */
250 rep
251 smovl
252 movl ARG_COUNT(%ebp), %ecx
253 andl $3, %ecx /* bytes left over */
254 rep
255 smovb
256 xorl %eax, %eax
257
258 /*
259 * A fault during do_copy_fault is indicated through an errno value
260 * in %eax and we iret from the trap handler to here.
261 */
262 _kcopy_copyerr:
263 popl %ecx
264 popl %edi
265 movl %ecx, T_LOFAULT(%edx) /* restore the original lofault */
266 popl %esi
267 popl %ebp
268 ret
269 SET_SIZE(kcopy)
270
271 #undef ARG_FROM
272 #undef ARG_TO
273 #undef ARG_COUNT
274
275 #endif /* __i386 */
276 #endif /* __lint */
277
278 #if defined(__lint)
279
280 /*
281 * Copy a block of storage. Similar to kcopy but uses non-temporal
282 * instructions.
283 */
284
285 /* ARGSUSED */
286 int
287 kcopy_nta(const void *from, void *to, size_t count, int copy_cached)
288 { return (0); }
289
290 #else /* __lint */
291
292 #if defined(__amd64)
293
294 #define COPY_LOOP_INIT(src, dst, cnt) \
295 addq cnt, src; \
296 addq cnt, dst; \
297 shrq $3, cnt; \
298 neg cnt
299
300 /* Copy 16 bytes per loop. Uses %rax and %r8 */
301 #define COPY_LOOP_BODY(src, dst, cnt) \
302 prefetchnta 0x100(src, cnt, 8); \
303 movq (src, cnt, 8), %rax; \
304 movq 0x8(src, cnt, 8), %r8; \
305 movnti %rax, (dst, cnt, 8); \
306 movnti %r8, 0x8(dst, cnt, 8); \
307 addq $2, cnt
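
/*
 * COPY_LOOP_INIT leaves src and dst pointing one past the end of the buffer
 * and cnt holding the negated count of 8-byte words, so (src, cnt, 8) walks
 * forward through the buffer.  Each COPY_LOOP_BODY moves 16 bytes and adds 2
 * to cnt; the caller loops on jnz until cnt reaches zero.  For example, a
 * 64-byte copy starts with cnt = -8 and takes four iterations
 * (cnt = -8, -6, -4, -2).  This is why callers insist that count be a
 * multiple of COUNT_ALIGN_SIZE.
 */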
308
309 ENTRY(kcopy_nta)
310 pushq %rbp
311 movq %rsp, %rbp
312 #ifdef DEBUG
313 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */
314 jb 0f
315 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */
316 jnb 1f
317 0: leaq .kcopy_panic_msg(%rip), %rdi
318 xorl %eax, %eax
319 call panic
320 1:
321 #endif
322
323 movq %gs:CPU_THREAD, %r9
324 cmpq $0, %rcx /* No non-temporal access? */
325 /*
326 * pass lofault value as 4th argument to do_copy_fault
327 */
328 leaq _kcopy_nta_copyerr(%rip), %rcx /* doesn't set rflags */
329 jnz do_copy_fault /* use regular access */
330 /*
331 * Make sure cnt is >= KCOPY_MIN_SIZE
332 */
333 cmpq $KCOPY_MIN_SIZE, %rdx
334 jb do_copy_fault
335
336 /*
337 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
338 * count is COUNT_ALIGN_SIZE aligned.
339 */
340 movq %rdi, %r10
341 orq %rsi, %r10
342 andq $NTA_ALIGN_MASK, %r10
343 orq %rdx, %r10
344 andq $COUNT_ALIGN_MASK, %r10
345 jnz do_copy_fault
346
347 ALTENTRY(do_copy_fault_nta)
348 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
349 movq T_LOFAULT(%r9), %r11 /* save the current lofault */
350 movq %rcx, T_LOFAULT(%r9) /* new lofault */
351
352 /*
353 * COPY_LOOP_BODY uses %rax and %r8
354 */
355 COPY_LOOP_INIT(%rdi, %rsi, %rdx)
356 2: COPY_LOOP_BODY(%rdi, %rsi, %rdx)
357 jnz 2b
358
359 mfence
360 xorl %eax, %eax /* return 0 (success) */
361 SMAP_ENABLE_INSTR(1)
362
363 _kcopy_nta_copyerr:
364 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
365 leave
366 ret
367 SET_SIZE(do_copy_fault_nta)
368 SET_SIZE(kcopy_nta)
369
370 #elif defined(__i386)
371
372 #define ARG_FROM 8
373 #define ARG_TO 12
374 #define ARG_COUNT 16
375
376 #define COPY_LOOP_INIT(src, dst, cnt) \
377 addl cnt, src; \
378 addl cnt, dst; \
379 shrl $3, cnt; \
380 neg cnt
381
382 #define COPY_LOOP_BODY(src, dst, cnt) \
383 prefetchnta 0x100(src, cnt, 8); \
384 movl (src, cnt, 8), %esi; \
385 movnti %esi, (dst, cnt, 8); \
386 movl 0x4(src, cnt, 8), %esi; \
387 movnti %esi, 0x4(dst, cnt, 8); \
388 movl 0x8(src, cnt, 8), %esi; \
389 movnti %esi, 0x8(dst, cnt, 8); \
390 movl 0xc(src, cnt, 8), %esi; \
391 movnti %esi, 0xc(dst, cnt, 8); \
392 addl $2, cnt
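
/*
 * As in the 64-bit version above: cnt is the negated count of 8-byte units
 * and src/dst point past the end, so each COPY_LOOP_BODY copies 16 bytes
 * with four 4-byte movnti stores and the addl $2 terminates the loop once
 * cnt reaches zero.
 */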
393
394 /*
395 * kcopy_nta is not implemented for 32-bit as no performance
 * improvement was shown. We simply jump directly to kcopy
 * and ignore the 4th argument.
398 */
399 ENTRY(kcopy_nta)
400 jmp kcopy
401
402 lea _kcopy_nta_copyerr, %eax /* lofault value */
403 ALTENTRY(do_copy_fault_nta)
404 pushl %ebp
405 movl %esp, %ebp /* setup stack frame */
406 pushl %esi
407 pushl %edi
408
409 movl %gs:CPU_THREAD, %edx
410 movl T_LOFAULT(%edx), %edi
411 pushl %edi /* save the current lofault */
412 movl %eax, T_LOFAULT(%edx) /* new lofault */
413
414 /* COPY_LOOP_BODY needs to use %esi */
415 movl ARG_COUNT(%ebp), %ecx
416 movl ARG_FROM(%ebp), %edi
417 movl ARG_TO(%ebp), %eax
418 COPY_LOOP_INIT(%edi, %eax, %ecx)
419 1: COPY_LOOP_BODY(%edi, %eax, %ecx)
420 jnz 1b
421 mfence
422
423 xorl %eax, %eax
424 _kcopy_nta_copyerr:
425 popl %ecx
426 popl %edi
427 movl %ecx, T_LOFAULT(%edx) /* restore the original lofault */
428 popl %esi
429 leave
430 ret
431 SET_SIZE(do_copy_fault_nta)
432 SET_SIZE(kcopy_nta)
433
434 #undef ARG_FROM
435 #undef ARG_TO
436 #undef ARG_COUNT
437
438 #endif /* __i386 */
439 #endif /* __lint */
440
441 #if defined(__lint)
442
443 /* ARGSUSED */
444 void
445 bcopy(const void *from, void *to, size_t count)
446 {}
447
448 #else /* __lint */
449
450 #if defined(__amd64)
451
452 ENTRY(bcopy)
453 #ifdef DEBUG
454 orq %rdx, %rdx /* %rdx = count */
455 jz 1f
456 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */
457 jb 0f
458 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */
459 jnb 1f
460 0: leaq .bcopy_panic_msg(%rip), %rdi
461 jmp call_panic /* setup stack and call panic */
462 1:
463 #endif
464 /*
465 * bcopy_altentry() is called from kcopy, i.e., do_copy_fault.
466 * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy
467 * uses these registers in future they must be saved and restored.
468 */
469 ALTENTRY(bcopy_altentry)
470 do_copy:
471 #define L(s) .bcopy/**/s
472 cmpq $0x50, %rdx /* 80 */
473 jae bcopy_ck_size
474
475 /*
	 * Performance data shows that many callers copy small buffers, so for
	 * the best performance at these sizes unrolled code is used. Store
	 * data without worrying about alignment.
479 */
480 leaq L(fwdPxQx)(%rip), %r10
481 addq %rdx, %rdi
482 addq %rdx, %rsi
483 movslq (%r10,%rdx,4), %rcx
484 leaq (%rcx,%r10,1), %r10
485 jmpq *%r10
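
	/*
	 * L(fwdPxQx) is a table of 80 32-bit offsets, one per byte count
	 * from 0 to 79, each relative to the start of the table.  Since
	 * %rdi and %rsi have already been advanced by the count, the
	 * L(PpQq) fragments work backwards from the end: L(PpQq) handles
	 * a count of 8*q + p by falling through q 8-byte moves and then
	 * copying the p-byte tail.  For example, a 13-byte copy lands on
	 * L(P5Q1): one 8-byte move at -0xd, then a 4-byte and a 1-byte
	 * move at -0x5 and -0x1.
	 */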
486
487 .p2align 4
488 L(fwdPxQx):
489 .int L(P0Q0)-L(fwdPxQx) /* 0 */
490 .int L(P1Q0)-L(fwdPxQx)
491 .int L(P2Q0)-L(fwdPxQx)
492 .int L(P3Q0)-L(fwdPxQx)
493 .int L(P4Q0)-L(fwdPxQx)
494 .int L(P5Q0)-L(fwdPxQx)
495 .int L(P6Q0)-L(fwdPxQx)
496 .int L(P7Q0)-L(fwdPxQx)
497
498 .int L(P0Q1)-L(fwdPxQx) /* 8 */
499 .int L(P1Q1)-L(fwdPxQx)
500 .int L(P2Q1)-L(fwdPxQx)
501 .int L(P3Q1)-L(fwdPxQx)
502 .int L(P4Q1)-L(fwdPxQx)
503 .int L(P5Q1)-L(fwdPxQx)
504 .int L(P6Q1)-L(fwdPxQx)
505 .int L(P7Q1)-L(fwdPxQx)
506
507 .int L(P0Q2)-L(fwdPxQx) /* 16 */
508 .int L(P1Q2)-L(fwdPxQx)
509 .int L(P2Q2)-L(fwdPxQx)
510 .int L(P3Q2)-L(fwdPxQx)
511 .int L(P4Q2)-L(fwdPxQx)
512 .int L(P5Q2)-L(fwdPxQx)
513 .int L(P6Q2)-L(fwdPxQx)
514 .int L(P7Q2)-L(fwdPxQx)
515
516 .int L(P0Q3)-L(fwdPxQx) /* 24 */
517 .int L(P1Q3)-L(fwdPxQx)
518 .int L(P2Q3)-L(fwdPxQx)
519 .int L(P3Q3)-L(fwdPxQx)
520 .int L(P4Q3)-L(fwdPxQx)
521 .int L(P5Q3)-L(fwdPxQx)
522 .int L(P6Q3)-L(fwdPxQx)
523 .int L(P7Q3)-L(fwdPxQx)
524
525 .int L(P0Q4)-L(fwdPxQx) /* 32 */
526 .int L(P1Q4)-L(fwdPxQx)
527 .int L(P2Q4)-L(fwdPxQx)
528 .int L(P3Q4)-L(fwdPxQx)
529 .int L(P4Q4)-L(fwdPxQx)
530 .int L(P5Q4)-L(fwdPxQx)
531 .int L(P6Q4)-L(fwdPxQx)
532 .int L(P7Q4)-L(fwdPxQx)
533
534 .int L(P0Q5)-L(fwdPxQx) /* 40 */
535 .int L(P1Q5)-L(fwdPxQx)
536 .int L(P2Q5)-L(fwdPxQx)
537 .int L(P3Q5)-L(fwdPxQx)
538 .int L(P4Q5)-L(fwdPxQx)
539 .int L(P5Q5)-L(fwdPxQx)
540 .int L(P6Q5)-L(fwdPxQx)
541 .int L(P7Q5)-L(fwdPxQx)
542
543 .int L(P0Q6)-L(fwdPxQx) /* 48 */
544 .int L(P1Q6)-L(fwdPxQx)
545 .int L(P2Q6)-L(fwdPxQx)
546 .int L(P3Q6)-L(fwdPxQx)
547 .int L(P4Q6)-L(fwdPxQx)
548 .int L(P5Q6)-L(fwdPxQx)
549 .int L(P6Q6)-L(fwdPxQx)
550 .int L(P7Q6)-L(fwdPxQx)
551
552 .int L(P0Q7)-L(fwdPxQx) /* 56 */
553 .int L(P1Q7)-L(fwdPxQx)
554 .int L(P2Q7)-L(fwdPxQx)
555 .int L(P3Q7)-L(fwdPxQx)
556 .int L(P4Q7)-L(fwdPxQx)
557 .int L(P5Q7)-L(fwdPxQx)
558 .int L(P6Q7)-L(fwdPxQx)
559 .int L(P7Q7)-L(fwdPxQx)
560
561 .int L(P0Q8)-L(fwdPxQx) /* 64 */
562 .int L(P1Q8)-L(fwdPxQx)
563 .int L(P2Q8)-L(fwdPxQx)
564 .int L(P3Q8)-L(fwdPxQx)
565 .int L(P4Q8)-L(fwdPxQx)
566 .int L(P5Q8)-L(fwdPxQx)
567 .int L(P6Q8)-L(fwdPxQx)
568 .int L(P7Q8)-L(fwdPxQx)
569
570 .int L(P0Q9)-L(fwdPxQx) /* 72 */
571 .int L(P1Q9)-L(fwdPxQx)
572 .int L(P2Q9)-L(fwdPxQx)
573 .int L(P3Q9)-L(fwdPxQx)
574 .int L(P4Q9)-L(fwdPxQx)
575 .int L(P5Q9)-L(fwdPxQx)
576 .int L(P6Q9)-L(fwdPxQx)
577 .int L(P7Q9)-L(fwdPxQx) /* 79 */
578
579 .p2align 4
580 L(P0Q9):
581 mov -0x48(%rdi), %rcx
582 mov %rcx, -0x48(%rsi)
583 L(P0Q8):
584 mov -0x40(%rdi), %r10
585 mov %r10, -0x40(%rsi)
586 L(P0Q7):
587 mov -0x38(%rdi), %r8
588 mov %r8, -0x38(%rsi)
589 L(P0Q6):
590 mov -0x30(%rdi), %rcx
591 mov %rcx, -0x30(%rsi)
592 L(P0Q5):
593 mov -0x28(%rdi), %r10
594 mov %r10, -0x28(%rsi)
595 L(P0Q4):
596 mov -0x20(%rdi), %r8
597 mov %r8, -0x20(%rsi)
598 L(P0Q3):
599 mov -0x18(%rdi), %rcx
600 mov %rcx, -0x18(%rsi)
601 L(P0Q2):
602 mov -0x10(%rdi), %r10
603 mov %r10, -0x10(%rsi)
604 L(P0Q1):
605 mov -0x8(%rdi), %r8
606 mov %r8, -0x8(%rsi)
607 L(P0Q0):
608 ret
609
610 .p2align 4
611 L(P1Q9):
612 mov -0x49(%rdi), %r8
613 mov %r8, -0x49(%rsi)
614 L(P1Q8):
615 mov -0x41(%rdi), %rcx
616 mov %rcx, -0x41(%rsi)
617 L(P1Q7):
618 mov -0x39(%rdi), %r10
619 mov %r10, -0x39(%rsi)
620 L(P1Q6):
621 mov -0x31(%rdi), %r8
622 mov %r8, -0x31(%rsi)
623 L(P1Q5):
624 mov -0x29(%rdi), %rcx
625 mov %rcx, -0x29(%rsi)
626 L(P1Q4):
627 mov -0x21(%rdi), %r10
628 mov %r10, -0x21(%rsi)
629 L(P1Q3):
630 mov -0x19(%rdi), %r8
631 mov %r8, -0x19(%rsi)
632 L(P1Q2):
633 mov -0x11(%rdi), %rcx
634 mov %rcx, -0x11(%rsi)
635 L(P1Q1):
636 mov -0x9(%rdi), %r10
637 mov %r10, -0x9(%rsi)
638 L(P1Q0):
639 movzbq -0x1(%rdi), %r8
640 mov %r8b, -0x1(%rsi)
641 ret
642
643 .p2align 4
644 L(P2Q9):
645 mov -0x4a(%rdi), %r8
646 mov %r8, -0x4a(%rsi)
647 L(P2Q8):
648 mov -0x42(%rdi), %rcx
649 mov %rcx, -0x42(%rsi)
650 L(P2Q7):
651 mov -0x3a(%rdi), %r10
652 mov %r10, -0x3a(%rsi)
653 L(P2Q6):
654 mov -0x32(%rdi), %r8
655 mov %r8, -0x32(%rsi)
656 L(P2Q5):
657 mov -0x2a(%rdi), %rcx
658 mov %rcx, -0x2a(%rsi)
659 L(P2Q4):
660 mov -0x22(%rdi), %r10
661 mov %r10, -0x22(%rsi)
662 L(P2Q3):
663 mov -0x1a(%rdi), %r8
664 mov %r8, -0x1a(%rsi)
665 L(P2Q2):
666 mov -0x12(%rdi), %rcx
667 mov %rcx, -0x12(%rsi)
668 L(P2Q1):
669 mov -0xa(%rdi), %r10
670 mov %r10, -0xa(%rsi)
671 L(P2Q0):
672 movzwq -0x2(%rdi), %r8
673 mov %r8w, -0x2(%rsi)
674 ret
675
676 .p2align 4
677 L(P3Q9):
678 mov -0x4b(%rdi), %r8
679 mov %r8, -0x4b(%rsi)
680 L(P3Q8):
681 mov -0x43(%rdi), %rcx
682 mov %rcx, -0x43(%rsi)
683 L(P3Q7):
684 mov -0x3b(%rdi), %r10
685 mov %r10, -0x3b(%rsi)
686 L(P3Q6):
687 mov -0x33(%rdi), %r8
688 mov %r8, -0x33(%rsi)
689 L(P3Q5):
690 mov -0x2b(%rdi), %rcx
691 mov %rcx, -0x2b(%rsi)
692 L(P3Q4):
693 mov -0x23(%rdi), %r10
694 mov %r10, -0x23(%rsi)
695 L(P3Q3):
696 mov -0x1b(%rdi), %r8
697 mov %r8, -0x1b(%rsi)
698 L(P3Q2):
699 mov -0x13(%rdi), %rcx
700 mov %rcx, -0x13(%rsi)
701 L(P3Q1):
702 mov -0xb(%rdi), %r10
703 mov %r10, -0xb(%rsi)
704 /*
705 * These trailing loads/stores have to do all their loads 1st,
706 * then do the stores.
707 */
708 L(P3Q0):
709 movzwq -0x3(%rdi), %r8
710 movzbq -0x1(%rdi), %r10
711 mov %r8w, -0x3(%rsi)
712 mov %r10b, -0x1(%rsi)
713 ret
714
715 .p2align 4
716 L(P4Q9):
717 mov -0x4c(%rdi), %r8
718 mov %r8, -0x4c(%rsi)
719 L(P4Q8):
720 mov -0x44(%rdi), %rcx
721 mov %rcx, -0x44(%rsi)
722 L(P4Q7):
723 mov -0x3c(%rdi), %r10
724 mov %r10, -0x3c(%rsi)
725 L(P4Q6):
726 mov -0x34(%rdi), %r8
727 mov %r8, -0x34(%rsi)
728 L(P4Q5):
729 mov -0x2c(%rdi), %rcx
730 mov %rcx, -0x2c(%rsi)
731 L(P4Q4):
732 mov -0x24(%rdi), %r10
733 mov %r10, -0x24(%rsi)
734 L(P4Q3):
735 mov -0x1c(%rdi), %r8
736 mov %r8, -0x1c(%rsi)
737 L(P4Q2):
738 mov -0x14(%rdi), %rcx
739 mov %rcx, -0x14(%rsi)
740 L(P4Q1):
741 mov -0xc(%rdi), %r10
742 mov %r10, -0xc(%rsi)
743 L(P4Q0):
744 mov -0x4(%rdi), %r8d
745 mov %r8d, -0x4(%rsi)
746 ret
747
748 .p2align 4
749 L(P5Q9):
750 mov -0x4d(%rdi), %r8
751 mov %r8, -0x4d(%rsi)
752 L(P5Q8):
753 mov -0x45(%rdi), %rcx
754 mov %rcx, -0x45(%rsi)
755 L(P5Q7):
756 mov -0x3d(%rdi), %r10
757 mov %r10, -0x3d(%rsi)
758 L(P5Q6):
759 mov -0x35(%rdi), %r8
760 mov %r8, -0x35(%rsi)
761 L(P5Q5):
762 mov -0x2d(%rdi), %rcx
763 mov %rcx, -0x2d(%rsi)
764 L(P5Q4):
765 mov -0x25(%rdi), %r10
766 mov %r10, -0x25(%rsi)
767 L(P5Q3):
768 mov -0x1d(%rdi), %r8
769 mov %r8, -0x1d(%rsi)
770 L(P5Q2):
771 mov -0x15(%rdi), %rcx
772 mov %rcx, -0x15(%rsi)
773 L(P5Q1):
774 mov -0xd(%rdi), %r10
775 mov %r10, -0xd(%rsi)
776 L(P5Q0):
777 mov -0x5(%rdi), %r8d
778 movzbq -0x1(%rdi), %r10
779 mov %r8d, -0x5(%rsi)
780 mov %r10b, -0x1(%rsi)
781 ret
782
783 .p2align 4
784 L(P6Q9):
785 mov -0x4e(%rdi), %r8
786 mov %r8, -0x4e(%rsi)
787 L(P6Q8):
788 mov -0x46(%rdi), %rcx
789 mov %rcx, -0x46(%rsi)
790 L(P6Q7):
791 mov -0x3e(%rdi), %r10
792 mov %r10, -0x3e(%rsi)
793 L(P6Q6):
794 mov -0x36(%rdi), %r8
795 mov %r8, -0x36(%rsi)
796 L(P6Q5):
797 mov -0x2e(%rdi), %rcx
798 mov %rcx, -0x2e(%rsi)
799 L(P6Q4):
800 mov -0x26(%rdi), %r10
801 mov %r10, -0x26(%rsi)
802 L(P6Q3):
803 mov -0x1e(%rdi), %r8
804 mov %r8, -0x1e(%rsi)
805 L(P6Q2):
806 mov -0x16(%rdi), %rcx
807 mov %rcx, -0x16(%rsi)
808 L(P6Q1):
809 mov -0xe(%rdi), %r10
810 mov %r10, -0xe(%rsi)
811 L(P6Q0):
812 mov -0x6(%rdi), %r8d
813 movzwq -0x2(%rdi), %r10
814 mov %r8d, -0x6(%rsi)
815 mov %r10w, -0x2(%rsi)
816 ret
817
818 .p2align 4
819 L(P7Q9):
820 mov -0x4f(%rdi), %r8
821 mov %r8, -0x4f(%rsi)
822 L(P7Q8):
823 mov -0x47(%rdi), %rcx
824 mov %rcx, -0x47(%rsi)
825 L(P7Q7):
826 mov -0x3f(%rdi), %r10
827 mov %r10, -0x3f(%rsi)
828 L(P7Q6):
829 mov -0x37(%rdi), %r8
830 mov %r8, -0x37(%rsi)
831 L(P7Q5):
832 mov -0x2f(%rdi), %rcx
833 mov %rcx, -0x2f(%rsi)
834 L(P7Q4):
835 mov -0x27(%rdi), %r10
836 mov %r10, -0x27(%rsi)
837 L(P7Q3):
838 mov -0x1f(%rdi), %r8
839 mov %r8, -0x1f(%rsi)
840 L(P7Q2):
841 mov -0x17(%rdi), %rcx
842 mov %rcx, -0x17(%rsi)
843 L(P7Q1):
844 mov -0xf(%rdi), %r10
845 mov %r10, -0xf(%rsi)
846 L(P7Q0):
847 mov -0x7(%rdi), %r8d
848 movzwq -0x3(%rdi), %r10
849 movzbq -0x1(%rdi), %rcx
850 mov %r8d, -0x7(%rsi)
851 mov %r10w, -0x3(%rsi)
852 mov %cl, -0x1(%rsi)
853 ret
854
855 /*
856 * For large sizes rep smovq is fastest.
857 * Transition point determined experimentally as measured on
858 * Intel Xeon processors (incl. Nehalem and previous generations) and
	 * AMD Opteron. The transition value is patched into the code at boot
	 * time to avoid the cost of a memory reference.
861 */
862 .globl bcopy_patch_start
863 bcopy_patch_start:
864 cmpq $BCOPY_NHM_REP, %rdx
865 .globl bcopy_patch_end
866 bcopy_patch_end:
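
	/*
	 * The two labels above bracket an alternative first instruction for
	 * bcopy_ck_size.  On CPUs where the higher BCOPY_NHM_REP threshold
	 * is a win, the boot-time patching code is expected to copy this
	 * cmpq image over the default cmpq $BCOPY_DFLT_REP below; either
	 * way the threshold lives in an immediate rather than in memory.
	 */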
867
868 .p2align 4
869 ALTENTRY(bcopy_ck_size)
870
871 cmpq $BCOPY_DFLT_REP, %rdx
872 jae L(use_rep)
873
874 /*
	 * Align to an 8-byte boundary. Avoids penalties from unaligned stores
876 * as well as from stores spanning cachelines.
877 */
878 test $0x7, %rsi
879 jz L(aligned_loop)
880 test $0x1, %rsi
881 jz 2f
882 movzbq (%rdi), %r8
883 dec %rdx
884 inc %rdi
885 mov %r8b, (%rsi)
886 inc %rsi
887 2:
888 test $0x2, %rsi
889 jz 4f
890 movzwq (%rdi), %r8
891 sub $0x2, %rdx
892 add $0x2, %rdi
893 mov %r8w, (%rsi)
894 add $0x2, %rsi
895 4:
896 test $0x4, %rsi
897 jz L(aligned_loop)
898 mov (%rdi), %r8d
899 sub $0x4, %rdx
900 add $0x4, %rdi
901 mov %r8d, (%rsi)
902 add $0x4, %rsi
903
904 /*
	 * Copy 64 bytes per loop
906 */
907 .p2align 4
908 L(aligned_loop):
909 mov (%rdi), %r8
910 mov 0x8(%rdi), %r10
911 lea -0x40(%rdx), %rdx
912 mov %r8, (%rsi)
913 mov %r10, 0x8(%rsi)
914 mov 0x10(%rdi), %rcx
915 mov 0x18(%rdi), %r8
916 mov %rcx, 0x10(%rsi)
917 mov %r8, 0x18(%rsi)
918
919 cmp $0x40, %rdx
920 mov 0x20(%rdi), %r10
921 mov 0x28(%rdi), %rcx
922 mov %r10, 0x20(%rsi)
923 mov %rcx, 0x28(%rsi)
924 mov 0x30(%rdi), %r8
925 mov 0x38(%rdi), %r10
926 lea 0x40(%rdi), %rdi
927 mov %r8, 0x30(%rsi)
928 mov %r10, 0x38(%rsi)
929 lea 0x40(%rsi), %rsi
930 jae L(aligned_loop)
931
932 /*
933 * Copy remaining bytes (0-63)
934 */
935 L(do_remainder):
936 leaq L(fwdPxQx)(%rip), %r10
937 addq %rdx, %rdi
938 addq %rdx, %rsi
939 movslq (%r10,%rdx,4), %rcx
940 leaq (%rcx,%r10,1), %r10
941 jmpq *%r10
942
943 /*
	 * Use rep smovq. Copy the remainder via unrolled code
945 */
946 .p2align 4
947 L(use_rep):
948 xchgq %rdi, %rsi /* %rsi = source, %rdi = destination */
949 movq %rdx, %rcx /* %rcx = count */
950 shrq $3, %rcx /* 8-byte word count */
951 rep
952 smovq
953
954 xchgq %rsi, %rdi /* %rdi = src, %rsi = destination */
955 andq $7, %rdx /* remainder */
956 jnz L(do_remainder)
957 ret
958 #undef L
959 SET_SIZE(bcopy_ck_size)
960
961 #ifdef DEBUG
962 /*
	 * Set up a frame on the run-time stack. The end of the input argument
	 * area must be aligned on a 16-byte boundary. The stack pointer %rsp
	 * always points to the end of the latest allocated stack frame.
966 * panic(const char *format, ...) is a varargs function. When a
967 * function taking variable arguments is called, %rax must be set
968 * to eight times the number of floating point parameters passed
969 * to the function in SSE registers.
970 */
971 call_panic:
972 pushq %rbp /* align stack properly */
973 movq %rsp, %rbp
974 xorl %eax, %eax /* no variable arguments */
975 call panic /* %rdi = format string */
976 #endif
977 SET_SIZE(bcopy_altentry)
978 SET_SIZE(bcopy)
979
980 #elif defined(__i386)
981
982 #define ARG_FROM 4
983 #define ARG_TO 8
984 #define ARG_COUNT 12
985
986 ENTRY(bcopy)
987 #ifdef DEBUG
988 movl ARG_COUNT(%esp), %eax
989 orl %eax, %eax
990 jz 1f
991 movl postbootkernelbase, %eax
992 cmpl %eax, ARG_FROM(%esp)
993 jb 0f
994 cmpl %eax, ARG_TO(%esp)
995 jnb 1f
996 0: pushl %ebp
997 movl %esp, %ebp
998 pushl $.bcopy_panic_msg
999 call panic
1000 1:
1001 #endif
1002 do_copy:
1003 movl %esi, %eax /* save registers */
1004 movl %edi, %edx
1005 movl ARG_COUNT(%esp), %ecx
1006 movl ARG_FROM(%esp), %esi
1007 movl ARG_TO(%esp), %edi
1008
1009 shrl $2, %ecx /* word count */
1010 rep
1011 smovl
1012 movl ARG_COUNT(%esp), %ecx
1013 andl $3, %ecx /* bytes left over */
1014 rep
1015 smovb
1016 movl %eax, %esi /* restore registers */
1017 movl %edx, %edi
1018 ret
1019 SET_SIZE(bcopy)
1020
1021 #undef ARG_COUNT
1022 #undef ARG_FROM
1023 #undef ARG_TO
1024
1025 #endif /* __i386 */
1026 #endif /* __lint */
1027
1028
1029 /*
1030 * Zero a block of storage, returning an error code if we
1031 * take a kernel pagefault which cannot be resolved.
1032 * Returns errno value on pagefault error, 0 if all ok
1033 */
1034
1035 #if defined(__lint)
1036
1037 /* ARGSUSED */
1038 int
1039 kzero(void *addr, size_t count)
1040 { return (0); }
1041
1042 #else /* __lint */
1043
1044 #if defined(__amd64)
1045
1046 ENTRY(kzero)
1047 #ifdef DEBUG
1048 cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */
1049 jnb 0f
1050 leaq .kzero_panic_msg(%rip), %rdi
1051 jmp call_panic /* setup stack and call panic */
1052 0:
1053 #endif
1054 /*
1055 * pass lofault value as 3rd argument for fault return
1056 */
1057 leaq _kzeroerr(%rip), %rdx
1058
1059 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
1060 movq T_LOFAULT(%r9), %r11 /* save the current lofault */
1061 movq %rdx, T_LOFAULT(%r9) /* new lofault */
1062 call bzero_altentry
1063 xorl %eax, %eax
1064 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
1065 ret
1066 /*
1067 * A fault during bzero is indicated through an errno value
1068 * in %rax when we iretq to here.
1069 */
1070 _kzeroerr:
1071 addq $8, %rsp /* pop bzero_altentry call ret addr */
1072 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
1073 ret
1074 SET_SIZE(kzero)
1075
1076 #elif defined(__i386)
1077
1078 #define ARG_ADDR 8
1079 #define ARG_COUNT 12
1080
1081 ENTRY(kzero)
1082 #ifdef DEBUG
1083 pushl %ebp
1084 movl %esp, %ebp
1085 movl postbootkernelbase, %eax
1086 cmpl %eax, ARG_ADDR(%ebp)
1087 jnb 0f
1088 pushl $.kzero_panic_msg
1089 call panic
1090 0: popl %ebp
1091 #endif
1092 lea _kzeroerr, %eax /* kzeroerr is lofault value */
1093
1094 pushl %ebp /* save stack base */
1095 movl %esp, %ebp /* set new stack base */
1096 pushl %edi /* save %edi */
1097
1098 mov %gs:CPU_THREAD, %edx
1099 movl T_LOFAULT(%edx), %edi
1100 pushl %edi /* save the current lofault */
1101 movl %eax, T_LOFAULT(%edx) /* new lofault */
1102
1103 movl ARG_COUNT(%ebp), %ecx /* get size in bytes */
1104 movl ARG_ADDR(%ebp), %edi /* %edi <- address of bytes to clear */
1105 shrl $2, %ecx /* Count of double words to zero */
1106 xorl %eax, %eax /* sstol val */
1107 rep
1108 sstol /* %ecx contains words to clear (%eax=0) */
1109
1110 movl ARG_COUNT(%ebp), %ecx /* get size in bytes */
1111 andl $3, %ecx /* do mod 4 */
1112 rep
1113 sstob /* %ecx contains residual bytes to clear */
1114
1115 /*
1116 * A fault during kzero is indicated through an errno value
1117 * in %eax when we iret to here.
1118 */
1119 _kzeroerr:
1120 popl %edi
1121 movl %edi, T_LOFAULT(%edx) /* restore the original lofault */
1122 popl %edi
1123 popl %ebp
1124 ret
1125 SET_SIZE(kzero)
1126
1127 #undef ARG_ADDR
1128 #undef ARG_COUNT
1129
1130 #endif /* __i386 */
1131 #endif /* __lint */
1132
1133 /*
1134 * Zero a block of storage.
1135 */
1136
1137 #if defined(__lint)
1138
1139 /* ARGSUSED */
1140 void
1141 bzero(void *addr, size_t count)
1142 {}
1143
1144 #else /* __lint */
1145
1146 #if defined(__amd64)
1147
1148 ENTRY(bzero)
1149 #ifdef DEBUG
1150 cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */
1151 jnb 0f
1152 leaq .bzero_panic_msg(%rip), %rdi
1153 jmp call_panic /* setup stack and call panic */
1154 0:
1155 #endif
1156 ALTENTRY(bzero_altentry)
1157 do_zero:
1158 #define L(s) .bzero/**/s
1159 xorl %eax, %eax
1160
1161 cmpq $0x50, %rsi /* 80 */
1162 jae L(ck_align)
1163
1164 /*
	 * Performance data shows that many callers are zeroing small buffers,
	 * so for the best performance at these sizes unrolled code is used.
	 * Store zeros without worrying about alignment.
1168 */
1169 leaq L(setPxQx)(%rip), %r10
1170 addq %rsi, %rdi
1171 movslq (%r10,%rsi,4), %rcx
1172 leaq (%rcx,%r10,1), %r10
1173 jmpq *%r10
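
	/*
	 * Same dispatch scheme as L(fwdPxQx) in bcopy above: %rsi (0-79)
	 * selects an L(PpQq) fragment that performs q 8-byte stores and a
	 * p-byte tail, all relative to the already-advanced %rdi.
	 */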
1174
1175 .p2align 4
1176 L(setPxQx):
1177 .int L(P0Q0)-L(setPxQx) /* 0 */
1178 .int L(P1Q0)-L(setPxQx)
1179 .int L(P2Q0)-L(setPxQx)
1180 .int L(P3Q0)-L(setPxQx)
1181 .int L(P4Q0)-L(setPxQx)
1182 .int L(P5Q0)-L(setPxQx)
1183 .int L(P6Q0)-L(setPxQx)
1184 .int L(P7Q0)-L(setPxQx)
1185
1186 .int L(P0Q1)-L(setPxQx) /* 8 */
1187 .int L(P1Q1)-L(setPxQx)
1188 .int L(P2Q1)-L(setPxQx)
1189 .int L(P3Q1)-L(setPxQx)
1190 .int L(P4Q1)-L(setPxQx)
1191 .int L(P5Q1)-L(setPxQx)
1192 .int L(P6Q1)-L(setPxQx)
1193 .int L(P7Q1)-L(setPxQx)
1194
1195 .int L(P0Q2)-L(setPxQx) /* 16 */
1196 .int L(P1Q2)-L(setPxQx)
1197 .int L(P2Q2)-L(setPxQx)
1198 .int L(P3Q2)-L(setPxQx)
1199 .int L(P4Q2)-L(setPxQx)
1200 .int L(P5Q2)-L(setPxQx)
1201 .int L(P6Q2)-L(setPxQx)
1202 .int L(P7Q2)-L(setPxQx)
1203
1204 .int L(P0Q3)-L(setPxQx) /* 24 */
1205 .int L(P1Q3)-L(setPxQx)
1206 .int L(P2Q3)-L(setPxQx)
1207 .int L(P3Q3)-L(setPxQx)
1208 .int L(P4Q3)-L(setPxQx)
1209 .int L(P5Q3)-L(setPxQx)
1210 .int L(P6Q3)-L(setPxQx)
1211 .int L(P7Q3)-L(setPxQx)
1212
1213 .int L(P0Q4)-L(setPxQx) /* 32 */
1214 .int L(P1Q4)-L(setPxQx)
1215 .int L(P2Q4)-L(setPxQx)
1216 .int L(P3Q4)-L(setPxQx)
1217 .int L(P4Q4)-L(setPxQx)
1218 .int L(P5Q4)-L(setPxQx)
1219 .int L(P6Q4)-L(setPxQx)
1220 .int L(P7Q4)-L(setPxQx)
1221
1222 .int L(P0Q5)-L(setPxQx) /* 40 */
1223 .int L(P1Q5)-L(setPxQx)
1224 .int L(P2Q5)-L(setPxQx)
1225 .int L(P3Q5)-L(setPxQx)
1226 .int L(P4Q5)-L(setPxQx)
1227 .int L(P5Q5)-L(setPxQx)
1228 .int L(P6Q5)-L(setPxQx)
1229 .int L(P7Q5)-L(setPxQx)
1230
1231 .int L(P0Q6)-L(setPxQx) /* 48 */
1232 .int L(P1Q6)-L(setPxQx)
1233 .int L(P2Q6)-L(setPxQx)
1234 .int L(P3Q6)-L(setPxQx)
1235 .int L(P4Q6)-L(setPxQx)
1236 .int L(P5Q6)-L(setPxQx)
1237 .int L(P6Q6)-L(setPxQx)
1238 .int L(P7Q6)-L(setPxQx)
1239
1240 .int L(P0Q7)-L(setPxQx) /* 56 */
1241 .int L(P1Q7)-L(setPxQx)
1242 .int L(P2Q7)-L(setPxQx)
1243 .int L(P3Q7)-L(setPxQx)
1244 .int L(P4Q7)-L(setPxQx)
1245 .int L(P5Q7)-L(setPxQx)
1246 .int L(P6Q7)-L(setPxQx)
1247 .int L(P7Q7)-L(setPxQx)
1248
1249 .int L(P0Q8)-L(setPxQx) /* 64 */
1250 .int L(P1Q8)-L(setPxQx)
1251 .int L(P2Q8)-L(setPxQx)
1252 .int L(P3Q8)-L(setPxQx)
1253 .int L(P4Q8)-L(setPxQx)
1254 .int L(P5Q8)-L(setPxQx)
1255 .int L(P6Q8)-L(setPxQx)
1256 .int L(P7Q8)-L(setPxQx)
1257
1258 .int L(P0Q9)-L(setPxQx) /* 72 */
1259 .int L(P1Q9)-L(setPxQx)
1260 .int L(P2Q9)-L(setPxQx)
1261 .int L(P3Q9)-L(setPxQx)
1262 .int L(P4Q9)-L(setPxQx)
1263 .int L(P5Q9)-L(setPxQx)
1264 .int L(P6Q9)-L(setPxQx)
1265 .int L(P7Q9)-L(setPxQx) /* 79 */
1266
1267 .p2align 4
1268 L(P0Q9): mov %rax, -0x48(%rdi)
1269 L(P0Q8): mov %rax, -0x40(%rdi)
1270 L(P0Q7): mov %rax, -0x38(%rdi)
1271 L(P0Q6): mov %rax, -0x30(%rdi)
1272 L(P0Q5): mov %rax, -0x28(%rdi)
1273 L(P0Q4): mov %rax, -0x20(%rdi)
1274 L(P0Q3): mov %rax, -0x18(%rdi)
1275 L(P0Q2): mov %rax, -0x10(%rdi)
1276 L(P0Q1): mov %rax, -0x8(%rdi)
1277 L(P0Q0):
1278 ret
1279
1280 .p2align 4
1281 L(P1Q9): mov %rax, -0x49(%rdi)
1282 L(P1Q8): mov %rax, -0x41(%rdi)
1283 L(P1Q7): mov %rax, -0x39(%rdi)
1284 L(P1Q6): mov %rax, -0x31(%rdi)
1285 L(P1Q5): mov %rax, -0x29(%rdi)
1286 L(P1Q4): mov %rax, -0x21(%rdi)
1287 L(P1Q3): mov %rax, -0x19(%rdi)
1288 L(P1Q2): mov %rax, -0x11(%rdi)
1289 L(P1Q1): mov %rax, -0x9(%rdi)
1290 L(P1Q0): mov %al, -0x1(%rdi)
1291 ret
1292
1293 .p2align 4
1294 L(P2Q9): mov %rax, -0x4a(%rdi)
1295 L(P2Q8): mov %rax, -0x42(%rdi)
1296 L(P2Q7): mov %rax, -0x3a(%rdi)
1297 L(P2Q6): mov %rax, -0x32(%rdi)
1298 L(P2Q5): mov %rax, -0x2a(%rdi)
1299 L(P2Q4): mov %rax, -0x22(%rdi)
1300 L(P2Q3): mov %rax, -0x1a(%rdi)
1301 L(P2Q2): mov %rax, -0x12(%rdi)
1302 L(P2Q1): mov %rax, -0xa(%rdi)
1303 L(P2Q0): mov %ax, -0x2(%rdi)
1304 ret
1305
1306 .p2align 4
1307 L(P3Q9): mov %rax, -0x4b(%rdi)
1308 L(P3Q8): mov %rax, -0x43(%rdi)
1309 L(P3Q7): mov %rax, -0x3b(%rdi)
1310 L(P3Q6): mov %rax, -0x33(%rdi)
1311 L(P3Q5): mov %rax, -0x2b(%rdi)
1312 L(P3Q4): mov %rax, -0x23(%rdi)
1313 L(P3Q3): mov %rax, -0x1b(%rdi)
1314 L(P3Q2): mov %rax, -0x13(%rdi)
1315 L(P3Q1): mov %rax, -0xb(%rdi)
1316 L(P3Q0): mov %ax, -0x3(%rdi)
1317 mov %al, -0x1(%rdi)
1318 ret
1319
1320 .p2align 4
1321 L(P4Q9): mov %rax, -0x4c(%rdi)
1322 L(P4Q8): mov %rax, -0x44(%rdi)
1323 L(P4Q7): mov %rax, -0x3c(%rdi)
1324 L(P4Q6): mov %rax, -0x34(%rdi)
1325 L(P4Q5): mov %rax, -0x2c(%rdi)
1326 L(P4Q4): mov %rax, -0x24(%rdi)
1327 L(P4Q3): mov %rax, -0x1c(%rdi)
1328 L(P4Q2): mov %rax, -0x14(%rdi)
1329 L(P4Q1): mov %rax, -0xc(%rdi)
1330 L(P4Q0): mov %eax, -0x4(%rdi)
1331 ret
1332
1333 .p2align 4
1334 L(P5Q9): mov %rax, -0x4d(%rdi)
1335 L(P5Q8): mov %rax, -0x45(%rdi)
1336 L(P5Q7): mov %rax, -0x3d(%rdi)
1337 L(P5Q6): mov %rax, -0x35(%rdi)
1338 L(P5Q5): mov %rax, -0x2d(%rdi)
1339 L(P5Q4): mov %rax, -0x25(%rdi)
1340 L(P5Q3): mov %rax, -0x1d(%rdi)
1341 L(P5Q2): mov %rax, -0x15(%rdi)
1342 L(P5Q1): mov %rax, -0xd(%rdi)
1343 L(P5Q0): mov %eax, -0x5(%rdi)
1344 mov %al, -0x1(%rdi)
1345 ret
1346
1347 .p2align 4
1348 L(P6Q9): mov %rax, -0x4e(%rdi)
1349 L(P6Q8): mov %rax, -0x46(%rdi)
1350 L(P6Q7): mov %rax, -0x3e(%rdi)
1351 L(P6Q6): mov %rax, -0x36(%rdi)
1352 L(P6Q5): mov %rax, -0x2e(%rdi)
1353 L(P6Q4): mov %rax, -0x26(%rdi)
1354 L(P6Q3): mov %rax, -0x1e(%rdi)
1355 L(P6Q2): mov %rax, -0x16(%rdi)
1356 L(P6Q1): mov %rax, -0xe(%rdi)
1357 L(P6Q0): mov %eax, -0x6(%rdi)
1358 mov %ax, -0x2(%rdi)
1359 ret
1360
1361 .p2align 4
1362 L(P7Q9): mov %rax, -0x4f(%rdi)
1363 L(P7Q8): mov %rax, -0x47(%rdi)
1364 L(P7Q7): mov %rax, -0x3f(%rdi)
1365 L(P7Q6): mov %rax, -0x37(%rdi)
1366 L(P7Q5): mov %rax, -0x2f(%rdi)
1367 L(P7Q4): mov %rax, -0x27(%rdi)
1368 L(P7Q3): mov %rax, -0x1f(%rdi)
1369 L(P7Q2): mov %rax, -0x17(%rdi)
1370 L(P7Q1): mov %rax, -0xf(%rdi)
1371 L(P7Q0): mov %eax, -0x7(%rdi)
1372 mov %ax, -0x3(%rdi)
1373 mov %al, -0x1(%rdi)
1374 ret
1375
1376 /*
1377 * Align to a 16-byte boundary. Avoids penalties from unaligned stores
	 * as well as from stores spanning cachelines. Note that 16-byte
	 * alignment is better in the case where rep sstoq is used.
1380 */
1381 .p2align 4
1382 L(ck_align):
1383 test $0xf, %rdi
1384 jz L(aligned_now)
1385 test $1, %rdi
1386 jz 2f
1387 mov %al, (%rdi)
1388 dec %rsi
1389 lea 1(%rdi),%rdi
1390 2:
1391 test $2, %rdi
1392 jz 4f
1393 mov %ax, (%rdi)
1394 sub $2, %rsi
1395 lea 2(%rdi),%rdi
1396 4:
1397 test $4, %rdi
1398 jz 8f
1399 mov %eax, (%rdi)
1400 sub $4, %rsi
1401 lea 4(%rdi),%rdi
1402 8:
1403 test $8, %rdi
1404 jz L(aligned_now)
1405 mov %rax, (%rdi)
1406 sub $8, %rsi
1407 lea 8(%rdi),%rdi
1408
1409 /*
1410 * For large sizes rep sstoq is fastest.
1411 * Transition point determined experimentally as measured on
1412 * Intel Xeon processors (incl. Nehalem) and AMD Opteron.
1413 */
1414 L(aligned_now):
1415 cmp $BZERO_USE_REP, %rsi
1416 ja L(use_rep)
1417
1418 /*
	 * zero 64 bytes per loop
1420 */
1421 .p2align 4
1422 L(bzero_loop):
1423 leaq -0x40(%rsi), %rsi
1424 cmpq $0x40, %rsi
1425 movq %rax, (%rdi)
1426 movq %rax, 0x8(%rdi)
1427 movq %rax, 0x10(%rdi)
1428 movq %rax, 0x18(%rdi)
1429 movq %rax, 0x20(%rdi)
1430 movq %rax, 0x28(%rdi)
1431 movq %rax, 0x30(%rdi)
1432 movq %rax, 0x38(%rdi)
1433 leaq 0x40(%rdi), %rdi
1434 jae L(bzero_loop)
1435
1436 /*
	 * Clear any remaining bytes.
1438 */
1439 9:
1440 leaq L(setPxQx)(%rip), %r10
1441 addq %rsi, %rdi
1442 movslq (%r10,%rsi,4), %rcx
1443 leaq (%rcx,%r10,1), %r10
1444 jmpq *%r10
1445
1446 /*
1447 * Use rep sstoq. Clear any remainder via unrolled code
1448 */
1449 .p2align 4
1450 L(use_rep):
1451 movq %rsi, %rcx /* get size in bytes */
1452 shrq $3, %rcx /* count of 8-byte words to zero */
1453 rep
1454 sstoq /* %rcx = words to clear (%rax=0) */
1455 andq $7, %rsi /* remaining bytes */
1456 jnz 9b
1457 ret
1458 #undef L
1459 SET_SIZE(bzero_altentry)
1460 SET_SIZE(bzero)
1461
1462 #elif defined(__i386)
1463
1464 #define ARG_ADDR 4
1465 #define ARG_COUNT 8
1466
1467 ENTRY(bzero)
1468 #ifdef DEBUG
1469 movl postbootkernelbase, %eax
1470 cmpl %eax, ARG_ADDR(%esp)
1471 jnb 0f
1472 pushl %ebp
1473 movl %esp, %ebp
1474 pushl $.bzero_panic_msg
1475 call panic
1476 0:
1477 #endif
1478 do_zero:
1479 movl %edi, %edx
1480 movl ARG_COUNT(%esp), %ecx
1481 movl ARG_ADDR(%esp), %edi
1482 shrl $2, %ecx
1483 xorl %eax, %eax
1484 rep
1485 sstol
1486 movl ARG_COUNT(%esp), %ecx
1487 andl $3, %ecx
1488 rep
1489 sstob
1490 movl %edx, %edi
1491 ret
1492 SET_SIZE(bzero)
1493
1494 #undef ARG_ADDR
1495 #undef ARG_COUNT
1496
1497 #endif /* __i386 */
1498 #endif /* __lint */
1499
1500 /*
1501 * Transfer data to and from user space -
 * Note that these routines can cause faults.
1503 * It is assumed that the kernel has nothing at
1504 * less than KERNELBASE in the virtual address space.
1505 *
1506 * Note that copyin(9F) and copyout(9F) are part of the
1507 * DDI/DKI which specifies that they return '-1' on "errors."
1508 *
1509 * Sigh.
1510 *
 * So there are two extremely similar routines - xcopyin_nta() and
1512 * xcopyout_nta() which return the errno that we've faithfully computed.
1513 * This allows other callers (e.g. uiomove(9F)) to work correctly.
1514 * Given that these are used pretty heavily, we expand the calling
1515 * sequences inline for all flavours (rather than making wrappers).
1516 */
1517
1518 /*
1519 * Copy user data to kernel space.
1520 */
1521
1522 #if defined(__lint)
1523
1524 /* ARGSUSED */
1525 int
1526 copyin(const void *uaddr, void *kaddr, size_t count)
1527 { return (0); }
1528
1529 #else /* lint */
1530
1531 #if defined(__amd64)
1532
1533 ENTRY(copyin)
1534 pushq %rbp
1535 movq %rsp, %rbp
1536 subq $24, %rsp
1537
1538 /*
1539 * save args in case we trap and need to rerun as a copyop
1540 */
1541 movq %rdi, (%rsp)
1542 movq %rsi, 0x8(%rsp)
1543 movq %rdx, 0x10(%rsp)
1544
1545 movq kernelbase(%rip), %rax
1546 #ifdef DEBUG
1547 cmpq %rax, %rsi /* %rsi = kaddr */
1548 jnb 1f
1549 leaq .copyin_panic_msg(%rip), %rdi
1550 xorl %eax, %eax
1551 call panic
1552 1:
1553 #endif
1554 /*
1555 * pass lofault value as 4th argument to do_copy_fault
1556 */
1557 leaq _copyin_err(%rip), %rcx
1558
1559 movq %gs:CPU_THREAD, %r9
1560 cmpq %rax, %rdi /* test uaddr < kernelbase */
	jae	3f			/* take copyop if uaddr >= kernelbase */
1562 SMAP_DISABLE_INSTR(0)
1563 jmp do_copy_fault /* Takes care of leave for us */
1564
1565 _copyin_err:
1566 SMAP_ENABLE_INSTR(2)
1567 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1568 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1569 3:
1570 movq T_COPYOPS(%r9), %rax
1571 cmpq $0, %rax
1572 jz 2f
1573 /*
1574 * reload args for the copyop
1575 */
1576 movq (%rsp), %rdi
1577 movq 0x8(%rsp), %rsi
1578 movq 0x10(%rsp), %rdx
1579 leave
1580 jmp *CP_COPYIN(%rax)
1581
1582 2: movl $-1, %eax
1583 leave
1584 ret
1585 SET_SIZE(copyin)
1586
1587 #elif defined(__i386)
1588
1589 #define ARG_UADDR 4
1590 #define ARG_KADDR 8
1591
1592 ENTRY(copyin)
1593 movl kernelbase, %ecx
1594 #ifdef DEBUG
1595 cmpl %ecx, ARG_KADDR(%esp)
1596 jnb 1f
1597 pushl %ebp
1598 movl %esp, %ebp
1599 pushl $.copyin_panic_msg
1600 call panic
1601 1:
1602 #endif
1603 lea _copyin_err, %eax
1604
1605 movl %gs:CPU_THREAD, %edx
1606 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
1607 jb do_copy_fault
1608 jmp 3f
1609
1610 _copyin_err:
1611 popl %ecx
1612 popl %edi
1613 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */
1614 popl %esi
1615 popl %ebp
1616 3:
1617 movl T_COPYOPS(%edx), %eax
1618 cmpl $0, %eax
1619 jz 2f
1620 jmp *CP_COPYIN(%eax)
1621
1622 2: movl $-1, %eax
1623 ret
1624 SET_SIZE(copyin)
1625
1626 #undef ARG_UADDR
1627 #undef ARG_KADDR
1628
1629 #endif /* __i386 */
1630 #endif /* __lint */
1631
1632 #if defined(__lint)
1633
1634 /* ARGSUSED */
1635 int
1636 xcopyin_nta(const void *uaddr, void *kaddr, size_t count, int copy_cached)
1637 { return (0); }
1638
1639 #else /* __lint */
1640
1641 #if defined(__amd64)
1642
1643 ENTRY(xcopyin_nta)
1644 pushq %rbp
1645 movq %rsp, %rbp
1646 subq $24, %rsp
1647
1648 /*
1649 * save args in case we trap and need to rerun as a copyop
1650 * %rcx is consumed in this routine so we don't need to save
1651 * it.
1652 */
1653 movq %rdi, (%rsp)
1654 movq %rsi, 0x8(%rsp)
1655 movq %rdx, 0x10(%rsp)
1656
1657 movq kernelbase(%rip), %rax
1658 #ifdef DEBUG
1659 cmpq %rax, %rsi /* %rsi = kaddr */
1660 jnb 1f
1661 leaq .xcopyin_panic_msg(%rip), %rdi
1662 xorl %eax, %eax
1663 call panic
1664 1:
1665 #endif
1666 movq %gs:CPU_THREAD, %r9
1667 cmpq %rax, %rdi /* test uaddr < kernelbase */
1668 jae 4f
1669 cmpq $0, %rcx /* No non-temporal access? */
1670 /*
1671 * pass lofault value as 4th argument to do_copy_fault
1672 */
1673 leaq _xcopyin_err(%rip), %rcx /* doesn't set rflags */
1674 jnz 6f /* use regular access */
1675 /*
1676 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1677 */
1678 cmpq $XCOPY_MIN_SIZE, %rdx
1679 jae 5f
1680 6:
1681 SMAP_DISABLE_INSTR(1)
1682 jmp do_copy_fault
1683
1684 /*
1685 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1686 * count is COUNT_ALIGN_SIZE aligned.
1687 */
1688 5:
1689 movq %rdi, %r10
1690 orq %rsi, %r10
1691 andq $NTA_ALIGN_MASK, %r10
1692 orq %rdx, %r10
1693 andq $COUNT_ALIGN_MASK, %r10
1694 jnz 6b
1695 leaq _xcopyin_nta_err(%rip), %rcx /* doesn't set rflags */
1696 SMAP_DISABLE_INSTR(2)
1697 jmp do_copy_fault_nta /* use non-temporal access */
1698
1699 4:
1700 movl $EFAULT, %eax
1701 jmp 3f
1702
1703 /*
1704 * A fault during do_copy_fault or do_copy_fault_nta is
1705 * indicated through an errno value in %rax and we iret from the
1706 * trap handler to here.
1707 */
1708 _xcopyin_err:
1709 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1710 _xcopyin_nta_err:
1711 SMAP_ENABLE_INSTR(3)
1712 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1713 3:
1714 movq T_COPYOPS(%r9), %r8
1715 cmpq $0, %r8
1716 jz 2f
1717
1718 /*
1719 * reload args for the copyop
1720 */
1721 movq (%rsp), %rdi
1722 movq 0x8(%rsp), %rsi
1723 movq 0x10(%rsp), %rdx
1724 leave
1725 jmp *CP_XCOPYIN(%r8)
1726
1727 2: leave
1728 ret
1729 SET_SIZE(xcopyin_nta)
1730
1731 #elif defined(__i386)
1732
1733 #define ARG_UADDR 4
1734 #define ARG_KADDR 8
1735 #define ARG_COUNT 12
1736 #define ARG_CACHED 16
1737
1738 .globl use_sse_copy
1739
1740 ENTRY(xcopyin_nta)
1741 movl kernelbase, %ecx
1742 lea _xcopyin_err, %eax
1743 movl %gs:CPU_THREAD, %edx
1744 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
1745 jae 4f
1746
1747 cmpl $0, use_sse_copy /* no sse support */
1748 jz do_copy_fault
1749
1750 cmpl $0, ARG_CACHED(%esp) /* copy_cached hint set? */
1751 jnz do_copy_fault
1752
1753 /*
1754 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1755 */
1756 cmpl $XCOPY_MIN_SIZE, ARG_COUNT(%esp)
1757 jb do_copy_fault
1758
1759 /*
1760 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1761 * count is COUNT_ALIGN_SIZE aligned.
1762 */
1763 movl ARG_UADDR(%esp), %ecx
1764 orl ARG_KADDR(%esp), %ecx
1765 andl $NTA_ALIGN_MASK, %ecx
1766 orl ARG_COUNT(%esp), %ecx
1767 andl $COUNT_ALIGN_MASK, %ecx
1768 jnz do_copy_fault
1769
	jmp	do_copy_fault_nta	/* use non-temporal access */
1771
1772 4:
1773 movl $EFAULT, %eax
1774 jmp 3f
1775
1776 /*
1777 * A fault during do_copy_fault or do_copy_fault_nta is
1778 * indicated through an errno value in %eax and we iret from the
1779 * trap handler to here.
1780 */
1781 _xcopyin_err:
1782 popl %ecx
1783 popl %edi
1784 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */
1785 popl %esi
1786 popl %ebp
1787 3:
1788 cmpl $0, T_COPYOPS(%edx)
1789 jz 2f
1790 movl T_COPYOPS(%edx), %eax
1791 jmp *CP_XCOPYIN(%eax)
1792
1793 2: rep; ret /* use 2 byte return instruction when branch target */
1794 /* AMD Software Optimization Guide - Section 6.2 */
1795 SET_SIZE(xcopyin_nta)
1796
1797 #undef ARG_UADDR
1798 #undef ARG_KADDR
1799 #undef ARG_COUNT
1800 #undef ARG_CACHED
1801
1802 #endif /* __i386 */
1803 #endif /* __lint */
1804
1805 /*
1806 * Copy kernel data to user space.
1807 */
1808
1809 #if defined(__lint)
1810
1811 /* ARGSUSED */
1812 int
1813 copyout(const void *kaddr, void *uaddr, size_t count)
1814 { return (0); }
1815
1816 #else /* __lint */
1817
1818 #if defined(__amd64)
1819
1820 ENTRY(copyout)
1821 pushq %rbp
1822 movq %rsp, %rbp
1823 subq $24, %rsp
1824
1825 /*
1826 * save args in case we trap and need to rerun as a copyop
1827 */
1828 movq %rdi, (%rsp)
1829 movq %rsi, 0x8(%rsp)
1830 movq %rdx, 0x10(%rsp)
1831
1832 movq kernelbase(%rip), %rax
1833 #ifdef DEBUG
1834 cmpq %rax, %rdi /* %rdi = kaddr */
1835 jnb 1f
1836 leaq .copyout_panic_msg(%rip), %rdi
1837 xorl %eax, %eax
1838 call panic
1839 1:
1840 #endif
1841 /*
1842 * pass lofault value as 4th argument to do_copy_fault
1843 */
1844 leaq _copyout_err(%rip), %rcx
1845
1846 movq %gs:CPU_THREAD, %r9
1847 cmpq %rax, %rsi /* test uaddr < kernelbase */
	jae	3f			/* take copyop if uaddr >= kernelbase */
1849 SMAP_DISABLE_INSTR(3)
1850 jmp do_copy_fault /* Calls leave for us */
1851
1852 _copyout_err:
1853 SMAP_ENABLE_INSTR(4)
1854 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1855 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1856 3:
1857 movq T_COPYOPS(%r9), %rax
1858 cmpq $0, %rax
1859 jz 2f
1860
1861 /*
1862 * reload args for the copyop
1863 */
1864 movq (%rsp), %rdi
1865 movq 0x8(%rsp), %rsi
1866 movq 0x10(%rsp), %rdx
1867 leave
1868 jmp *CP_COPYOUT(%rax)
1869
1870 2: movl $-1, %eax
1871 leave
1872 ret
1873 SET_SIZE(copyout)
1874
1875 #elif defined(__i386)
1876
1877 #define ARG_KADDR 4
1878 #define ARG_UADDR 8
1879
1880 ENTRY(copyout)
1881 movl kernelbase, %ecx
1882 #ifdef DEBUG
1883 cmpl %ecx, ARG_KADDR(%esp)
1884 jnb 1f
1885 pushl %ebp
1886 movl %esp, %ebp
1887 pushl $.copyout_panic_msg
1888 call panic
1889 1:
1890 #endif
1891 lea _copyout_err, %eax
1892 movl %gs:CPU_THREAD, %edx
1893 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
1894 jb do_copy_fault
1895 jmp 3f
1896
1897 _copyout_err:
1898 popl %ecx
1899 popl %edi
1900 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */
1901 popl %esi
1902 popl %ebp
1903 3:
1904 movl T_COPYOPS(%edx), %eax
1905 cmpl $0, %eax
1906 jz 2f
1907 jmp *CP_COPYOUT(%eax)
1908
1909 2: movl $-1, %eax
1910 ret
1911 SET_SIZE(copyout)
1912
1913 #undef ARG_UADDR
1914 #undef ARG_KADDR
1915
1916 #endif /* __i386 */
1917 #endif /* __lint */
1918
1919 #if defined(__lint)
1920
1921 /* ARGSUSED */
1922 int
1923 xcopyout_nta(const void *kaddr, void *uaddr, size_t count, int copy_cached)
1924 { return (0); }
1925
1926 #else /* __lint */
1927
1928 #if defined(__amd64)
1929
1930 ENTRY(xcopyout_nta)
1931 pushq %rbp
1932 movq %rsp, %rbp
1933 subq $24, %rsp
1934
1935 /*
1936 * save args in case we trap and need to rerun as a copyop
1937 */
1938 movq %rdi, (%rsp)
1939 movq %rsi, 0x8(%rsp)
1940 movq %rdx, 0x10(%rsp)
1941
1942 movq kernelbase(%rip), %rax
1943 #ifdef DEBUG
1944 cmpq %rax, %rdi /* %rdi = kaddr */
1945 jnb 1f
1946 leaq .xcopyout_panic_msg(%rip), %rdi
1947 xorl %eax, %eax
1948 call panic
1949 1:
1950 #endif
1951 movq %gs:CPU_THREAD, %r9
1952 cmpq %rax, %rsi /* test uaddr < kernelbase */
1953 jae 4f
1954
1955 cmpq $0, %rcx /* No non-temporal access? */
1956 /*
1957 * pass lofault value as 4th argument to do_copy_fault
1958 */
1959 leaq _xcopyout_err(%rip), %rcx
1960 jnz 6f
1961 /*
1962 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1963 */
1964 cmpq $XCOPY_MIN_SIZE, %rdx
1965 jae 5f
1966 6:
1967 SMAP_DISABLE_INSTR(4)
1968 jmp do_copy_fault
1969
1970 /*
1971 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1972 * count is COUNT_ALIGN_SIZE aligned.
1973 */
1974 5:
1975 movq %rdi, %r10
1976 orq %rsi, %r10
1977 andq $NTA_ALIGN_MASK, %r10
1978 orq %rdx, %r10
1979 andq $COUNT_ALIGN_MASK, %r10
1980 jnz 6b
1981 leaq _xcopyout_nta_err(%rip), %rcx
1982 SMAP_DISABLE_INSTR(5)
1983 call do_copy_fault_nta
1984 SMAP_ENABLE_INSTR(5)
1985 ret
1986
1987 4:
1988 movl $EFAULT, %eax
1989 jmp 3f
1990
1991 /*
1992 * A fault during do_copy_fault or do_copy_fault_nta is
1993 * indicated through an errno value in %rax and we iret from the
1994 * trap handler to here.
1995 */
1996 _xcopyout_err:
1997 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1998 _xcopyout_nta_err:
1999 SMAP_ENABLE_INSTR(6)
2000 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
2001 3:
2002 movq T_COPYOPS(%r9), %r8
2003 cmpq $0, %r8
2004 jz 2f
2005
2006 /*
2007 * reload args for the copyop
2008 */
2009 movq (%rsp), %rdi
2010 movq 0x8(%rsp), %rsi
2011 movq 0x10(%rsp), %rdx
2012 leave
2013 jmp *CP_XCOPYOUT(%r8)
2014
2015 2: leave
2016 ret
2017 SET_SIZE(xcopyout_nta)
2018
2019 #elif defined(__i386)
2020
2021 #define ARG_KADDR 4
2022 #define ARG_UADDR 8
2023 #define ARG_COUNT 12
2024 #define ARG_CACHED 16
2025
2026 ENTRY(xcopyout_nta)
2027 movl kernelbase, %ecx
2028 lea _xcopyout_err, %eax
2029 movl %gs:CPU_THREAD, %edx
2030 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
2031 jae 4f
2032
2033 cmpl $0, use_sse_copy /* no sse support */
2034 jz do_copy_fault
2035
2036 cmpl $0, ARG_CACHED(%esp) /* copy_cached hint set? */
2037 jnz do_copy_fault
2038
2039 /*
2040 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
2041 */
	cmpl	$XCOPY_MIN_SIZE, ARG_COUNT(%esp)
2043 jb do_copy_fault
2044
2045 /*
2046 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
2047 * count is COUNT_ALIGN_SIZE aligned.
2048 */
2049 movl ARG_UADDR(%esp), %ecx
2050 orl ARG_KADDR(%esp), %ecx
2051 andl $NTA_ALIGN_MASK, %ecx
2052 orl ARG_COUNT(%esp), %ecx
2053 andl $COUNT_ALIGN_MASK, %ecx
2054 jnz do_copy_fault
2055 jmp do_copy_fault_nta
2056
2057 4:
2058 movl $EFAULT, %eax
2059 jmp 3f
2060
2061 /*
2062 * A fault during do_copy_fault or do_copy_fault_nta is
2063 * indicated through an errno value in %eax and we iret from the
2064 * trap handler to here.
2065 */
2066 _xcopyout_err:
2067 / restore the original lofault
2068 popl %ecx
2069 popl %edi
2070 movl %ecx, T_LOFAULT(%edx) / original lofault
2071 popl %esi
2072 popl %ebp
2073 3:
2074 cmpl $0, T_COPYOPS(%edx)
2075 jz 2f
2076 movl T_COPYOPS(%edx), %eax
2077 jmp *CP_XCOPYOUT(%eax)
2078
2079 2: rep; ret /* use 2 byte return instruction when branch target */
2080 /* AMD Software Optimization Guide - Section 6.2 */
2081 SET_SIZE(xcopyout_nta)
2082
2083 #undef ARG_UADDR
2084 #undef ARG_KADDR
2085 #undef ARG_COUNT
2086 #undef ARG_CACHED
2087
2088 #endif /* __i386 */
2089 #endif /* __lint */
2090
2091 /*
2092 * Copy a null terminated string from one point to another in
2093 * the kernel address space.
2094 */
2095
2096 #if defined(__lint)
2097
2098 /* ARGSUSED */
2099 int
2100 copystr(const char *from, char *to, size_t maxlength, size_t *lencopied)
2101 { return (0); }
2102
2103 #else /* __lint */
2104
2105 #if defined(__amd64)
2106
2107 ENTRY(copystr)
2108 pushq %rbp
2109 movq %rsp, %rbp
2110 #ifdef DEBUG
2111 movq kernelbase(%rip), %rax
2112 cmpq %rax, %rdi /* %rdi = from */
2113 jb 0f
2114 cmpq %rax, %rsi /* %rsi = to */
2115 jnb 1f
2116 0: leaq .copystr_panic_msg(%rip), %rdi
2117 xorl %eax, %eax
2118 call panic
2119 1:
2120 #endif
2121 movq %gs:CPU_THREAD, %r9
2122 movq T_LOFAULT(%r9), %r8 /* pass current lofault value as */
2123 /* 5th argument to do_copystr */
2124 xorl %r10d,%r10d /* pass smap restore need in %r10d */
2125 /* as a non-ABI 6th arg */
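
	/*
	 * do_copystr register contract: %rdi = source, %rsi = destination,
	 * %rdx = maxlength, %rcx = lencopied pointer (or NULL), %r8 = the
	 * lofault value to install, %r10d = nonzero if SMAP must be
	 * re-enabled on the way out.  It returns 0 once the NUL has been
	 * copied, or ENAMETOOLONG if maxlength is exhausted first; when
	 * %rcx is non-NULL the number of bytes copied (including the NUL
	 * on success) is stored through it.
	 */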
2126 do_copystr:
2127 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
2128 movq T_LOFAULT(%r9), %r11 /* save the current lofault */
2129 movq %r8, T_LOFAULT(%r9) /* new lofault */
2130
2131 movq %rdx, %r8 /* save maxlength */
2132
2133 cmpq $0, %rdx /* %rdx = maxlength */
2134 je copystr_enametoolong /* maxlength == 0 */
2135
2136 copystr_loop:
2137 decq %r8
2138 movb (%rdi), %al
2139 incq %rdi
2140 movb %al, (%rsi)
2141 incq %rsi
2142 cmpb $0, %al
2143 je copystr_null /* null char */
2144 cmpq $0, %r8
2145 jne copystr_loop
2146
2147 copystr_enametoolong:
2148 movl $ENAMETOOLONG, %eax
2149 jmp copystr_out
2150
2151 copystr_null:
2152 xorl %eax, %eax /* no error */
2153
2154 copystr_out:
2155 cmpq $0, %rcx /* want length? */
2156 je copystr_smap /* no */
2157 subq %r8, %rdx /* compute length and store it */
2158 movq %rdx, (%rcx)
2159
2160 copystr_smap:
2161 cmpl $0, %r10d
2162 jz copystr_done
2163 SMAP_ENABLE_INSTR(7)
2164
2165 copystr_done:
2166 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
2167 leave
2168 ret
2169 SET_SIZE(copystr)
2170
2171 #elif defined(__i386)
2172
2173 #define ARG_FROM 8
2174 #define ARG_TO 12
2175 #define ARG_MAXLEN 16
2176 #define ARG_LENCOPIED 20
2177
2178 ENTRY(copystr)
2179 #ifdef DEBUG
2180 pushl %ebp
2181 movl %esp, %ebp
2182 movl kernelbase, %eax
2183 cmpl %eax, ARG_FROM(%esp)
2184 jb 0f
2185 cmpl %eax, ARG_TO(%esp)
2186 jnb 1f
2187 0: pushl $.copystr_panic_msg
2188 call panic
2189 1: popl %ebp
2190 #endif
2191 /* get the current lofault address */
2192 movl %gs:CPU_THREAD, %eax
2193 movl T_LOFAULT(%eax), %eax
2194 do_copystr:
2195 pushl %ebp /* setup stack frame */
2196 movl %esp, %ebp
2197 pushl %ebx /* save registers */
2198 pushl %edi
2199
2200 movl %gs:CPU_THREAD, %ebx
2201 movl T_LOFAULT(%ebx), %edi
2202 pushl %edi /* save the current lofault */
2203 movl %eax, T_LOFAULT(%ebx) /* new lofault */
2204
2205 movl ARG_MAXLEN(%ebp), %ecx
2206 cmpl $0, %ecx
2207 je copystr_enametoolong /* maxlength == 0 */
2208
2209 movl ARG_FROM(%ebp), %ebx /* source address */
2210 movl ARG_TO(%ebp), %edx /* destination address */
2211
2212 copystr_loop:
2213 decl %ecx
2214 movb (%ebx), %al
2215 incl %ebx
2216 movb %al, (%edx)
2217 incl %edx
2218 cmpb $0, %al
2219 je copystr_null /* null char */
2220 cmpl $0, %ecx
2221 jne copystr_loop
2222
2223 copystr_enametoolong:
2224 movl $ENAMETOOLONG, %eax
2225 jmp copystr_out
2226
2227 copystr_null:
2228 xorl %eax, %eax /* no error */
2229
2230 copystr_out:
2231 cmpl $0, ARG_LENCOPIED(%ebp) /* want length? */
2232 je copystr_done /* no */
2233 movl ARG_MAXLEN(%ebp), %edx
2234 subl %ecx, %edx /* compute length and store it */
2235 movl ARG_LENCOPIED(%ebp), %ecx
2236 movl %edx, (%ecx)
2237
2238 copystr_done:
2239 popl %edi
2240 movl %gs:CPU_THREAD, %ebx
2241 movl %edi, T_LOFAULT(%ebx) /* restore the original lofault */
2242
2243 popl %edi
2244 popl %ebx
2245 popl %ebp
2246 ret
2247 SET_SIZE(copystr)
2248
2249 #undef ARG_FROM
2250 #undef ARG_TO
2251 #undef ARG_MAXLEN
2252 #undef ARG_LENCOPIED
2253
2254 #endif /* __i386 */
2255 #endif /* __lint */
2256
2257 /*
2258 * Copy a null terminated string from the user address space into
2259 * the kernel address space.
2260 */
2261
2262 #if defined(__lint)
2263
2264 /* ARGSUSED */
2265 int
2266 copyinstr(const char *uaddr, char *kaddr, size_t maxlength,
2267 size_t *lencopied)
2268 { return (0); }
2269
2270 #else /* __lint */
2271
2272 #if defined(__amd64)
2273
2274 ENTRY(copyinstr)
2275 pushq %rbp
2276 movq %rsp, %rbp
2277 subq $32, %rsp
2278
2279 /*
2280 * save args in case we trap and need to rerun as a copyop
2281 */
2282 movq %rdi, (%rsp)
2283 movq %rsi, 0x8(%rsp)
2284 movq %rdx, 0x10(%rsp)
2285 movq %rcx, 0x18(%rsp)
2286
2287 movq kernelbase(%rip), %rax
2288 #ifdef DEBUG
2289 cmpq %rax, %rsi /* %rsi = kaddr */
2290 jnb 1f
2291 leaq .copyinstr_panic_msg(%rip), %rdi
2292 xorl %eax, %eax
2293 call panic
2294 1:
2295 #endif
2296 /*
2297 * pass lofault value as 5th argument to do_copystr
	 * do_copystr expects the SMAP re-enable flag in %r10d (non-zero
	 * means SMAP must be re-enabled before returning)
2299 */
2300 leaq _copyinstr_error(%rip), %r8
2301 movl $1, %r10d
2302
2303 cmpq %rax, %rdi /* test uaddr < kernelbase */
2304 jae 4f
2305 SMAP_DISABLE_INSTR(6)
2306 jmp do_copystr
2307 4:
2308 movq %gs:CPU_THREAD, %r9
2309 jmp 3f
2310
2311 _copyinstr_error:
2312 SMAP_ENABLE_INSTR(8)
2313 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
2314 3:
2315 movq T_COPYOPS(%r9), %rax
2316 cmpq $0, %rax
2317 jz 2f
2318
2319 /*
2320 * reload args for the copyop
2321 */
2322 movq (%rsp), %rdi
2323 movq 0x8(%rsp), %rsi
2324 movq 0x10(%rsp), %rdx
2325 movq 0x18(%rsp), %rcx
2326 leave
2327 jmp *CP_COPYINSTR(%rax)
2328
2329 2: movl $EFAULT, %eax /* return EFAULT */
2330 leave
2331 ret
2332 SET_SIZE(copyinstr)
2333
2334 #elif defined(__i386)
2335
2336 #define ARG_UADDR 4
2337 #define ARG_KADDR 8
2338
2339 ENTRY(copyinstr)
2340 movl kernelbase, %ecx
2341 #ifdef DEBUG
2342 cmpl %ecx, ARG_KADDR(%esp)
2343 jnb 1f
2344 pushl %ebp
2345 movl %esp, %ebp
2346 pushl $.copyinstr_panic_msg
2347 call panic
2348 1:
2349 #endif
2350 lea _copyinstr_error, %eax
2351 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
2352 jb do_copystr
2353 movl %gs:CPU_THREAD, %edx
2354 jmp 3f
2355
2356 _copyinstr_error:
2357 popl %edi
2358 movl %gs:CPU_THREAD, %edx
2359 movl %edi, T_LOFAULT(%edx) /* original lofault */
2360
2361 popl %edi
2362 popl %ebx
2363 popl %ebp
2364 3:
2365 movl T_COPYOPS(%edx), %eax
2366 cmpl $0, %eax
2367 jz 2f
2368 jmp *CP_COPYINSTR(%eax)
2369
2370 2: movl $EFAULT, %eax /* return EFAULT */
2371 ret
2372 SET_SIZE(copyinstr)
2373
2374 #undef ARG_UADDR
2375 #undef ARG_KADDR
2376
2377 #endif /* __i386 */
2378 #endif /* __lint */
2379
2380 /*
2381 * Copy a null terminated string from the kernel
2382 * address space to the user address space.
2383 */
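/*
 * Illustrative (non-compiled) sketch of a copyoutstr() caller; the
 * names are assumptions for the example.  The return values mirror
 * copyinstr(): 0, EFAULT or ENAMETOOLONG.
 *
 *	size_t copied;
 *	int err;
 *
 *	if ((err = copyoutstr(kname, uname, MAXNAMELEN, &copied)) != 0)
 *		return (err);
 */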
2384
2385 #if defined(__lint)
2386
2387 /* ARGSUSED */
2388 int
2389 copyoutstr(const char *kaddr, char *uaddr, size_t maxlength,
2390 size_t *lencopied)
2391 { return (0); }
2392
2393 #else /* __lint */
2394
2395 #if defined(__amd64)
2396
2397 ENTRY(copyoutstr)
2398 pushq %rbp
2399 movq %rsp, %rbp
2400 subq $32, %rsp
2401
2402 /*
2403 * save args in case we trap and need to rerun as a copyop
2404 */
2405 movq %rdi, (%rsp)
2406 movq %rsi, 0x8(%rsp)
2407 movq %rdx, 0x10(%rsp)
2408 movq %rcx, 0x18(%rsp)
2409
2410 movq kernelbase(%rip), %rax
2411 #ifdef DEBUG
2412 cmpq %rax, %rdi /* %rdi = kaddr */
2413 jnb 1f
2414 leaq .copyoutstr_panic_msg(%rip), %rdi
2415 jmp call_panic /* setup stack and call panic */
2416 1:
2417 #endif
2418 /*
2419 * pass lofault value as 5th argument to do_copystr
	 * pass 1 in %r10d as the non-ABI 6th argument, indicating that
	 * do_copystr must re-enable SMAP before returning
2421 */
2422 leaq _copyoutstr_error(%rip), %r8
2423 movl $1, %r10d
2424
2425 cmpq %rax, %rsi /* test uaddr < kernelbase */
2426 jae 4f
2427 SMAP_DISABLE_INSTR(7)
2428 jmp do_copystr
2429 4:
2430 movq %gs:CPU_THREAD, %r9
2431 jmp 3f
2432
2433 _copyoutstr_error:
2434 SMAP_ENABLE_INSTR(9)
2435 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
2436 3:
2437 movq T_COPYOPS(%r9), %rax
2438 cmpq $0, %rax
2439 jz 2f
2440
2441 /*
2442 * reload args for the copyop
2443 */
2444 movq (%rsp), %rdi
2445 movq 0x8(%rsp), %rsi
2446 movq 0x10(%rsp), %rdx
2447 movq 0x18(%rsp), %rcx
2448 leave
2449 jmp *CP_COPYOUTSTR(%rax)
2450
2451 2: movl $EFAULT, %eax /* return EFAULT */
2452 leave
2453 ret
2454 SET_SIZE(copyoutstr)
2455
2456 #elif defined(__i386)
2457
2458 #define ARG_KADDR 4
2459 #define ARG_UADDR 8
2460
2461 ENTRY(copyoutstr)
2462 movl kernelbase, %ecx
2463 #ifdef DEBUG
2464 cmpl %ecx, ARG_KADDR(%esp)
2465 jnb 1f
2466 pushl %ebp
2467 movl %esp, %ebp
2468 pushl $.copyoutstr_panic_msg
2469 call panic
2470 1:
2471 #endif
2472 lea _copyoutstr_error, %eax
2473 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
2474 jb do_copystr
2475 movl %gs:CPU_THREAD, %edx
2476 jmp 3f
2477
2478 _copyoutstr_error:
2479 popl %edi
2480 movl %gs:CPU_THREAD, %edx
2481 movl %edi, T_LOFAULT(%edx) /* restore the original lofault */
2482
2483 popl %edi
2484 popl %ebx
2485 popl %ebp
2486 3:
2487 movl T_COPYOPS(%edx), %eax
2488 cmpl $0, %eax
2489 jz 2f
2490 jmp *CP_COPYOUTSTR(%eax)
2491
2492 2: movl $EFAULT, %eax /* return EFAULT */
2493 ret
2494 SET_SIZE(copyoutstr)
2495
2496 #undef ARG_KADDR
2497 #undef ARG_UADDR
2498
2499 #endif /* __i386 */
2500 #endif /* __lint */
2501
2502 /*
2503 * Since all of the fuword() variants are so similar, we have a macro to spit
2504 * them out. This allows us to create DTrace-unobservable functions easily.
2505 */
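/*
 * The generated fuword*() routines share one contract: fetch a word
 * from the user address addr into *dst, returning 0 on success and -1
 * (or the copyops result) on a fault or an address at/above
 * kernelbase.  A minimal, non-compiled usage sketch, with names
 * assumed for the example:
 *
 *	uint32_t val;
 *
 *	if (fuword32(uaddr, &val) == -1)
 *		return (EFAULT);
 */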
2506
2507 #if defined(__lint)
2508
2509 #if defined(__amd64)
2510
2511 /* ARGSUSED */
2512 int
2513 fuword64(const void *addr, uint64_t *dst)
2514 { return (0); }
2515
2516 #endif
2517
2518 /* ARGSUSED */
2519 int
2520 fuword32(const void *addr, uint32_t *dst)
2521 { return (0); }
2522
2523 /* ARGSUSED */
2524 int
2525 fuword16(const void *addr, uint16_t *dst)
2526 { return (0); }
2527
2528 /* ARGSUSED */
2529 int
2530 fuword8(const void *addr, uint8_t *dst)
2531 { return (0); }
2532
2533 #else /* __lint */
2534
2535 #if defined(__amd64)
2536
/*
 * Note that we don't save and reload the arguments here
 * because their values are not altered in the copy path.
 * Additionally, on success we re-enable SMAP and return
 * directly to our original caller.
 */
2543
2544 #define FUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \
2545 ENTRY(NAME) \
2546 movq %gs:CPU_THREAD, %r9; \
2547 cmpq kernelbase(%rip), %rdi; \
2548 jae 1f; \
2549 leaq _flt_/**/NAME, %rdx; \
2550 movq %rdx, T_LOFAULT(%r9); \
2551 SMAP_DISABLE_INSTR(DISNUM) \
2552 INSTR (%rdi), REG; \
2553 movq $0, T_LOFAULT(%r9); \
2554 INSTR REG, (%rsi); \
2555 xorl %eax, %eax; \
2556 SMAP_ENABLE_INSTR(EN1) \
2557 ret; \
2558 _flt_/**/NAME: \
2559 SMAP_ENABLE_INSTR(EN2) \
2560 movq $0, T_LOFAULT(%r9); \
2561 1: \
2562 movq T_COPYOPS(%r9), %rax; \
2563 cmpq $0, %rax; \
2564 jz 2f; \
2565 jmp *COPYOP(%rax); \
2566 2: \
2567 movl $-1, %eax; \
2568 ret; \
2569 SET_SIZE(NAME)
2570
2571 FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11)
2572 FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13)
2573 FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15)
2574 FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17)
2575
2576 #elif defined(__i386)
2577
2578 #define FUWORD(NAME, INSTR, REG, COPYOP) \
2579 ENTRY(NAME) \
2580 movl %gs:CPU_THREAD, %ecx; \
2581 movl kernelbase, %eax; \
2582 cmpl %eax, 4(%esp); \
2583 jae 1f; \
2584 lea _flt_/**/NAME, %edx; \
2585 movl %edx, T_LOFAULT(%ecx); \
2586 movl 4(%esp), %eax; \
2587 movl 8(%esp), %edx; \
2588 INSTR (%eax), REG; \
2589 movl $0, T_LOFAULT(%ecx); \
2590 INSTR REG, (%edx); \
2591 xorl %eax, %eax; \
2592 ret; \
2593 _flt_/**/NAME: \
2594 movl $0, T_LOFAULT(%ecx); \
2595 1: \
2596 movl T_COPYOPS(%ecx), %eax; \
2597 cmpl $0, %eax; \
2598 jz 2f; \
2599 jmp *COPYOP(%eax); \
2600 2: \
2601 movl $-1, %eax; \
2602 ret; \
2603 SET_SIZE(NAME)
2604
2605 FUWORD(fuword32, movl, %eax, CP_FUWORD32)
2606 FUWORD(fuword16, movw, %ax, CP_FUWORD16)
2607 FUWORD(fuword8, movb, %al, CP_FUWORD8)
2608
2609 #endif /* __i386 */
2610
2611 #undef FUWORD
2612
2613 #endif /* __lint */
2614
2615 /*
2616 * Set user word.
2617 */
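/*
 * The suword*() routines are the mirror image of fuword*(): store a
 * word at the user address, returning 0 on success and -1 (or the
 * copyops result) on failure.  A minimal, non-compiled usage sketch
 * with assumed names:
 *
 *	if (suword32(uaddr, 0) == -1)
 *		return (EFAULT);
 */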
2618
2619 #if defined(__lint)
2620
2621 #if defined(__amd64)
2622
2623 /* ARGSUSED */
2624 int
2625 suword64(void *addr, uint64_t value)
2626 { return (0); }
2627
2628 #endif
2629
2630 /* ARGSUSED */
2631 int
2632 suword32(void *addr, uint32_t value)
2633 { return (0); }
2634
2635 /* ARGSUSED */
2636 int
2637 suword16(void *addr, uint16_t value)
2638 { return (0); }
2639
2640 /* ARGSUSED */
2641 int
2642 suword8(void *addr, uint8_t value)
2643 { return (0); }
2644
2645 #else /* lint */
2646
2647 #if defined(__amd64)
2648
2649 /*
2650 * Note that we don't save and reload the arguments here
2651 * because their values are not altered in the copy path.
2652 */
2653
2654 #define SUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \
2655 ENTRY(NAME) \
2656 movq %gs:CPU_THREAD, %r9; \
2657 cmpq kernelbase(%rip), %rdi; \
2658 jae 1f; \
2659 leaq _flt_/**/NAME, %rdx; \
2660 SMAP_DISABLE_INSTR(DISNUM) \
2661 movq %rdx, T_LOFAULT(%r9); \
2662 INSTR REG, (%rdi); \
2663 movq $0, T_LOFAULT(%r9); \
2664 xorl %eax, %eax; \
2665 SMAP_ENABLE_INSTR(EN1) \
2666 ret; \
2667 _flt_/**/NAME: \
2668 SMAP_ENABLE_INSTR(EN2) \
2669 movq $0, T_LOFAULT(%r9); \
2670 1: \
2671 movq T_COPYOPS(%r9), %rax; \
2672 cmpq $0, %rax; \
2673 jz 3f; \
2674 jmp *COPYOP(%rax); \
2675 3: \
2676 movl $-1, %eax; \
2677 ret; \
2678 SET_SIZE(NAME)
2679
2680 SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19)
2681 SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21)
2682 SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23)
2683 SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25)
2684
2685 #elif defined(__i386)
2686
2687 #define SUWORD(NAME, INSTR, REG, COPYOP) \
2688 ENTRY(NAME) \
2689 movl %gs:CPU_THREAD, %ecx; \
2690 movl kernelbase, %eax; \
2691 cmpl %eax, 4(%esp); \
2692 jae 1f; \
2693 lea _flt_/**/NAME, %edx; \
2694 movl %edx, T_LOFAULT(%ecx); \
2695 movl 4(%esp), %eax; \
2696 movl 8(%esp), %edx; \
2697 INSTR REG, (%eax); \
2698 movl $0, T_LOFAULT(%ecx); \
2699 xorl %eax, %eax; \
2700 ret; \
2701 _flt_/**/NAME: \
2702 movl $0, T_LOFAULT(%ecx); \
2703 1: \
2704 movl T_COPYOPS(%ecx), %eax; \
2705 cmpl $0, %eax; \
2706 jz 3f; \
2707 movl COPYOP(%eax), %ecx; \
2708 jmp *%ecx; \
2709 3: \
2710 movl $-1, %eax; \
2711 ret; \
2712 SET_SIZE(NAME)
2713
2714 SUWORD(suword32, movl, %edx, CP_SUWORD32)
2715 SUWORD(suword16, movw, %dx, CP_SUWORD16)
2716 SUWORD(suword8, movb, %dl, CP_SUWORD8)
2717
2718 #endif /* __i386 */
2719
2720 #undef SUWORD
2721
2722 #endif /* __lint */
2723
2724 #if defined(__lint)
2725
2726 #if defined(__amd64)
2727
2728 /*ARGSUSED*/
2729 void
2730 fuword64_noerr(const void *addr, uint64_t *dst)
2731 {}
2732
2733 #endif
2734
2735 /*ARGSUSED*/
2736 void
2737 fuword32_noerr(const void *addr, uint32_t *dst)
2738 {}
2739
2740 /*ARGSUSED*/
2741 void
2742 fuword8_noerr(const void *addr, uint8_t *dst)
2743 {}
2744
2745 /*ARGSUSED*/
2746 void
2747 fuword16_noerr(const void *addr, uint16_t *dst)
2748 {}
2749
2750 #else /* __lint */
2751
2752 #if defined(__amd64)
2753
2754 #define FUWORD_NOERR(NAME, INSTR, REG) \
2755 ENTRY(NAME) \
2756 cmpq kernelbase(%rip), %rdi; \
2757 cmovnbq kernelbase(%rip), %rdi; \
2758 INSTR (%rdi), REG; \
2759 INSTR REG, (%rsi); \
2760 ret; \
2761 SET_SIZE(NAME)
2762
2763 FUWORD_NOERR(fuword64_noerr, movq, %rax)
2764 FUWORD_NOERR(fuword32_noerr, movl, %eax)
2765 FUWORD_NOERR(fuword16_noerr, movw, %ax)
2766 FUWORD_NOERR(fuword8_noerr, movb, %al)
2767
2768 #elif defined(__i386)
2769
2770 #define FUWORD_NOERR(NAME, INSTR, REG) \
2771 ENTRY(NAME) \
2772 movl 4(%esp), %eax; \
2773 cmpl kernelbase, %eax; \
2774 jb 1f; \
2775 movl kernelbase, %eax; \
2776 1: movl 8(%esp), %edx; \
2777 INSTR (%eax), REG; \
2778 INSTR REG, (%edx); \
2779 ret; \
2780 SET_SIZE(NAME)
2781
2782 FUWORD_NOERR(fuword32_noerr, movl, %ecx)
2783 FUWORD_NOERR(fuword16_noerr, movw, %cx)
2784 FUWORD_NOERR(fuword8_noerr, movb, %cl)
2785
2786 #endif /* __i386 */
2787
2788 #undef FUWORD_NOERR
2789
2790 #endif /* __lint */
2791
2792 #if defined(__lint)
2793
2794 #if defined(__amd64)
2795
2796 /*ARGSUSED*/
2797 void
2798 suword64_noerr(void *addr, uint64_t value)
2799 {}
2800
2801 #endif
2802
2803 /*ARGSUSED*/
2804 void
2805 suword32_noerr(void *addr, uint32_t value)
2806 {}
2807
2808 /*ARGSUSED*/
2809 void
2810 suword16_noerr(void *addr, uint16_t value)
2811 {}
2812
2813 /*ARGSUSED*/
2814 void
2815 suword8_noerr(void *addr, uint8_t value)
2816 {}
2817
2818 #else /* lint */
2819
2820 #if defined(__amd64)
2821
2822 #define SUWORD_NOERR(NAME, INSTR, REG) \
2823 ENTRY(NAME) \
2824 cmpq kernelbase(%rip), %rdi; \
2825 cmovnbq kernelbase(%rip), %rdi; \
2826 INSTR REG, (%rdi); \
2827 ret; \
2828 SET_SIZE(NAME)
2829
2830 SUWORD_NOERR(suword64_noerr, movq, %rsi)
2831 SUWORD_NOERR(suword32_noerr, movl, %esi)
2832 SUWORD_NOERR(suword16_noerr, movw, %si)
2833 SUWORD_NOERR(suword8_noerr, movb, %sil)
2834
2835 #elif defined(__i386)
2836
2837 #define SUWORD_NOERR(NAME, INSTR, REG) \
2838 ENTRY(NAME) \
2839 movl 4(%esp), %eax; \
2840 cmpl kernelbase, %eax; \
2841 jb 1f; \
2842 movl kernelbase, %eax; \
2843 1: \
2844 movl 8(%esp), %edx; \
2845 INSTR REG, (%eax); \
2846 ret; \
2847 SET_SIZE(NAME)
2848
2849 SUWORD_NOERR(suword32_noerr, movl, %edx)
2850 SUWORD_NOERR(suword16_noerr, movw, %dx)
2851 SUWORD_NOERR(suword8_noerr, movb, %dl)
2852
2853 #endif /* __i386 */
2854
2855 #undef SUWORD_NOERR
2856
2857 #endif /* lint */
2858
2859
2860 #if defined(__lint)
2861
2862 /*ARGSUSED*/
2863 int
2864 subyte(void *addr, uchar_t value)
2865 { return (0); }
2866
2867 /*ARGSUSED*/
2868 void
2869 subyte_noerr(void *addr, uchar_t value)
2870 {}
2871
2872 /*ARGSUSED*/
2873 int
2874 fulword(const void *addr, ulong_t *valuep)
2875 { return (0); }
2876
2877 /*ARGSUSED*/
2878 void
2879 fulword_noerr(const void *addr, ulong_t *valuep)
2880 {}
2881
2882 /*ARGSUSED*/
2883 int
2884 sulword(void *addr, ulong_t valuep)
2885 { return (0); }
2886
2887 /*ARGSUSED*/
2888 void
2889 sulword_noerr(void *addr, ulong_t valuep)
2890 {}
2891
2892 #else
2893
2894 .weak subyte
2895 subyte=suword8
2896 .weak subyte_noerr
2897 subyte_noerr=suword8_noerr
2898
2899 #if defined(__amd64)
2900
2901 .weak fulword
2902 fulword=fuword64
2903 .weak fulword_noerr
2904 fulword_noerr=fuword64_noerr
2905 .weak sulword
2906 sulword=suword64
2907 .weak sulword_noerr
2908 sulword_noerr=suword64_noerr
2909
2910 #elif defined(__i386)
2911
2912 .weak fulword
2913 fulword=fuword32
2914 .weak fulword_noerr
2915 fulword_noerr=fuword32_noerr
2916 .weak sulword
2917 sulword=suword32
2918 .weak sulword_noerr
2919 sulword_noerr=suword32_noerr
2920
2921 #endif /* __i386 */
2922
2923 #endif /* __lint */
2924
2925 #if defined(__lint)
2926
2927 /*
2928 * Copy a block of storage - must not overlap (from + len <= to).
2929 * No fault handler installed (to be called under on_fault())
2930 */
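/*
 * A minimal, non-compiled sketch of the on_fault()/no_fault() protocol
 * that the *_noerr() routines rely on; the label variable is an
 * assumption for the example:
 *
 *	label_t ljb;
 *
 *	if (on_fault(&ljb)) {
 *		no_fault();
 *		return (EFAULT);
 *	}
 *	copyin_noerr(ufrom, kto, count);
 *	no_fault();
 */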
2931
2932 /* ARGSUSED */
2933 void
2934 copyout_noerr(const void *kfrom, void *uto, size_t count)
2935 {}
2936
2937 /* ARGSUSED */
2938 void
2939 copyin_noerr(const void *ufrom, void *kto, size_t count)
2940 {}
2941
2942 /*
2943 * Zero a block of storage in user space
2944 */
2945
2946 /* ARGSUSED */
2947 void
2948 uzero(void *addr, size_t count)
2949 {}
2950
2951 /*
2952 * copy a block of storage in user space
2953 */
2954
2955 /* ARGSUSED */
2956 void
2957 ucopy(const void *ufrom, void *uto, size_t ulength)
2958 {}
2959
2960 /*
2961 * copy a string in user space
2962 */
2963
2964 /* ARGSUSED */
2965 void
2966 ucopystr(const char *ufrom, char *uto, size_t umaxlength, size_t *lencopied)
2967 {}
2968
2969 #else /* __lint */
2970
2971 #if defined(__amd64)
2972
2973 ENTRY(copyin_noerr)
2974 movq kernelbase(%rip), %rax
2975 #ifdef DEBUG
2976 cmpq %rax, %rsi /* %rsi = kto */
2977 jae 1f
2978 leaq .cpyin_ne_pmsg(%rip), %rdi
2979 jmp call_panic /* setup stack and call panic */
2980 1:
2981 #endif
2982 cmpq %rax, %rdi /* ufrom < kernelbase */
2983 jb do_copy
2984 movq %rax, %rdi /* force fault at kernelbase */
2985 jmp do_copy
2986 SET_SIZE(copyin_noerr)
2987
2988 ENTRY(copyout_noerr)
2989 movq kernelbase(%rip), %rax
2990 #ifdef DEBUG
2991 cmpq %rax, %rdi /* %rdi = kfrom */
2992 jae 1f
2993 leaq .cpyout_ne_pmsg(%rip), %rdi
2994 jmp call_panic /* setup stack and call panic */
2995 1:
2996 #endif
2997 cmpq %rax, %rsi /* uto < kernelbase */
2998 jb do_copy
2999 movq %rax, %rsi /* force fault at kernelbase */
3000 jmp do_copy
3001 SET_SIZE(copyout_noerr)
3002
3003 ENTRY(uzero)
3004 movq kernelbase(%rip), %rax
3005 cmpq %rax, %rdi
3006 jb do_zero
3007 movq %rax, %rdi /* force fault at kernelbase */
3008 jmp do_zero
3009 SET_SIZE(uzero)
3010
3011 ENTRY(ucopy)
3012 movq kernelbase(%rip), %rax
3013 cmpq %rax, %rdi
3014 cmovaeq %rax, %rdi /* force fault at kernelbase */
3015 cmpq %rax, %rsi
3016 cmovaeq %rax, %rsi /* force fault at kernelbase */
3017 jmp do_copy
3018 SET_SIZE(ucopy)
3019
3020 /*
 * Note, the frame pointer is required here because do_copystr expects
3022 * to be able to pop it off!
3023 */
3024 ENTRY(ucopystr)
3025 pushq %rbp
3026 movq %rsp, %rbp
3027 movq kernelbase(%rip), %rax
3028 cmpq %rax, %rdi
3029 cmovaeq %rax, %rdi /* force fault at kernelbase */
3030 cmpq %rax, %rsi
3031 cmovaeq %rax, %rsi /* force fault at kernelbase */
3032 /* do_copystr expects lofault address in %r8 */
	/* do_copystr expects the SMAP re-enable flag in %r10d (zero: */
	/* no re-enable needed, SMAP is handled by the on_fault() caller) */
3034 xorl %r10d, %r10d
3035 movq %gs:CPU_THREAD, %r8
3036 movq T_LOFAULT(%r8), %r8
3037 jmp do_copystr
3038 SET_SIZE(ucopystr)
3039
3040 #elif defined(__i386)
3041
3042 ENTRY(copyin_noerr)
3043 movl kernelbase, %eax
3044 #ifdef DEBUG
3045 cmpl %eax, 8(%esp)
3046 jae 1f
3047 pushl $.cpyin_ne_pmsg
3048 call panic
3049 1:
3050 #endif
3051 cmpl %eax, 4(%esp)
3052 jb do_copy
3053 movl %eax, 4(%esp) /* force fault at kernelbase */
3054 jmp do_copy
3055 SET_SIZE(copyin_noerr)
3056
3057 ENTRY(copyout_noerr)
3058 movl kernelbase, %eax
3059 #ifdef DEBUG
3060 cmpl %eax, 4(%esp)
3061 jae 1f
3062 pushl $.cpyout_ne_pmsg
3063 call panic
3064 1:
3065 #endif
3066 cmpl %eax, 8(%esp)
3067 jb do_copy
3068 movl %eax, 8(%esp) /* force fault at kernelbase */
3069 jmp do_copy
3070 SET_SIZE(copyout_noerr)
3071
3072 ENTRY(uzero)
3073 movl kernelbase, %eax
3074 cmpl %eax, 4(%esp)
3075 jb do_zero
3076 movl %eax, 4(%esp) /* force fault at kernelbase */
3077 jmp do_zero
3078 SET_SIZE(uzero)
3079
3080 ENTRY(ucopy)
3081 movl kernelbase, %eax
3082 cmpl %eax, 4(%esp)
3083 jb 1f
3084 movl %eax, 4(%esp) /* force fault at kernelbase */
3085 1:
3086 cmpl %eax, 8(%esp)
3087 jb do_copy
3088 movl %eax, 8(%esp) /* force fault at kernelbase */
3089 jmp do_copy
3090 SET_SIZE(ucopy)
3091
3092 ENTRY(ucopystr)
3093 movl kernelbase, %eax
3094 cmpl %eax, 4(%esp)
3095 jb 1f
3096 movl %eax, 4(%esp) /* force fault at kernelbase */
3097 1:
3098 cmpl %eax, 8(%esp)
3099 jb 2f
3100 movl %eax, 8(%esp) /* force fault at kernelbase */
3101 2:
3102 /* do_copystr expects the lofault address in %eax */
3103 movl %gs:CPU_THREAD, %eax
3104 movl T_LOFAULT(%eax), %eax
3105 jmp do_copystr
3106 SET_SIZE(ucopystr)
3107
3108 #endif /* __i386 */
3109
3110 #ifdef DEBUG
3111 .data
3112 .kcopy_panic_msg:
3113 .string "kcopy: arguments below kernelbase"
3114 .bcopy_panic_msg:
3115 .string "bcopy: arguments below kernelbase"
3116 .kzero_panic_msg:
3117 .string "kzero: arguments below kernelbase"
3118 .bzero_panic_msg:
3119 .string "bzero: arguments below kernelbase"
3120 .copyin_panic_msg:
3121 .string "copyin: kaddr argument below kernelbase"
3122 .xcopyin_panic_msg:
3123 .string "xcopyin: kaddr argument below kernelbase"
3124 .copyout_panic_msg:
3125 .string "copyout: kaddr argument below kernelbase"
3126 .xcopyout_panic_msg:
3127 .string "xcopyout: kaddr argument below kernelbase"
3128 .copystr_panic_msg:
3129 .string "copystr: arguments in user space"
3130 .copyinstr_panic_msg:
3131 .string "copyinstr: kaddr argument not in kernel address space"
3132 .copyoutstr_panic_msg:
3133 .string "copyoutstr: kaddr argument not in kernel address space"
3134 .cpyin_ne_pmsg:
3135 .string "copyin_noerr: argument not in kernel address space"
3136 .cpyout_ne_pmsg:
3137 .string "copyout_noerr: argument not in kernel address space"
3138 #endif
3139
3140 #endif /* __lint */
3141
3142 /*
 * These functions are used for SMAP, supervisor mode access protection. They
 * are hotpatched to become real instructions when the system starts up; this
 * is done in mlsetup() as part of enabling the other CR4-related features.
 *
 * Generally speaking, smap_disable() is a stac instruction and smap_enable()
 * is a clac instruction. It is safe to call them any number of times, and in
 * fact, out of paranoia, the kernel will likely call them at several points.
3150 */
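/*
 * Conceptually, the hotpatch turns the three nops below (and each
 * SMAP_ENABLE_INSTR/SMAP_DISABLE_INSTR site counted at the bottom of
 * this file) into the 3-byte clac (0x0f 0x01 0xca) or stac
 * (0x0f 0x01 0xcb) encodings on SMAP-capable CPUs.  A rough,
 * non-compiled sketch of the idea, assuming a hypothetical
 * hot_patch_text() helper rather than the kernel's real text-patching
 * interface:
 *
 *	const uint8_t clac_insn[3] = { 0x0f, 0x01, 0xca };
 *
 *	if (is_x86_feature(x86_featureset, X86FSET_SMAP))
 *		hot_patch_text((void *)smap_enable, clac_insn,
 *		    sizeof (clac_insn));
 */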
3151
3152 #if defined(__lint)
3153
3154 void
3155 smap_enable(void)
3156 {}
3157
3158 void
3159 smap_disable(void)
3160 {}
3161
3162 #else
3163
3164 #if defined (__amd64) || defined(__i386)
3165 ENTRY(smap_disable)
3166 nop
3167 nop
3168 nop
3169 ret
3170 SET_SIZE(smap_disable)
3171
3172 ENTRY(smap_enable)
3173 nop
3174 nop
3175 nop
3176 ret
3177 SET_SIZE(smap_enable)
3178
3179 #endif /* __amd64 || __i386 */
3180
3181 #endif /* __lint */
3182
3183 #ifndef __lint
3184
3185 .data
3186 .align 4
3187 .globl _smap_enable_patch_count
3188 .type _smap_enable_patch_count,@object
3189 .size _smap_enable_patch_count, 4
3190 _smap_enable_patch_count:
3191 .long SMAP_ENABLE_COUNT
3192
3193 .globl _smap_disable_patch_count
3194 .type _smap_disable_patch_count,@object
3195 .size _smap_disable_patch_count, 4
3196 _smap_disable_patch_count:
3197 .long SMAP_DISABLE_COUNT
3198
3199 #endif /* __lint */