10908 Simplify SMAP relocations with krtld
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright (c) 2009, Intel Corporation
28 * All rights reserved.
29 */
30
31 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
32 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
33 /* All Rights Reserved */
34
35 /* Copyright (c) 1987, 1988 Microsoft Corporation */
36 /* All Rights Reserved */
37
38 /*
39 * Copyright (c) 2017 Joyent, Inc.
40 */
41
42 #include <sys/errno.h>
43 #include <sys/asm_linkage.h>
44
45 #if defined(__lint)
46 #include <sys/types.h>
47 #include <sys/systm.h>
48 #else /* __lint */
49 #include "assym.h"
50 #endif /* __lint */
51
52 #define KCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */
53 #define XCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */
54 /*
55 * Non-temporal access (NTA) alignment requirement
56 */
57 #define NTA_ALIGN_SIZE 4 /* Must be at least 4-byte aligned */
58 #define NTA_ALIGN_MASK _CONST(NTA_ALIGN_SIZE-1)
59 #define COUNT_ALIGN_SIZE 16 /* Must be at least 16-byte aligned */
60 #define COUNT_ALIGN_MASK _CONST(COUNT_ALIGN_SIZE-1)
61
62 /*
63 * With the introduction of Broadwell, Intel has introduced supervisor mode
64 * access protection -- SMAP. SMAP forces the kernel to set certain bits to
65 * enable access of user pages (AC in rflags, defined as PS_ACHK in
66 * <sys/psw.h>). One of the challenges is that the implementations of many of the
67 * userland copy routines directly use the kernel ones. For example, copyin and
68 * copyout simply go and jump to the do_copy_fault label and traditionally let
69 * those deal with the return for them. In fact, changing that is a can of frame
70 * pointers.
71 *
72 * Rules and Constraints:
73 *
74 * 1. For anything that's not in copy.s, we have it make explicit calls to the
75 * SMAP-related code; such callers are usually in a position to do so. This is
76 * restricted to the following three places: DTrace, resume() in swtch.s and
77 * on_fault()/no_fault(). If you want to add it somewhere else, think twice
78 * before doing so.
79 *
80 * 2. We try to keep the window in which SMAP is disabled as small as possible.
81 * This means that if we take a fault, or need to fall back to a copyop in
82 * copyin() or copyout() or any other function, we always leave with SMAP
83 * enabled (the kernel cannot access user pages).
84 *
85 * 3. None of the *_noerr() or ucopy/uzero routines should toggle SMAP. They are
86 * explicitly only allowed to be called while in an on_fault()/no_fault() handler,
87 * which already takes care of ensuring that SMAP is enabled and disabled. Note
88 * this means that when under an on_fault()/no_fault() handler, one must not
89 * call the non-*_noerr() routines.
90 *
91 * 4. The first thing we should do after coming out of an lofault handler is to
92 * make sure that we call smap_enable again to ensure that we are safely
93 * protected, as more often than not, we will have disabled smap to get there.
94 *
95 * 5. The SMAP functions, smap_enable and smap_disable, may not touch any
96 * registers beyond those used by the call and ret. These routines may be called
97 * from arbitrary contexts in copy.s where we have slightly more special ABIs in
98 * place.
99 *
100 * 6. For any inline user of SMAP, the appropriate SMAP_ENABLE_INSTR and
101 * SMAP_DISABLE_INSTR macro should be used (except for smap_enable() and
102 * smap_disable()). If the number of these is changed, you must update the
103 * constants SMAP_ENABLE_COUNT and SMAP_DISABLE_COUNT below.
104 *
105 * 7. Note, at this time SMAP is not implemented for the 32-bit kernel. There is
106 * no known technical reason preventing it from being enabled.
107 *
108 * 8. Generally this .s file is processed by a K&R style cpp. This means that it
109 * really has a lot of feelings about whitespace. In particular, if you have a
110 * macro FOO with the arguments FOO(1, 3), the second argument is in fact ' 3'.
111 *
112 * 9. The smap_enable and smap_disable functions should not generally be called.
113 * They exist such that DTrace and on_trap() may use them, that's it.
114 *
115 * 10. In general, the kernel has its own value for rflags that gets used. This
116 * is maintained in a few different places which vary based on how the thread
117 * comes into existence and whether it's a user thread. In general, when the
118 * kernel takes a trap, it will always set rflags to a known set of flags,
119 * mainly as part of ENABLE_INTR_FLAGS and F_OFF and F_ON. These ensure that
120 * PS_ACHK is cleared for us. In addition, when using the syscall instruction,
121 * we mask PS_ACHK off via the AMD_SFMASK MSR. See init_cpu_syscall() for
122 * where that gets masked off.
123 */
124
125 /*
126 * The optimal 64-bit bcopy and kcopy for modern x86 processors use
127 * "rep smovq" for large sizes. Performance data shows that many calls to
128 * bcopy/kcopy/bzero/kzero operate on small buffers. For best performance on
129 * these small sizes, unrolled code is used. For medium sizes, loops writing
130 * 64 bytes per iteration are used. Transition points were determined experimentally.
131 */
132 #define BZERO_USE_REP (1024)
133 #define BCOPY_DFLT_REP (128)
134 #define BCOPY_NHM_REP (768)
135
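/*
 * A reader's sketch of the structure below (not a new contract): counts under
 * 0x50 bytes dispatch through a jump table of fully unrolled moves; counts up
 * to the rep threshold use the 64-byte-per-iteration aligned loop; anything
 * larger falls through to rep smovq (rep sstoq for bzero). The constants above
 * are the experimentally determined transition points.
 */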
136 /*
137 * Copy a block of storage, returning an error code if `from' or
138 * `to' takes a kernel pagefault which cannot be resolved.
139 * Returns errno value on pagefault error, 0 if all ok
140 */
141
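/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 * kcopy() is bcopy() with fault protection, so callers check the return
 * value, e.g.
 *
 *	int err;
 *
 *	if ((err = kcopy(from, to, count)) != 0)
 *		return (err);		-- err is the errno from the fault
 */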
142 /*
143 * I'm sorry about these macros, but copy.s is unsurprisingly sensitive to
144 * additional call instructions.
145 */
146 #if defined(__amd64)
147 #define SMAP_DISABLE_COUNT 16
148 #define SMAP_ENABLE_COUNT 26
149 #elif defined(__i386)
150 #define SMAP_DISABLE_COUNT 0
151 #define SMAP_ENABLE_COUNT 0
152 #endif
153
154 #define SMAP_DISABLE_INSTR(ITER) \
155 .globl _smap_disable_patch_/**/ITER; \
156 _smap_disable_patch_/**/ITER/**/:; \
157 nop; nop; nop;
158
159 #define SMAP_ENABLE_INSTR(ITER) \
160 .globl _smap_enable_patch_/**/ITER; \
161 _smap_enable_patch_/**/ITER/**/:; \
162 nop; nop; nop;
163
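/*
 * Illustrative sketch (not authoritative): each SMAP_DISABLE_INSTR(ITER) and
 * SMAP_ENABLE_INSTR(ITER) use expands to a labelled three-byte nop sled, e.g.
 * SMAP_ENABLE_INSTR(0) becomes:
 *
 *	.globl	_smap_enable_patch_0
 *	_smap_enable_patch_0:
 *	nop; nop; nop
 *
 * On SMAP-capable hardware these sleds are presumably patched at boot with the
 * matching three-byte instruction -- clac (0x0f 0x01 0xca) at enable sites and
 * stac (0x0f 0x01 0xcb) at disable sites -- which is why SMAP_ENABLE_COUNT and
 * SMAP_DISABLE_COUNT must track the number of macro uses.
 */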
164 #if defined(__lint)
165
166 /* ARGSUSED */
167 int
168 kcopy(const void *from, void *to, size_t count)
169 { return (0); }
170
171 #else /* __lint */
172
173 .globl kernelbase
174 .globl postbootkernelbase
175
176 #if defined(__amd64)
177
178 ENTRY(kcopy)
179 pushq %rbp
180 movq %rsp, %rbp
181 #ifdef DEBUG
182 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */
183 jb 0f
184 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */
185 jnb 1f
186 0: leaq .kcopy_panic_msg(%rip), %rdi
187 xorl %eax, %eax
188 call panic
189 1:
190 #endif
191 /*
192 * pass lofault value as 4th argument to do_copy_fault
193 */
194 leaq _kcopy_copyerr(%rip), %rcx
195 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
196
197 do_copy_fault:
198 movq T_LOFAULT(%r9), %r11 /* save the current lofault */
199 movq %rcx, T_LOFAULT(%r9) /* new lofault */
200 call bcopy_altentry
201 xorl %eax, %eax /* return 0 (success) */
202 SMAP_ENABLE_INSTR(0)
203
204 /*
205 * A fault during do_copy_fault is indicated through an errno value
206 * in %rax and we iretq from the trap handler to here.
207 */
208 _kcopy_copyerr:
209 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
210 leave
211 ret
212 SET_SIZE(kcopy)
213
214 #elif defined(__i386)
215
216 #define ARG_FROM 8
217 #define ARG_TO 12
218 #define ARG_COUNT 16
219
220 ENTRY(kcopy)
221 #ifdef DEBUG
222 pushl %ebp
223 movl %esp, %ebp
224 movl postbootkernelbase, %eax
225 cmpl %eax, ARG_FROM(%ebp)
226 jb 0f
227 cmpl %eax, ARG_TO(%ebp)
228 jnb 1f
229 0: pushl $.kcopy_panic_msg
230 call panic
231 1: popl %ebp
232 #endif
233 lea _kcopy_copyerr, %eax /* lofault value */
234 movl %gs:CPU_THREAD, %edx
235
236 do_copy_fault:
237 pushl %ebp
238 movl %esp, %ebp /* setup stack frame */
239 pushl %esi
240 pushl %edi /* save registers */
241
242 movl T_LOFAULT(%edx), %edi
243 pushl %edi /* save the current lofault */
244 movl %eax, T_LOFAULT(%edx) /* new lofault */
245
246 movl ARG_COUNT(%ebp), %ecx
247 movl ARG_FROM(%ebp), %esi
248 movl ARG_TO(%ebp), %edi
249 shrl $2, %ecx /* word count */
250 rep
251 smovl
252 movl ARG_COUNT(%ebp), %ecx
253 andl $3, %ecx /* bytes left over */
254 rep
255 smovb
256 xorl %eax, %eax
257
258 /*
259 * A fault during do_copy_fault is indicated through an errno value
260 * in %eax and we iret from the trap handler to here.
261 */
262 _kcopy_copyerr:
263 popl %ecx
264 popl %edi
265 movl %ecx, T_LOFAULT(%edx) /* restore the original lofault */
266 popl %esi
267 popl %ebp
268 ret
269 SET_SIZE(kcopy)
270
271 #undef ARG_FROM
272 #undef ARG_TO
273 #undef ARG_COUNT
274
275 #endif /* __i386 */
276 #endif /* __lint */
277
278 #if defined(__lint)
279
280 /*
281 * Copy a block of storage. Similar to kcopy but uses non-temporal
282 * instructions.
283 */
284
285 /* ARGSUSED */
286 int
287 kcopy_nta(const void *from, void *to, size_t count, int copy_cached)
288 { return (0); }
289
290 #else /* __lint */
291
292 #if defined(__amd64)
293
294 #define COPY_LOOP_INIT(src, dst, cnt) \
295 addq cnt, src; \
296 addq cnt, dst; \
297 shrq $3, cnt; \
298 neg cnt
299
300 /* Copy 16 bytes per loop. Uses %rax and %r8 */
301 #define COPY_LOOP_BODY(src, dst, cnt) \
302 prefetchnta 0x100(src, cnt, 8); \
303 movq (src, cnt, 8), %rax; \
304 movq 0x8(src, cnt, 8), %r8; \
305 movnti %rax, (dst, cnt, 8); \
306 movnti %r8, 0x8(dst, cnt, 8); \
307 addq $2, cnt
308
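/*
 * Descriptive note: COPY_LOOP_INIT advances src and dst past the end of the
 * buffer and converts cnt from a byte count into a negative count of 8-byte
 * words. COPY_LOOP_BODY then copies 16 bytes per iteration through the scaled
 * index (src, cnt, 8), adding 2 to cnt each time; the caller's jnz falls
 * through once cnt reaches zero, i.e. once the end of the buffer is reached.
 */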
309 ENTRY(kcopy_nta)
310 pushq %rbp
311 movq %rsp, %rbp
312 #ifdef DEBUG
313 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */
314 jb 0f
315 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */
316 jnb 1f
317 0: leaq .kcopy_panic_msg(%rip), %rdi
318 xorl %eax, %eax
319 call panic
320 1:
321 #endif
322
323 movq %gs:CPU_THREAD, %r9
324 cmpq $0, %rcx /* No non-temporal access? */
325 /*
326 * pass lofault value as 4th argument to do_copy_fault
327 */
328 leaq _kcopy_nta_copyerr(%rip), %rcx /* doesn't set rflags */
329 jnz do_copy_fault /* use regular access */
330 /*
331 * Make sure cnt is >= KCOPY_MIN_SIZE
332 */
333 cmpq $KCOPY_MIN_SIZE, %rdx
334 jb do_copy_fault
335
336 /*
337 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
338 * count is COUNT_ALIGN_SIZE aligned.
339 */
340 movq %rdi, %r10
341 orq %rsi, %r10
342 andq $NTA_ALIGN_MASK, %r10
343 orq %rdx, %r10
344 andq $COUNT_ALIGN_MASK, %r10
345 jnz do_copy_fault
346
347 ALTENTRY(do_copy_fault_nta)
348 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
349 movq T_LOFAULT(%r9), %r11 /* save the current lofault */
350 movq %rcx, T_LOFAULT(%r9) /* new lofault */
351
352 /*
353 * COPY_LOOP_BODY uses %rax and %r8
354 */
355 COPY_LOOP_INIT(%rdi, %rsi, %rdx)
356 2: COPY_LOOP_BODY(%rdi, %rsi, %rdx)
357 jnz 2b
358
359 mfence
360 xorl %eax, %eax /* return 0 (success) */
361 SMAP_ENABLE_INSTR(1)
362
363 _kcopy_nta_copyerr:
364 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
365 leave
366 ret
367 SET_SIZE(do_copy_fault_nta)
368 SET_SIZE(kcopy_nta)
369
370 #elif defined(__i386)
371
372 #define ARG_FROM 8
373 #define ARG_TO 12
374 #define ARG_COUNT 16
375
376 #define COPY_LOOP_INIT(src, dst, cnt) \
377 addl cnt, src; \
378 addl cnt, dst; \
379 shrl $3, cnt; \
380 neg cnt
381
382 #define COPY_LOOP_BODY(src, dst, cnt) \
383 prefetchnta 0x100(src, cnt, 8); \
384 movl (src, cnt, 8), %esi; \
385 movnti %esi, (dst, cnt, 8); \
386 movl 0x4(src, cnt, 8), %esi; \
387 movnti %esi, 0x4(dst, cnt, 8); \
388 movl 0x8(src, cnt, 8), %esi; \
389 movnti %esi, 0x8(dst, cnt, 8); \
390 movl 0xc(src, cnt, 8), %esi; \
391 movnti %esi, 0xc(dst, cnt, 8); \
392 addl $2, cnt
393
394 /*
395 * kcopy_nta is not implemented for 32-bit as no performance
396 * improvement was shown. We simply jump directly to kcopy
397 * and ignore the 4th (copy_cached) argument.
398 */
399 ENTRY(kcopy_nta)
400 jmp kcopy
401
402 lea _kcopy_nta_copyerr, %eax /* lofault value */
403 ALTENTRY(do_copy_fault_nta)
404 pushl %ebp
405 movl %esp, %ebp /* setup stack frame */
406 pushl %esi
407 pushl %edi
408
409 movl %gs:CPU_THREAD, %edx
410 movl T_LOFAULT(%edx), %edi
411 pushl %edi /* save the current lofault */
412 movl %eax, T_LOFAULT(%edx) /* new lofault */
413
414 /* COPY_LOOP_BODY needs to use %esi */
415 movl ARG_COUNT(%ebp), %ecx
416 movl ARG_FROM(%ebp), %edi
417 movl ARG_TO(%ebp), %eax
418 COPY_LOOP_INIT(%edi, %eax, %ecx)
419 1: COPY_LOOP_BODY(%edi, %eax, %ecx)
420 jnz 1b
421 mfence
422
423 xorl %eax, %eax
424 _kcopy_nta_copyerr:
425 popl %ecx
426 popl %edi
427 movl %ecx, T_LOFAULT(%edx) /* restore the original lofault */
428 popl %esi
429 leave
430 ret
431 SET_SIZE(do_copy_fault_nta)
432 SET_SIZE(kcopy_nta)
433
434 #undef ARG_FROM
435 #undef ARG_TO
436 #undef ARG_COUNT
437
438 #endif /* __i386 */
439 #endif /* __lint */
440
441 #if defined(__lint)
442
443 /* ARGSUSED */
444 void
445 bcopy(const void *from, void *to, size_t count)
446 {}
447
448 #else /* __lint */
449
450 #if defined(__amd64)
451
452 ENTRY(bcopy)
453 #ifdef DEBUG
454 orq %rdx, %rdx /* %rdx = count */
455 jz 1f
456 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */
457 jb 0f
458 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */
459 jnb 1f
460 0: leaq .bcopy_panic_msg(%rip), %rdi
461 jmp call_panic /* setup stack and call panic */
462 1:
463 #endif
464 /*
465 * bcopy_altentry() is called from kcopy, i.e., do_copy_fault.
466 * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy
467 * uses these registers in future they must be saved and restored.
468 */
469 ALTENTRY(bcopy_altentry)
470 do_copy:
471 #define L(s) .bcopy/**/s
472 cmpq $0x50, %rdx /* 80 */
473 jae bcopy_ck_size
474
475 /*
476 * Performance data shows many callers copy small buffers. So for
477 * best performance for these sizes unrolled code is used. Store data without
478 * worrying about alignment.
479 */
480 leaq L(fwdPxQx)(%rip), %r10
481 addq %rdx, %rdi
482 addq %rdx, %rsi
483 movslq (%r10,%rdx,4), %rcx
484 leaq (%rcx,%r10,1), %r10
485 jmpq *%r10
486
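/*
 * Descriptive note: the table below holds 32-bit offsets of the unrolled
 * fragments relative to L(fwdPxQx), indexed by the byte count. Since %rdi
 * and %rsi were just advanced by the count, each fragment L(PxQy) copies
 * its x + 8*y bytes using negative offsets from the end of the buffers.
 */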
487 .p2align 4
488 L(fwdPxQx):
489 .int L(P0Q0)-L(fwdPxQx) /* 0 */
490 .int L(P1Q0)-L(fwdPxQx)
491 .int L(P2Q0)-L(fwdPxQx)
492 .int L(P3Q0)-L(fwdPxQx)
493 .int L(P4Q0)-L(fwdPxQx)
494 .int L(P5Q0)-L(fwdPxQx)
495 .int L(P6Q0)-L(fwdPxQx)
496 .int L(P7Q0)-L(fwdPxQx)
497
498 .int L(P0Q1)-L(fwdPxQx) /* 8 */
499 .int L(P1Q1)-L(fwdPxQx)
500 .int L(P2Q1)-L(fwdPxQx)
501 .int L(P3Q1)-L(fwdPxQx)
502 .int L(P4Q1)-L(fwdPxQx)
503 .int L(P5Q1)-L(fwdPxQx)
504 .int L(P6Q1)-L(fwdPxQx)
505 .int L(P7Q1)-L(fwdPxQx)
506
507 .int L(P0Q2)-L(fwdPxQx) /* 16 */
508 .int L(P1Q2)-L(fwdPxQx)
509 .int L(P2Q2)-L(fwdPxQx)
510 .int L(P3Q2)-L(fwdPxQx)
511 .int L(P4Q2)-L(fwdPxQx)
512 .int L(P5Q2)-L(fwdPxQx)
513 .int L(P6Q2)-L(fwdPxQx)
514 .int L(P7Q2)-L(fwdPxQx)
515
516 .int L(P0Q3)-L(fwdPxQx) /* 24 */
517 .int L(P1Q3)-L(fwdPxQx)
518 .int L(P2Q3)-L(fwdPxQx)
519 .int L(P3Q3)-L(fwdPxQx)
520 .int L(P4Q3)-L(fwdPxQx)
521 .int L(P5Q3)-L(fwdPxQx)
522 .int L(P6Q3)-L(fwdPxQx)
523 .int L(P7Q3)-L(fwdPxQx)
524
525 .int L(P0Q4)-L(fwdPxQx) /* 32 */
526 .int L(P1Q4)-L(fwdPxQx)
527 .int L(P2Q4)-L(fwdPxQx)
528 .int L(P3Q4)-L(fwdPxQx)
529 .int L(P4Q4)-L(fwdPxQx)
530 .int L(P5Q4)-L(fwdPxQx)
531 .int L(P6Q4)-L(fwdPxQx)
532 .int L(P7Q4)-L(fwdPxQx)
533
534 .int L(P0Q5)-L(fwdPxQx) /* 40 */
535 .int L(P1Q5)-L(fwdPxQx)
536 .int L(P2Q5)-L(fwdPxQx)
537 .int L(P3Q5)-L(fwdPxQx)
538 .int L(P4Q5)-L(fwdPxQx)
539 .int L(P5Q5)-L(fwdPxQx)
540 .int L(P6Q5)-L(fwdPxQx)
541 .int L(P7Q5)-L(fwdPxQx)
542
543 .int L(P0Q6)-L(fwdPxQx) /* 48 */
544 .int L(P1Q6)-L(fwdPxQx)
545 .int L(P2Q6)-L(fwdPxQx)
546 .int L(P3Q6)-L(fwdPxQx)
547 .int L(P4Q6)-L(fwdPxQx)
548 .int L(P5Q6)-L(fwdPxQx)
549 .int L(P6Q6)-L(fwdPxQx)
550 .int L(P7Q6)-L(fwdPxQx)
551
552 .int L(P0Q7)-L(fwdPxQx) /* 56 */
553 .int L(P1Q7)-L(fwdPxQx)
554 .int L(P2Q7)-L(fwdPxQx)
555 .int L(P3Q7)-L(fwdPxQx)
556 .int L(P4Q7)-L(fwdPxQx)
557 .int L(P5Q7)-L(fwdPxQx)
558 .int L(P6Q7)-L(fwdPxQx)
559 .int L(P7Q7)-L(fwdPxQx)
560
561 .int L(P0Q8)-L(fwdPxQx) /* 64 */
562 .int L(P1Q8)-L(fwdPxQx)
563 .int L(P2Q8)-L(fwdPxQx)
564 .int L(P3Q8)-L(fwdPxQx)
565 .int L(P4Q8)-L(fwdPxQx)
566 .int L(P5Q8)-L(fwdPxQx)
567 .int L(P6Q8)-L(fwdPxQx)
568 .int L(P7Q8)-L(fwdPxQx)
569
570 .int L(P0Q9)-L(fwdPxQx) /* 72 */
571 .int L(P1Q9)-L(fwdPxQx)
572 .int L(P2Q9)-L(fwdPxQx)
573 .int L(P3Q9)-L(fwdPxQx)
574 .int L(P4Q9)-L(fwdPxQx)
575 .int L(P5Q9)-L(fwdPxQx)
576 .int L(P6Q9)-L(fwdPxQx)
577 .int L(P7Q9)-L(fwdPxQx) /* 79 */
578
579 .p2align 4
580 L(P0Q9):
581 mov -0x48(%rdi), %rcx
582 mov %rcx, -0x48(%rsi)
583 L(P0Q8):
584 mov -0x40(%rdi), %r10
585 mov %r10, -0x40(%rsi)
586 L(P0Q7):
587 mov -0x38(%rdi), %r8
588 mov %r8, -0x38(%rsi)
589 L(P0Q6):
590 mov -0x30(%rdi), %rcx
591 mov %rcx, -0x30(%rsi)
592 L(P0Q5):
593 mov -0x28(%rdi), %r10
594 mov %r10, -0x28(%rsi)
595 L(P0Q4):
596 mov -0x20(%rdi), %r8
597 mov %r8, -0x20(%rsi)
598 L(P0Q3):
599 mov -0x18(%rdi), %rcx
600 mov %rcx, -0x18(%rsi)
601 L(P0Q2):
602 mov -0x10(%rdi), %r10
603 mov %r10, -0x10(%rsi)
604 L(P0Q1):
605 mov -0x8(%rdi), %r8
606 mov %r8, -0x8(%rsi)
607 L(P0Q0):
608 ret
609
610 .p2align 4
611 L(P1Q9):
612 mov -0x49(%rdi), %r8
613 mov %r8, -0x49(%rsi)
614 L(P1Q8):
615 mov -0x41(%rdi), %rcx
616 mov %rcx, -0x41(%rsi)
617 L(P1Q7):
618 mov -0x39(%rdi), %r10
619 mov %r10, -0x39(%rsi)
620 L(P1Q6):
621 mov -0x31(%rdi), %r8
622 mov %r8, -0x31(%rsi)
623 L(P1Q5):
624 mov -0x29(%rdi), %rcx
625 mov %rcx, -0x29(%rsi)
626 L(P1Q4):
627 mov -0x21(%rdi), %r10
628 mov %r10, -0x21(%rsi)
629 L(P1Q3):
630 mov -0x19(%rdi), %r8
631 mov %r8, -0x19(%rsi)
632 L(P1Q2):
633 mov -0x11(%rdi), %rcx
634 mov %rcx, -0x11(%rsi)
635 L(P1Q1):
636 mov -0x9(%rdi), %r10
637 mov %r10, -0x9(%rsi)
638 L(P1Q0):
639 movzbq -0x1(%rdi), %r8
640 mov %r8b, -0x1(%rsi)
641 ret
642
643 .p2align 4
644 L(P2Q9):
645 mov -0x4a(%rdi), %r8
646 mov %r8, -0x4a(%rsi)
647 L(P2Q8):
648 mov -0x42(%rdi), %rcx
649 mov %rcx, -0x42(%rsi)
650 L(P2Q7):
651 mov -0x3a(%rdi), %r10
652 mov %r10, -0x3a(%rsi)
653 L(P2Q6):
654 mov -0x32(%rdi), %r8
655 mov %r8, -0x32(%rsi)
656 L(P2Q5):
657 mov -0x2a(%rdi), %rcx
658 mov %rcx, -0x2a(%rsi)
659 L(P2Q4):
660 mov -0x22(%rdi), %r10
661 mov %r10, -0x22(%rsi)
662 L(P2Q3):
663 mov -0x1a(%rdi), %r8
664 mov %r8, -0x1a(%rsi)
665 L(P2Q2):
666 mov -0x12(%rdi), %rcx
667 mov %rcx, -0x12(%rsi)
668 L(P2Q1):
669 mov -0xa(%rdi), %r10
670 mov %r10, -0xa(%rsi)
671 L(P2Q0):
672 movzwq -0x2(%rdi), %r8
673 mov %r8w, -0x2(%rsi)
674 ret
675
676 .p2align 4
677 L(P3Q9):
678 mov -0x4b(%rdi), %r8
679 mov %r8, -0x4b(%rsi)
680 L(P3Q8):
681 mov -0x43(%rdi), %rcx
682 mov %rcx, -0x43(%rsi)
683 L(P3Q7):
684 mov -0x3b(%rdi), %r10
685 mov %r10, -0x3b(%rsi)
686 L(P3Q6):
687 mov -0x33(%rdi), %r8
688 mov %r8, -0x33(%rsi)
689 L(P3Q5):
690 mov -0x2b(%rdi), %rcx
691 mov %rcx, -0x2b(%rsi)
692 L(P3Q4):
693 mov -0x23(%rdi), %r10
694 mov %r10, -0x23(%rsi)
695 L(P3Q3):
696 mov -0x1b(%rdi), %r8
697 mov %r8, -0x1b(%rsi)
698 L(P3Q2):
699 mov -0x13(%rdi), %rcx
700 mov %rcx, -0x13(%rsi)
701 L(P3Q1):
702 mov -0xb(%rdi), %r10
703 mov %r10, -0xb(%rsi)
704 /*
705 * These trailing loads/stores have to do all their loads 1st,
706 * then do the stores.
707 */
708 L(P3Q0):
709 movzwq -0x3(%rdi), %r8
710 movzbq -0x1(%rdi), %r10
711 mov %r8w, -0x3(%rsi)
712 mov %r10b, -0x1(%rsi)
713 ret
714
715 .p2align 4
716 L(P4Q9):
717 mov -0x4c(%rdi), %r8
718 mov %r8, -0x4c(%rsi)
719 L(P4Q8):
720 mov -0x44(%rdi), %rcx
721 mov %rcx, -0x44(%rsi)
722 L(P4Q7):
723 mov -0x3c(%rdi), %r10
724 mov %r10, -0x3c(%rsi)
725 L(P4Q6):
726 mov -0x34(%rdi), %r8
727 mov %r8, -0x34(%rsi)
728 L(P4Q5):
729 mov -0x2c(%rdi), %rcx
730 mov %rcx, -0x2c(%rsi)
731 L(P4Q4):
732 mov -0x24(%rdi), %r10
733 mov %r10, -0x24(%rsi)
734 L(P4Q3):
735 mov -0x1c(%rdi), %r8
736 mov %r8, -0x1c(%rsi)
737 L(P4Q2):
738 mov -0x14(%rdi), %rcx
739 mov %rcx, -0x14(%rsi)
740 L(P4Q1):
741 mov -0xc(%rdi), %r10
742 mov %r10, -0xc(%rsi)
743 L(P4Q0):
744 mov -0x4(%rdi), %r8d
745 mov %r8d, -0x4(%rsi)
746 ret
747
748 .p2align 4
749 L(P5Q9):
750 mov -0x4d(%rdi), %r8
751 mov %r8, -0x4d(%rsi)
752 L(P5Q8):
753 mov -0x45(%rdi), %rcx
754 mov %rcx, -0x45(%rsi)
755 L(P5Q7):
756 mov -0x3d(%rdi), %r10
757 mov %r10, -0x3d(%rsi)
758 L(P5Q6):
759 mov -0x35(%rdi), %r8
760 mov %r8, -0x35(%rsi)
761 L(P5Q5):
762 mov -0x2d(%rdi), %rcx
763 mov %rcx, -0x2d(%rsi)
764 L(P5Q4):
765 mov -0x25(%rdi), %r10
766 mov %r10, -0x25(%rsi)
767 L(P5Q3):
768 mov -0x1d(%rdi), %r8
769 mov %r8, -0x1d(%rsi)
770 L(P5Q2):
771 mov -0x15(%rdi), %rcx
772 mov %rcx, -0x15(%rsi)
773 L(P5Q1):
774 mov -0xd(%rdi), %r10
775 mov %r10, -0xd(%rsi)
776 L(P5Q0):
777 mov -0x5(%rdi), %r8d
778 movzbq -0x1(%rdi), %r10
779 mov %r8d, -0x5(%rsi)
780 mov %r10b, -0x1(%rsi)
781 ret
782
783 .p2align 4
784 L(P6Q9):
785 mov -0x4e(%rdi), %r8
786 mov %r8, -0x4e(%rsi)
787 L(P6Q8):
788 mov -0x46(%rdi), %rcx
789 mov %rcx, -0x46(%rsi)
790 L(P6Q7):
791 mov -0x3e(%rdi), %r10
792 mov %r10, -0x3e(%rsi)
793 L(P6Q6):
794 mov -0x36(%rdi), %r8
795 mov %r8, -0x36(%rsi)
796 L(P6Q5):
797 mov -0x2e(%rdi), %rcx
798 mov %rcx, -0x2e(%rsi)
799 L(P6Q4):
800 mov -0x26(%rdi), %r10
801 mov %r10, -0x26(%rsi)
802 L(P6Q3):
803 mov -0x1e(%rdi), %r8
804 mov %r8, -0x1e(%rsi)
805 L(P6Q2):
806 mov -0x16(%rdi), %rcx
807 mov %rcx, -0x16(%rsi)
808 L(P6Q1):
809 mov -0xe(%rdi), %r10
810 mov %r10, -0xe(%rsi)
811 L(P6Q0):
812 mov -0x6(%rdi), %r8d
813 movzwq -0x2(%rdi), %r10
814 mov %r8d, -0x6(%rsi)
815 mov %r10w, -0x2(%rsi)
816 ret
817
818 .p2align 4
819 L(P7Q9):
820 mov -0x4f(%rdi), %r8
821 mov %r8, -0x4f(%rsi)
822 L(P7Q8):
823 mov -0x47(%rdi), %rcx
824 mov %rcx, -0x47(%rsi)
825 L(P7Q7):
826 mov -0x3f(%rdi), %r10
827 mov %r10, -0x3f(%rsi)
828 L(P7Q6):
829 mov -0x37(%rdi), %r8
830 mov %r8, -0x37(%rsi)
831 L(P7Q5):
832 mov -0x2f(%rdi), %rcx
833 mov %rcx, -0x2f(%rsi)
834 L(P7Q4):
835 mov -0x27(%rdi), %r10
836 mov %r10, -0x27(%rsi)
837 L(P7Q3):
838 mov -0x1f(%rdi), %r8
839 mov %r8, -0x1f(%rsi)
840 L(P7Q2):
841 mov -0x17(%rdi), %rcx
842 mov %rcx, -0x17(%rsi)
843 L(P7Q1):
844 mov -0xf(%rdi), %r10
845 mov %r10, -0xf(%rsi)
846 L(P7Q0):
847 mov -0x7(%rdi), %r8d
848 movzwq -0x3(%rdi), %r10
849 movzbq -0x1(%rdi), %rcx
850 mov %r8d, -0x7(%rsi)
851 mov %r10w, -0x3(%rsi)
852 mov %cl, -0x1(%rsi)
853 ret
854
855 /*
856 * For large sizes rep smovq is fastest.
857 * Transition point determined experimentally as measured on
858 * Intel Xeon processors (incl. Nehalem and previous generations) and
859 * AMD Opteron. The transition value is patched at boot time to avoid
860 * memory reference hit.
861 */
862 .globl bcopy_patch_start
863 bcopy_patch_start:
864 cmpq $BCOPY_NHM_REP, %rdx
865 .globl bcopy_patch_end
866 bcopy_patch_end:
867
868 .p2align 4
869 .globl bcopy_ck_size
870 bcopy_ck_size:
871 cmpq $BCOPY_DFLT_REP, %rdx
872 jae L(use_rep)
873
874 /*
875 * Align to an 8-byte boundary. Avoids penalties from unaligned stores
876 * as well as from stores spanning cachelines.
877 */
878 test $0x7, %rsi
879 jz L(aligned_loop)
880 test $0x1, %rsi
881 jz 2f
882 movzbq (%rdi), %r8
883 dec %rdx
884 inc %rdi
885 mov %r8b, (%rsi)
886 inc %rsi
887 2:
888 test $0x2, %rsi
889 jz 4f
890 movzwq (%rdi), %r8
891 sub $0x2, %rdx
892 add $0x2, %rdi
893 mov %r8w, (%rsi)
894 add $0x2, %rsi
895 4:
896 test $0x4, %rsi
897 jz L(aligned_loop)
898 mov (%rdi), %r8d
899 sub $0x4, %rdx
900 add $0x4, %rdi
901 mov %r8d, (%rsi)
902 add $0x4, %rsi
903
904 /*
905 * Copy 64-bytes per loop
906 */
907 .p2align 4
908 L(aligned_loop):
909 mov (%rdi), %r8
910 mov 0x8(%rdi), %r10
911 lea -0x40(%rdx), %rdx
912 mov %r8, (%rsi)
913 mov %r10, 0x8(%rsi)
914 mov 0x10(%rdi), %rcx
915 mov 0x18(%rdi), %r8
916 mov %rcx, 0x10(%rsi)
917 mov %r8, 0x18(%rsi)
918
919 cmp $0x40, %rdx
920 mov 0x20(%rdi), %r10
921 mov 0x28(%rdi), %rcx
922 mov %r10, 0x20(%rsi)
923 mov %rcx, 0x28(%rsi)
924 mov 0x30(%rdi), %r8
925 mov 0x38(%rdi), %r10
926 lea 0x40(%rdi), %rdi
927 mov %r8, 0x30(%rsi)
928 mov %r10, 0x38(%rsi)
929 lea 0x40(%rsi), %rsi
930 jae L(aligned_loop)
931
932 /*
933 * Copy remaining bytes (0-63)
934 */
935 L(do_remainder):
936 leaq L(fwdPxQx)(%rip), %r10
937 addq %rdx, %rdi
938 addq %rdx, %rsi
939 movslq (%r10,%rdx,4), %rcx
940 leaq (%rcx,%r10,1), %r10
941 jmpq *%r10
942
943 /*
944 * Use rep smovq. Clear remainder via unrolled code
945 */
946 .p2align 4
947 L(use_rep):
948 xchgq %rdi, %rsi /* %rsi = source, %rdi = destination */
949 movq %rdx, %rcx /* %rcx = count */
950 shrq $3, %rcx /* 8-byte word count */
951 rep
952 smovq
953
954 xchgq %rsi, %rdi /* %rdi = src, %rsi = destination */
955 andq $7, %rdx /* remainder */
956 jnz L(do_remainder)
957 ret
958 #undef L
959
960 #ifdef DEBUG
961 /*
962 * Setup frame on the run-time stack. The end of the input argument
963 * area must be aligned on a 16-byte boundary. The stack pointer %rsp
964 * always points to the end of the latest allocated stack frame.
965 * panic(const char *format, ...) is a varargs function. When a
966 * function taking variable arguments is called, %al must be set
967 * to the number of floating point parameters passed to the
968 * function in SSE registers (zero here, hence the xorl below).
969 */
970 call_panic:
971 pushq %rbp /* align stack properly */
972 movq %rsp, %rbp
973 xorl %eax, %eax /* no variable arguments */
974 call panic /* %rdi = format string */
975 #endif
976 SET_SIZE(bcopy_altentry)
977 SET_SIZE(bcopy)
978
979 #elif defined(__i386)
980
981 #define ARG_FROM 4
982 #define ARG_TO 8
983 #define ARG_COUNT 12
984
985 ENTRY(bcopy)
986 #ifdef DEBUG
987 movl ARG_COUNT(%esp), %eax
988 orl %eax, %eax
989 jz 1f
990 movl postbootkernelbase, %eax
991 cmpl %eax, ARG_FROM(%esp)
992 jb 0f
993 cmpl %eax, ARG_TO(%esp)
994 jnb 1f
995 0: pushl %ebp
996 movl %esp, %ebp
997 pushl $.bcopy_panic_msg
998 call panic
999 1:
1000 #endif
1001 do_copy:
1002 movl %esi, %eax /* save registers */
1003 movl %edi, %edx
1004 movl ARG_COUNT(%esp), %ecx
1005 movl ARG_FROM(%esp), %esi
1006 movl ARG_TO(%esp), %edi
1007
1008 shrl $2, %ecx /* word count */
1009 rep
1010 smovl
1011 movl ARG_COUNT(%esp), %ecx
1012 andl $3, %ecx /* bytes left over */
1013 rep
1014 smovb
1015 movl %eax, %esi /* restore registers */
1016 movl %edx, %edi
1017 ret
1018 SET_SIZE(bcopy)
1019
1020 #undef ARG_COUNT
1021 #undef ARG_FROM
1022 #undef ARG_TO
1023
1024 #endif /* __i386 */
1025 #endif /* __lint */
1026
1027
1028 /*
1029 * Zero a block of storage, returning an error code if we
1030 * take a kernel pagefault which cannot be resolved.
1031 * Returns errno value on pagefault error, 0 if all ok
1032 */
1033
1034 #if defined(__lint)
1035
1036 /* ARGSUSED */
1037 int
1038 kzero(void *addr, size_t count)
1039 { return (0); }
1040
1041 #else /* __lint */
1042
1043 #if defined(__amd64)
1044
1045 ENTRY(kzero)
1046 #ifdef DEBUG
1047 cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */
1048 jnb 0f
1049 leaq .kzero_panic_msg(%rip), %rdi
1050 jmp call_panic /* setup stack and call panic */
1051 0:
1052 #endif
1053 /*
1054 * pass lofault value as 3rd argument for fault return
1055 */
1056 leaq _kzeroerr(%rip), %rdx
1057
1058 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
1059 movq T_LOFAULT(%r9), %r11 /* save the current lofault */
1060 movq %rdx, T_LOFAULT(%r9) /* new lofault */
1061 call bzero_altentry
1062 xorl %eax, %eax
1063 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
1064 ret
1065 /*
1066 * A fault during bzero is indicated through an errno value
1067 * in %rax when we iretq to here.
1068 */
1069 _kzeroerr:
1070 addq $8, %rsp /* pop bzero_altentry call ret addr */
1071 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
1072 ret
1073 SET_SIZE(kzero)
1074
1075 #elif defined(__i386)
1076
1077 #define ARG_ADDR 8
1078 #define ARG_COUNT 12
1079
1080 ENTRY(kzero)
1081 #ifdef DEBUG
1082 pushl %ebp
1083 movl %esp, %ebp
1084 movl postbootkernelbase, %eax
1085 cmpl %eax, ARG_ADDR(%ebp)
1086 jnb 0f
1087 pushl $.kzero_panic_msg
1088 call panic
1089 0: popl %ebp
1090 #endif
1091 lea _kzeroerr, %eax /* kzeroerr is lofault value */
1092
1093 pushl %ebp /* save stack base */
1094 movl %esp, %ebp /* set new stack base */
1095 pushl %edi /* save %edi */
1096
1097 mov %gs:CPU_THREAD, %edx
1098 movl T_LOFAULT(%edx), %edi
1099 pushl %edi /* save the current lofault */
1100 movl %eax, T_LOFAULT(%edx) /* new lofault */
1101
1102 movl ARG_COUNT(%ebp), %ecx /* get size in bytes */
1103 movl ARG_ADDR(%ebp), %edi /* %edi <- address of bytes to clear */
1104 shrl $2, %ecx /* Count of double words to zero */
1105 xorl %eax, %eax /* sstol val */
1106 rep
1107 sstol /* %ecx contains words to clear (%eax=0) */
1108
1109 movl ARG_COUNT(%ebp), %ecx /* get size in bytes */
1110 andl $3, %ecx /* do mod 4 */
1111 rep
1112 sstob /* %ecx contains residual bytes to clear */
1113
1114 /*
1115 * A fault during kzero is indicated through an errno value
1116 * in %eax when we iret to here.
1117 */
1118 _kzeroerr:
1119 popl %edi
1120 movl %edi, T_LOFAULT(%edx) /* restore the original lofault */
1121 popl %edi
1122 popl %ebp
1123 ret
1124 SET_SIZE(kzero)
1125
1126 #undef ARG_ADDR
1127 #undef ARG_COUNT
1128
1129 #endif /* __i386 */
1130 #endif /* __lint */
1131
1132 /*
1133 * Zero a block of storage.
1134 */
1135
1136 #if defined(__lint)
1137
1138 /* ARGSUSED */
1139 void
1140 bzero(void *addr, size_t count)
1141 {}
1142
1143 #else /* __lint */
1144
1145 #if defined(__amd64)
1146
1147 ENTRY(bzero)
1148 #ifdef DEBUG
1149 cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */
1150 jnb 0f
1151 leaq .bzero_panic_msg(%rip), %rdi
1152 jmp call_panic /* setup stack and call panic */
1153 0:
1154 #endif
1155 ALTENTRY(bzero_altentry)
1156 do_zero:
1157 #define L(s) .bzero/**/s
1158 xorl %eax, %eax
1159
1160 cmpq $0x50, %rsi /* 80 */
1161 jae L(ck_align)
1162
1163 /*
1164 * Performance data shows many callers are zeroing small buffers. So
1165 * for best performance for these sizes unrolled code is used. Store zeros
1166 * without worrying about alignment.
1167 */
1168 leaq L(setPxQx)(%rip), %r10
1169 addq %rsi, %rdi
1170 movslq (%r10,%rsi,4), %rcx
1171 leaq (%rcx,%r10,1), %r10
1172 jmpq *%r10
1173
1174 .p2align 4
1175 L(setPxQx):
1176 .int L(P0Q0)-L(setPxQx) /* 0 */
1177 .int L(P1Q0)-L(setPxQx)
1178 .int L(P2Q0)-L(setPxQx)
1179 .int L(P3Q0)-L(setPxQx)
1180 .int L(P4Q0)-L(setPxQx)
1181 .int L(P5Q0)-L(setPxQx)
1182 .int L(P6Q0)-L(setPxQx)
1183 .int L(P7Q0)-L(setPxQx)
1184
1185 .int L(P0Q1)-L(setPxQx) /* 8 */
1186 .int L(P1Q1)-L(setPxQx)
1187 .int L(P2Q1)-L(setPxQx)
1188 .int L(P3Q1)-L(setPxQx)
1189 .int L(P4Q1)-L(setPxQx)
1190 .int L(P5Q1)-L(setPxQx)
1191 .int L(P6Q1)-L(setPxQx)
1192 .int L(P7Q1)-L(setPxQx)
1193
1194 .int L(P0Q2)-L(setPxQx) /* 16 */
1195 .int L(P1Q2)-L(setPxQx)
1196 .int L(P2Q2)-L(setPxQx)
1197 .int L(P3Q2)-L(setPxQx)
1198 .int L(P4Q2)-L(setPxQx)
1199 .int L(P5Q2)-L(setPxQx)
1200 .int L(P6Q2)-L(setPxQx)
1201 .int L(P7Q2)-L(setPxQx)
1202
1203 .int L(P0Q3)-L(setPxQx) /* 24 */
1204 .int L(P1Q3)-L(setPxQx)
1205 .int L(P2Q3)-L(setPxQx)
1206 .int L(P3Q3)-L(setPxQx)
1207 .int L(P4Q3)-L(setPxQx)
1208 .int L(P5Q3)-L(setPxQx)
1209 .int L(P6Q3)-L(setPxQx)
1210 .int L(P7Q3)-L(setPxQx)
1211
1212 .int L(P0Q4)-L(setPxQx) /* 32 */
1213 .int L(P1Q4)-L(setPxQx)
1214 .int L(P2Q4)-L(setPxQx)
1215 .int L(P3Q4)-L(setPxQx)
1216 .int L(P4Q4)-L(setPxQx)
1217 .int L(P5Q4)-L(setPxQx)
1218 .int L(P6Q4)-L(setPxQx)
1219 .int L(P7Q4)-L(setPxQx)
1220
1221 .int L(P0Q5)-L(setPxQx) /* 40 */
1222 .int L(P1Q5)-L(setPxQx)
1223 .int L(P2Q5)-L(setPxQx)
1224 .int L(P3Q5)-L(setPxQx)
1225 .int L(P4Q5)-L(setPxQx)
1226 .int L(P5Q5)-L(setPxQx)
1227 .int L(P6Q5)-L(setPxQx)
1228 .int L(P7Q5)-L(setPxQx)
1229
1230 .int L(P0Q6)-L(setPxQx) /* 48 */
1231 .int L(P1Q6)-L(setPxQx)
1232 .int L(P2Q6)-L(setPxQx)
1233 .int L(P3Q6)-L(setPxQx)
1234 .int L(P4Q6)-L(setPxQx)
1235 .int L(P5Q6)-L(setPxQx)
1236 .int L(P6Q6)-L(setPxQx)
1237 .int L(P7Q6)-L(setPxQx)
1238
1239 .int L(P0Q7)-L(setPxQx) /* 56 */
1240 .int L(P1Q7)-L(setPxQx)
1241 .int L(P2Q7)-L(setPxQx)
1242 .int L(P3Q7)-L(setPxQx)
1243 .int L(P4Q7)-L(setPxQx)
1244 .int L(P5Q7)-L(setPxQx)
1245 .int L(P6Q7)-L(setPxQx)
1246 .int L(P7Q7)-L(setPxQx)
1247
1248 .int L(P0Q8)-L(setPxQx) /* 64 */
1249 .int L(P1Q8)-L(setPxQx)
1250 .int L(P2Q8)-L(setPxQx)
1251 .int L(P3Q8)-L(setPxQx)
1252 .int L(P4Q8)-L(setPxQx)
1253 .int L(P5Q8)-L(setPxQx)
1254 .int L(P6Q8)-L(setPxQx)
1255 .int L(P7Q8)-L(setPxQx)
1256
1257 .int L(P0Q9)-L(setPxQx) /* 72 */
1258 .int L(P1Q9)-L(setPxQx)
1259 .int L(P2Q9)-L(setPxQx)
1260 .int L(P3Q9)-L(setPxQx)
1261 .int L(P4Q9)-L(setPxQx)
1262 .int L(P5Q9)-L(setPxQx)
1263 .int L(P6Q9)-L(setPxQx)
1264 .int L(P7Q9)-L(setPxQx) /* 79 */
1265
1266 .p2align 4
1267 L(P0Q9): mov %rax, -0x48(%rdi)
1268 L(P0Q8): mov %rax, -0x40(%rdi)
1269 L(P0Q7): mov %rax, -0x38(%rdi)
1270 L(P0Q6): mov %rax, -0x30(%rdi)
1271 L(P0Q5): mov %rax, -0x28(%rdi)
1272 L(P0Q4): mov %rax, -0x20(%rdi)
1273 L(P0Q3): mov %rax, -0x18(%rdi)
1274 L(P0Q2): mov %rax, -0x10(%rdi)
1275 L(P0Q1): mov %rax, -0x8(%rdi)
1276 L(P0Q0):
1277 ret
1278
1279 .p2align 4
1280 L(P1Q9): mov %rax, -0x49(%rdi)
1281 L(P1Q8): mov %rax, -0x41(%rdi)
1282 L(P1Q7): mov %rax, -0x39(%rdi)
1283 L(P1Q6): mov %rax, -0x31(%rdi)
1284 L(P1Q5): mov %rax, -0x29(%rdi)
1285 L(P1Q4): mov %rax, -0x21(%rdi)
1286 L(P1Q3): mov %rax, -0x19(%rdi)
1287 L(P1Q2): mov %rax, -0x11(%rdi)
1288 L(P1Q1): mov %rax, -0x9(%rdi)
1289 L(P1Q0): mov %al, -0x1(%rdi)
1290 ret
1291
1292 .p2align 4
1293 L(P2Q9): mov %rax, -0x4a(%rdi)
1294 L(P2Q8): mov %rax, -0x42(%rdi)
1295 L(P2Q7): mov %rax, -0x3a(%rdi)
1296 L(P2Q6): mov %rax, -0x32(%rdi)
1297 L(P2Q5): mov %rax, -0x2a(%rdi)
1298 L(P2Q4): mov %rax, -0x22(%rdi)
1299 L(P2Q3): mov %rax, -0x1a(%rdi)
1300 L(P2Q2): mov %rax, -0x12(%rdi)
1301 L(P2Q1): mov %rax, -0xa(%rdi)
1302 L(P2Q0): mov %ax, -0x2(%rdi)
1303 ret
1304
1305 .p2align 4
1306 L(P3Q9): mov %rax, -0x4b(%rdi)
1307 L(P3Q8): mov %rax, -0x43(%rdi)
1308 L(P3Q7): mov %rax, -0x3b(%rdi)
1309 L(P3Q6): mov %rax, -0x33(%rdi)
1310 L(P3Q5): mov %rax, -0x2b(%rdi)
1311 L(P3Q4): mov %rax, -0x23(%rdi)
1312 L(P3Q3): mov %rax, -0x1b(%rdi)
1313 L(P3Q2): mov %rax, -0x13(%rdi)
1314 L(P3Q1): mov %rax, -0xb(%rdi)
1315 L(P3Q0): mov %ax, -0x3(%rdi)
1316 mov %al, -0x1(%rdi)
1317 ret
1318
1319 .p2align 4
1320 L(P4Q9): mov %rax, -0x4c(%rdi)
1321 L(P4Q8): mov %rax, -0x44(%rdi)
1322 L(P4Q7): mov %rax, -0x3c(%rdi)
1323 L(P4Q6): mov %rax, -0x34(%rdi)
1324 L(P4Q5): mov %rax, -0x2c(%rdi)
1325 L(P4Q4): mov %rax, -0x24(%rdi)
1326 L(P4Q3): mov %rax, -0x1c(%rdi)
1327 L(P4Q2): mov %rax, -0x14(%rdi)
1328 L(P4Q1): mov %rax, -0xc(%rdi)
1329 L(P4Q0): mov %eax, -0x4(%rdi)
1330 ret
1331
1332 .p2align 4
1333 L(P5Q9): mov %rax, -0x4d(%rdi)
1334 L(P5Q8): mov %rax, -0x45(%rdi)
1335 L(P5Q7): mov %rax, -0x3d(%rdi)
1336 L(P5Q6): mov %rax, -0x35(%rdi)
1337 L(P5Q5): mov %rax, -0x2d(%rdi)
1338 L(P5Q4): mov %rax, -0x25(%rdi)
1339 L(P5Q3): mov %rax, -0x1d(%rdi)
1340 L(P5Q2): mov %rax, -0x15(%rdi)
1341 L(P5Q1): mov %rax, -0xd(%rdi)
1342 L(P5Q0): mov %eax, -0x5(%rdi)
1343 mov %al, -0x1(%rdi)
1344 ret
1345
1346 .p2align 4
1347 L(P6Q9): mov %rax, -0x4e(%rdi)
1348 L(P6Q8): mov %rax, -0x46(%rdi)
1349 L(P6Q7): mov %rax, -0x3e(%rdi)
1350 L(P6Q6): mov %rax, -0x36(%rdi)
1351 L(P6Q5): mov %rax, -0x2e(%rdi)
1352 L(P6Q4): mov %rax, -0x26(%rdi)
1353 L(P6Q3): mov %rax, -0x1e(%rdi)
1354 L(P6Q2): mov %rax, -0x16(%rdi)
1355 L(P6Q1): mov %rax, -0xe(%rdi)
1356 L(P6Q0): mov %eax, -0x6(%rdi)
1357 mov %ax, -0x2(%rdi)
1358 ret
1359
1360 .p2align 4
1361 L(P7Q9): mov %rax, -0x4f(%rdi)
1362 L(P7Q8): mov %rax, -0x47(%rdi)
1363 L(P7Q7): mov %rax, -0x3f(%rdi)
1364 L(P7Q6): mov %rax, -0x37(%rdi)
1365 L(P7Q5): mov %rax, -0x2f(%rdi)
1366 L(P7Q4): mov %rax, -0x27(%rdi)
1367 L(P7Q3): mov %rax, -0x1f(%rdi)
1368 L(P7Q2): mov %rax, -0x17(%rdi)
1369 L(P7Q1): mov %rax, -0xf(%rdi)
1370 L(P7Q0): mov %eax, -0x7(%rdi)
1371 mov %ax, -0x3(%rdi)
1372 mov %al, -0x1(%rdi)
1373 ret
1374
1375 /*
1376 * Align to a 16-byte boundary. Avoids penalties from unaligned stores
1377 * as well as from stores spanning cachelines. Note 16-byte alignment
1378 * is better in the case where rep sstoq is used.
1379 */
1380 .p2align 4
1381 L(ck_align):
1382 test $0xf, %rdi
1383 jz L(aligned_now)
1384 test $1, %rdi
1385 jz 2f
1386 mov %al, (%rdi)
1387 dec %rsi
1388 lea 1(%rdi),%rdi
1389 2:
1390 test $2, %rdi
1391 jz 4f
1392 mov %ax, (%rdi)
1393 sub $2, %rsi
1394 lea 2(%rdi),%rdi
1395 4:
1396 test $4, %rdi
1397 jz 8f
1398 mov %eax, (%rdi)
1399 sub $4, %rsi
1400 lea 4(%rdi),%rdi
1401 8:
1402 test $8, %rdi
1403 jz L(aligned_now)
1404 mov %rax, (%rdi)
1405 sub $8, %rsi
1406 lea 8(%rdi),%rdi
1407
1408 /*
1409 * For large sizes rep sstoq is fastest.
1410 * Transition point determined experimentally as measured on
1411 * Intel Xeon processors (incl. Nehalem) and AMD Opteron.
1412 */
1413 L(aligned_now):
1414 cmp $BZERO_USE_REP, %rsi
1415 ja L(use_rep)
1416
1417 /*
1418 * zero 64-bytes per loop
1419 */
1420 .p2align 4
1421 L(bzero_loop):
1422 leaq -0x40(%rsi), %rsi
1423 cmpq $0x40, %rsi
1424 movq %rax, (%rdi)
1425 movq %rax, 0x8(%rdi)
1426 movq %rax, 0x10(%rdi)
1427 movq %rax, 0x18(%rdi)
1428 movq %rax, 0x20(%rdi)
1429 movq %rax, 0x28(%rdi)
1430 movq %rax, 0x30(%rdi)
1431 movq %rax, 0x38(%rdi)
1432 leaq 0x40(%rdi), %rdi
1433 jae L(bzero_loop)
1434
1435 /*
1436 * Clear any remaining bytes.
1437 */
1438 9:
1439 leaq L(setPxQx)(%rip), %r10
1440 addq %rsi, %rdi
1441 movslq (%r10,%rsi,4), %rcx
1442 leaq (%rcx,%r10,1), %r10
1443 jmpq *%r10
1444
1445 /*
1446 * Use rep sstoq. Clear any remainder via unrolled code
1447 */
1448 .p2align 4
1449 L(use_rep):
1450 movq %rsi, %rcx /* get size in bytes */
1451 shrq $3, %rcx /* count of 8-byte words to zero */
1452 rep
1453 sstoq /* %rcx = words to clear (%rax=0) */
1454 andq $7, %rsi /* remaining bytes */
1455 jnz 9b
1456 ret
1457 #undef L
1458 SET_SIZE(bzero_altentry)
1459 SET_SIZE(bzero)
1460
1461 #elif defined(__i386)
1462
1463 #define ARG_ADDR 4
1464 #define ARG_COUNT 8
1465
1466 ENTRY(bzero)
1467 #ifdef DEBUG
1468 movl postbootkernelbase, %eax
1469 cmpl %eax, ARG_ADDR(%esp)
1470 jnb 0f
1471 pushl %ebp
1472 movl %esp, %ebp
1473 pushl $.bzero_panic_msg
1474 call panic
1475 0:
1476 #endif
1477 do_zero:
1478 movl %edi, %edx
1479 movl ARG_COUNT(%esp), %ecx
1480 movl ARG_ADDR(%esp), %edi
1481 shrl $2, %ecx
1482 xorl %eax, %eax
1483 rep
1484 sstol
1485 movl ARG_COUNT(%esp), %ecx
1486 andl $3, %ecx
1487 rep
1488 sstob
1489 movl %edx, %edi
1490 ret
1491 SET_SIZE(bzero)
1492
1493 #undef ARG_ADDR
1494 #undef ARG_COUNT
1495
1496 #endif /* __i386 */
1497 #endif /* __lint */
1498
1499 /*
1500 * Transfer data to and from user space -
1501 * Note that these routines can cause faults.
1502 * It is assumed that the kernel has nothing at
1503 * less than KERNELBASE in the virtual address space.
1504 *
1505 * Note that copyin(9F) and copyout(9F) are part of the
1506 * DDI/DKI which specifies that they return '-1' on "errors."
1507 *
1508 * Sigh.
1509 *
1510 * So there are two extremely similar routines, xcopyin_nta() and
1511 * xcopyout_nta(), which return the errno that we've faithfully computed.
1512 * This allows other callers (e.g. uiomove(9F)) to work correctly.
1513 * Given that these are used pretty heavily, we expand the calling
1514 * sequences inline for all flavours (rather than making wrappers).
1515 */
1516
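/*
 * A hypothetical caller (illustration only) would treat the two flavours
 * differently:
 *
 *	if (copyin(uaddr, kaddr, len) != 0)
 *		return (EFAULT);		-- DDI flavour: -1 on failure
 *
 *	if ((error = xcopyin_nta(uaddr, kaddr, len, 0)) != 0)
 *		return (error);			-- errno flavour, e.g. uiomove(9F)
 */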
1517 /*
1518 * Copy user data to kernel space.
1519 */
1520
1521 #if defined(__lint)
1522
1523 /* ARGSUSED */
1524 int
1525 copyin(const void *uaddr, void *kaddr, size_t count)
1526 { return (0); }
1527
1528 #else /* lint */
1529
1530 #if defined(__amd64)
1531
1532 ENTRY(copyin)
1533 pushq %rbp
1534 movq %rsp, %rbp
1535 subq $24, %rsp
1536
1537 /*
1538 * save args in case we trap and need to rerun as a copyop
1539 */
1540 movq %rdi, (%rsp)
1541 movq %rsi, 0x8(%rsp)
1542 movq %rdx, 0x10(%rsp)
1543
1544 movq kernelbase(%rip), %rax
1545 #ifdef DEBUG
1546 cmpq %rax, %rsi /* %rsi = kaddr */
1547 jnb 1f
1548 leaq .copyin_panic_msg(%rip), %rdi
1549 xorl %eax, %eax
1550 call panic
1551 1:
1552 #endif
1553 /*
1554 * pass lofault value as 4th argument to do_copy_fault
1555 */
1556 leaq _copyin_err(%rip), %rcx
1557
1558 movq %gs:CPU_THREAD, %r9
1559 cmpq %rax, %rdi /* test uaddr < kernelbase */
1560 jae 3f /* take copyop if uaddr > kernelbase */
1561 SMAP_DISABLE_INSTR(0)
1562 jmp do_copy_fault /* Takes care of leave for us */
1563
1564 _copyin_err:
1565 SMAP_ENABLE_INSTR(2)
1566 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1567 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1568 3:
1569 movq T_COPYOPS(%r9), %rax
1570 cmpq $0, %rax
1571 jz 2f
1572 /*
1573 * reload args for the copyop
1574 */
1575 movq (%rsp), %rdi
1576 movq 0x8(%rsp), %rsi
1577 movq 0x10(%rsp), %rdx
1578 leave
1579 jmp *CP_COPYIN(%rax)
1580
1581 2: movl $-1, %eax
1582 leave
1583 ret
1584 SET_SIZE(copyin)
1585
1586 #elif defined(__i386)
1587
1588 #define ARG_UADDR 4
1589 #define ARG_KADDR 8
1590
1591 ENTRY(copyin)
1592 movl kernelbase, %ecx
1593 #ifdef DEBUG
1594 cmpl %ecx, ARG_KADDR(%esp)
1595 jnb 1f
1596 pushl %ebp
1597 movl %esp, %ebp
1598 pushl $.copyin_panic_msg
1599 call panic
1600 1:
1601 #endif
1602 lea _copyin_err, %eax
1603
1604 movl %gs:CPU_THREAD, %edx
1605 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
1606 jb do_copy_fault
1607 jmp 3f
1608
1609 _copyin_err:
1610 popl %ecx
1611 popl %edi
1612 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */
1613 popl %esi
1614 popl %ebp
1615 3:
1616 movl T_COPYOPS(%edx), %eax
1617 cmpl $0, %eax
1618 jz 2f
1619 jmp *CP_COPYIN(%eax)
1620
1621 2: movl $-1, %eax
1622 ret
1623 SET_SIZE(copyin)
1624
1625 #undef ARG_UADDR
1626 #undef ARG_KADDR
1627
1628 #endif /* __i386 */
1629 #endif /* __lint */
1630
1631 #if defined(__lint)
1632
1633 /* ARGSUSED */
1634 int
1635 xcopyin_nta(const void *uaddr, void *kaddr, size_t count, int copy_cached)
1636 { return (0); }
1637
1638 #else /* __lint */
1639
1640 #if defined(__amd64)
1641
1642 ENTRY(xcopyin_nta)
1643 pushq %rbp
1644 movq %rsp, %rbp
1645 subq $24, %rsp
1646
1647 /*
1648 * save args in case we trap and need to rerun as a copyop
1649 * %rcx is consumed in this routine so we don't need to save
1650 * it.
1651 */
1652 movq %rdi, (%rsp)
1653 movq %rsi, 0x8(%rsp)
1654 movq %rdx, 0x10(%rsp)
1655
1656 movq kernelbase(%rip), %rax
1657 #ifdef DEBUG
1658 cmpq %rax, %rsi /* %rsi = kaddr */
1659 jnb 1f
1660 leaq .xcopyin_panic_msg(%rip), %rdi
1661 xorl %eax, %eax
1662 call panic
1663 1:
1664 #endif
1665 movq %gs:CPU_THREAD, %r9
1666 cmpq %rax, %rdi /* test uaddr < kernelbase */
1667 jae 4f
1668 cmpq $0, %rcx /* No non-temporal access? */
1669 /*
1670 * pass lofault value as 4th argument to do_copy_fault
1671 */
1672 leaq _xcopyin_err(%rip), %rcx /* doesn't set rflags */
1673 jnz 6f /* use regular access */
1674 /*
1675 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1676 */
1677 cmpq $XCOPY_MIN_SIZE, %rdx
1678 jae 5f
1679 6:
1680 SMAP_DISABLE_INSTR(1)
1681 jmp do_copy_fault
1682
1683 /*
1684 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1685 * count is COUNT_ALIGN_SIZE aligned.
1686 */
1687 5:
1688 movq %rdi, %r10
1689 orq %rsi, %r10
1690 andq $NTA_ALIGN_MASK, %r10
1691 orq %rdx, %r10
1692 andq $COUNT_ALIGN_MASK, %r10
1693 jnz 6b
1694 leaq _xcopyin_nta_err(%rip), %rcx /* doesn't set rflags */
1695 SMAP_DISABLE_INSTR(2)
1696 jmp do_copy_fault_nta /* use non-temporal access */
1697
1698 4:
1699 movl $EFAULT, %eax
1700 jmp 3f
1701
1702 /*
1703 * A fault during do_copy_fault or do_copy_fault_nta is
1704 * indicated through an errno value in %rax and we iret from the
1705 * trap handler to here.
1706 */
1707 _xcopyin_err:
1708 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1709 _xcopyin_nta_err:
1710 SMAP_ENABLE_INSTR(3)
1711 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1712 3:
1713 movq T_COPYOPS(%r9), %r8
1714 cmpq $0, %r8
1715 jz 2f
1716
1717 /*
1718 * reload args for the copyop
1719 */
1720 movq (%rsp), %rdi
1721 movq 0x8(%rsp), %rsi
1722 movq 0x10(%rsp), %rdx
1723 leave
1724 jmp *CP_XCOPYIN(%r8)
1725
1726 2: leave
1727 ret
1728 SET_SIZE(xcopyin_nta)
1729
1730 #elif defined(__i386)
1731
1732 #define ARG_UADDR 4
1733 #define ARG_KADDR 8
1734 #define ARG_COUNT 12
1735 #define ARG_CACHED 16
1736
1737 .globl use_sse_copy
1738
1739 ENTRY(xcopyin_nta)
1740 movl kernelbase, %ecx
1741 lea _xcopyin_err, %eax
1742 movl %gs:CPU_THREAD, %edx
1743 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
1744 jae 4f
1745
1746 cmpl $0, use_sse_copy /* no sse support */
1747 jz do_copy_fault
1748
1749 cmpl $0, ARG_CACHED(%esp) /* copy_cached hint set? */
1750 jnz do_copy_fault
1751
1752 /*
1753 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1754 */
1755 cmpl $XCOPY_MIN_SIZE, ARG_COUNT(%esp)
1756 jb do_copy_fault
1757
1758 /*
1759 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1760 * count is COUNT_ALIGN_SIZE aligned.
1761 */
1762 movl ARG_UADDR(%esp), %ecx
1763 orl ARG_KADDR(%esp), %ecx
1764 andl $NTA_ALIGN_MASK, %ecx
1765 orl ARG_COUNT(%esp), %ecx
1766 andl $COUNT_ALIGN_MASK, %ecx
1767 jnz do_copy_fault
1768
1769 	jmp	do_copy_fault_nta	/* use non-temporal access */
1770
1771 4:
1772 movl $EFAULT, %eax
1773 jmp 3f
1774
1775 /*
1776 * A fault during do_copy_fault or do_copy_fault_nta is
1777 * indicated through an errno value in %eax and we iret from the
1778 * trap handler to here.
1779 */
1780 _xcopyin_err:
1781 popl %ecx
1782 popl %edi
1783 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */
1784 popl %esi
1785 popl %ebp
1786 3:
1787 cmpl $0, T_COPYOPS(%edx)
1788 jz 2f
1789 movl T_COPYOPS(%edx), %eax
1790 jmp *CP_XCOPYIN(%eax)
1791
1792 2: rep; ret /* use 2 byte return instruction when branch target */
1793 /* AMD Software Optimization Guide - Section 6.2 */
1794 SET_SIZE(xcopyin_nta)
1795
1796 #undef ARG_UADDR
1797 #undef ARG_KADDR
1798 #undef ARG_COUNT
1799 #undef ARG_CACHED
1800
1801 #endif /* __i386 */
1802 #endif /* __lint */
1803
1804 /*
1805 * Copy kernel data to user space.
1806 */
1807
1808 #if defined(__lint)
1809
1810 /* ARGSUSED */
1811 int
1812 copyout(const void *kaddr, void *uaddr, size_t count)
1813 { return (0); }
1814
1815 #else /* __lint */
1816
1817 #if defined(__amd64)
1818
1819 ENTRY(copyout)
1820 pushq %rbp
1821 movq %rsp, %rbp
1822 subq $24, %rsp
1823
1824 /*
1825 * save args in case we trap and need to rerun as a copyop
1826 */
1827 movq %rdi, (%rsp)
1828 movq %rsi, 0x8(%rsp)
1829 movq %rdx, 0x10(%rsp)
1830
1831 movq kernelbase(%rip), %rax
1832 #ifdef DEBUG
1833 cmpq %rax, %rdi /* %rdi = kaddr */
1834 jnb 1f
1835 leaq .copyout_panic_msg(%rip), %rdi
1836 xorl %eax, %eax
1837 call panic
1838 1:
1839 #endif
1840 /*
1841 * pass lofault value as 4th argument to do_copy_fault
1842 */
1843 leaq _copyout_err(%rip), %rcx
1844
1845 movq %gs:CPU_THREAD, %r9
1846 cmpq %rax, %rsi /* test uaddr < kernelbase */
1847 jae 3f /* take copyop if uaddr > kernelbase */
1848 SMAP_DISABLE_INSTR(3)
1849 jmp do_copy_fault /* Calls leave for us */
1850
1851 _copyout_err:
1852 SMAP_ENABLE_INSTR(4)
1853 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1854 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1855 3:
1856 movq T_COPYOPS(%r9), %rax
1857 cmpq $0, %rax
1858 jz 2f
1859
1860 /*
1861 * reload args for the copyop
1862 */
1863 movq (%rsp), %rdi
1864 movq 0x8(%rsp), %rsi
1865 movq 0x10(%rsp), %rdx
1866 leave
1867 jmp *CP_COPYOUT(%rax)
1868
1869 2: movl $-1, %eax
1870 leave
1871 ret
1872 SET_SIZE(copyout)
1873
1874 #elif defined(__i386)
1875
1876 #define ARG_KADDR 4
1877 #define ARG_UADDR 8
1878
1879 ENTRY(copyout)
1880 movl kernelbase, %ecx
1881 #ifdef DEBUG
1882 cmpl %ecx, ARG_KADDR(%esp)
1883 jnb 1f
1884 pushl %ebp
1885 movl %esp, %ebp
1886 pushl $.copyout_panic_msg
1887 call panic
1888 1:
1889 #endif
1890 lea _copyout_err, %eax
1891 movl %gs:CPU_THREAD, %edx
1892 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
1893 jb do_copy_fault
1894 jmp 3f
1895
1896 _copyout_err:
1897 popl %ecx
1898 popl %edi
1899 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */
1900 popl %esi
1901 popl %ebp
1902 3:
1903 movl T_COPYOPS(%edx), %eax
1904 cmpl $0, %eax
1905 jz 2f
1906 jmp *CP_COPYOUT(%eax)
1907
1908 2: movl $-1, %eax
1909 ret
1910 SET_SIZE(copyout)
1911
1912 #undef ARG_UADDR
1913 #undef ARG_KADDR
1914
1915 #endif /* __i386 */
1916 #endif /* __lint */
1917
1918 #if defined(__lint)
1919
1920 /* ARGSUSED */
1921 int
1922 xcopyout_nta(const void *kaddr, void *uaddr, size_t count, int copy_cached)
1923 { return (0); }
1924
1925 #else /* __lint */
1926
1927 #if defined(__amd64)
1928
1929 ENTRY(xcopyout_nta)
1930 pushq %rbp
1931 movq %rsp, %rbp
1932 subq $24, %rsp
1933
1934 /*
1935 * save args in case we trap and need to rerun as a copyop
1936 */
1937 movq %rdi, (%rsp)
1938 movq %rsi, 0x8(%rsp)
1939 movq %rdx, 0x10(%rsp)
1940
1941 movq kernelbase(%rip), %rax
1942 #ifdef DEBUG
1943 cmpq %rax, %rdi /* %rdi = kaddr */
1944 jnb 1f
1945 leaq .xcopyout_panic_msg(%rip), %rdi
1946 xorl %eax, %eax
1947 call panic
1948 1:
1949 #endif
1950 movq %gs:CPU_THREAD, %r9
1951 cmpq %rax, %rsi /* test uaddr < kernelbase */
1952 jae 4f
1953
1954 cmpq $0, %rcx /* No non-temporal access? */
1955 /*
1956 * pass lofault value as 4th argument to do_copy_fault
1957 */
1958 leaq _xcopyout_err(%rip), %rcx
1959 jnz 6f
1960 /*
1961 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1962 */
1963 cmpq $XCOPY_MIN_SIZE, %rdx
1964 jae 5f
1965 6:
1966 SMAP_DISABLE_INSTR(4)
1967 jmp do_copy_fault
1968
1969 /*
1970 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1971 * count is COUNT_ALIGN_SIZE aligned.
1972 */
1973 5:
1974 movq %rdi, %r10
1975 orq %rsi, %r10
1976 andq $NTA_ALIGN_MASK, %r10
1977 orq %rdx, %r10
1978 andq $COUNT_ALIGN_MASK, %r10
1979 jnz 6b
1980 leaq _xcopyout_nta_err(%rip), %rcx
1981 SMAP_DISABLE_INSTR(5)
1982 call do_copy_fault_nta
1983 SMAP_ENABLE_INSTR(5)
1984 ret
1985
1986 4:
1987 movl $EFAULT, %eax
1988 jmp 3f
1989
1990 /*
1991 * A fault during do_copy_fault or do_copy_fault_nta is
1992 * indicated through an errno value in %rax and we iret from the
1993 * trap handler to here.
1994 */
1995 _xcopyout_err:
1996 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1997 _xcopyout_nta_err:
1998 SMAP_ENABLE_INSTR(6)
1999 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
2000 3:
2001 movq T_COPYOPS(%r9), %r8
2002 cmpq $0, %r8
2003 jz 2f
2004
2005 /*
2006 * reload args for the copyop
2007 */
2008 movq (%rsp), %rdi
2009 movq 0x8(%rsp), %rsi
2010 movq 0x10(%rsp), %rdx
2011 leave
2012 jmp *CP_XCOPYOUT(%r8)
2013
2014 2: leave
2015 ret
2016 SET_SIZE(xcopyout_nta)
2017
2018 #elif defined(__i386)
2019
2020 #define ARG_KADDR 4
2021 #define ARG_UADDR 8
2022 #define ARG_COUNT 12
2023 #define ARG_CACHED 16
2024
2025 ENTRY(xcopyout_nta)
2026 movl kernelbase, %ecx
2027 lea _xcopyout_err, %eax
2028 movl %gs:CPU_THREAD, %edx
2029 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
2030 jae 4f
2031
2032 cmpl $0, use_sse_copy /* no sse support */
2033 jz do_copy_fault
2034
2035 cmpl $0, ARG_CACHED(%esp) /* copy_cached hint set? */
2036 jnz do_copy_fault
2037
2038 /*
2039 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
2040 */
2041 	cmpl	$XCOPY_MIN_SIZE, ARG_COUNT(%esp)
2042 jb do_copy_fault
2043
2044 /*
2045 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
2046 * count is COUNT_ALIGN_SIZE aligned.
2047 */
2048 movl ARG_UADDR(%esp), %ecx
2049 orl ARG_KADDR(%esp), %ecx
2050 andl $NTA_ALIGN_MASK, %ecx
2051 orl ARG_COUNT(%esp), %ecx
2052 andl $COUNT_ALIGN_MASK, %ecx
2053 jnz do_copy_fault
2054 jmp do_copy_fault_nta
2055
2056 4:
2057 movl $EFAULT, %eax
2058 jmp 3f
2059
2060 /*
2061 * A fault during do_copy_fault or do_copy_fault_nta is
2062 * indicated through an errno value in %eax and we iret from the
2063 * trap handler to here.
2064 */
2065 _xcopyout_err:
2066 / restore the original lofault
2067 popl %ecx
2068 popl %edi
2069 movl %ecx, T_LOFAULT(%edx) / original lofault
2070 popl %esi
2071 popl %ebp
2072 3:
2073 cmpl $0, T_COPYOPS(%edx)
2074 jz 2f
2075 movl T_COPYOPS(%edx), %eax
2076 jmp *CP_XCOPYOUT(%eax)
2077
2078 2: rep; ret /* use 2 byte return instruction when branch target */
2079 /* AMD Software Optimization Guide - Section 6.2 */
2080 SET_SIZE(xcopyout_nta)
2081
2082 #undef ARG_UADDR
2083 #undef ARG_KADDR
2084 #undef ARG_COUNT
2085 #undef ARG_CACHED
2086
2087 #endif /* __i386 */
2088 #endif /* __lint */
2089
2090 /*
2091 * Copy a null terminated string from one point to another in
2092 * the kernel address space.
2093 */
2094
2095 #if defined(__lint)
2096
2097 /* ARGSUSED */
2098 int
2099 copystr(const char *from, char *to, size_t maxlength, size_t *lencopied)
2100 { return (0); }
2101
2102 #else /* __lint */
2103
2104 #if defined(__amd64)
2105
2106 ENTRY(copystr)
2107 pushq %rbp
2108 movq %rsp, %rbp
2109 #ifdef DEBUG
2110 movq kernelbase(%rip), %rax
2111 cmpq %rax, %rdi /* %rdi = from */
2112 jb 0f
2113 cmpq %rax, %rsi /* %rsi = to */
2114 jnb 1f
2115 0: leaq .copystr_panic_msg(%rip), %rdi
2116 xorl %eax, %eax
2117 call panic
2118 1:
2119 #endif
2120 movq %gs:CPU_THREAD, %r9
2121 movq T_LOFAULT(%r9), %r8 /* pass current lofault value as */
2122 /* 5th argument to do_copystr */
2123 xorl %r10d,%r10d /* pass smap restore need in %r10d */
2124 /* as a non-ABI 6th arg */
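	/*
	 * Descriptive note: %r10d is a non-ABI sixth argument telling the exit
	 * path whether SMAP must be re-enabled. copystr() clears it here since
	 * both addresses are kernel addresses; copyinstr() and copyoutstr()
	 * set it to 1 after disabling SMAP, so copystr_smap below issues
	 * SMAP_ENABLE_INSTR on the way out.
	 */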
2125 do_copystr:
2126 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
2127 movq T_LOFAULT(%r9), %r11 /* save the current lofault */
2128 movq %r8, T_LOFAULT(%r9) /* new lofault */
2129
2130 movq %rdx, %r8 /* save maxlength */
2131
2132 cmpq $0, %rdx /* %rdx = maxlength */
2133 je copystr_enametoolong /* maxlength == 0 */
2134
2135 copystr_loop:
2136 decq %r8
2137 movb (%rdi), %al
2138 incq %rdi
2139 movb %al, (%rsi)
2140 incq %rsi
2141 cmpb $0, %al
2142 je copystr_null /* null char */
2143 cmpq $0, %r8
2144 jne copystr_loop
2145
2146 copystr_enametoolong:
2147 movl $ENAMETOOLONG, %eax
2148 jmp copystr_out
2149
2150 copystr_null:
2151 xorl %eax, %eax /* no error */
2152
2153 copystr_out:
2154 cmpq $0, %rcx /* want length? */
2155 je copystr_smap /* no */
2156 subq %r8, %rdx /* compute length and store it */
2157 movq %rdx, (%rcx)
2158
2159 copystr_smap:
2160 cmpl $0, %r10d
2161 jz copystr_done
2162 SMAP_ENABLE_INSTR(7)
2163
2164 copystr_done:
2165 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
2166 leave
2167 ret
2168 SET_SIZE(copystr)
2169
2170 #elif defined(__i386)
2171
2172 #define ARG_FROM 8
2173 #define ARG_TO 12
2174 #define ARG_MAXLEN 16
2175 #define ARG_LENCOPIED 20
2176
2177 ENTRY(copystr)
2178 #ifdef DEBUG
2179 pushl %ebp
2180 movl %esp, %ebp
2181 movl kernelbase, %eax
2182 cmpl %eax, ARG_FROM(%esp)
2183 jb 0f
2184 cmpl %eax, ARG_TO(%esp)
2185 jnb 1f
2186 0: pushl $.copystr_panic_msg
2187 call panic
2188 1: popl %ebp
2189 #endif
2190 /* get the current lofault address */
2191 movl %gs:CPU_THREAD, %eax
2192 movl T_LOFAULT(%eax), %eax
2193 do_copystr:
2194 pushl %ebp /* setup stack frame */
2195 movl %esp, %ebp
2196 pushl %ebx /* save registers */
2197 pushl %edi
2198
2199 movl %gs:CPU_THREAD, %ebx
2200 movl T_LOFAULT(%ebx), %edi
2201 pushl %edi /* save the current lofault */
2202 movl %eax, T_LOFAULT(%ebx) /* new lofault */
2203
2204 movl ARG_MAXLEN(%ebp), %ecx
2205 cmpl $0, %ecx
2206 je copystr_enametoolong /* maxlength == 0 */
2207
2208 movl ARG_FROM(%ebp), %ebx /* source address */
2209 movl ARG_TO(%ebp), %edx /* destination address */
2210
2211 copystr_loop:
2212 decl %ecx
2213 movb (%ebx), %al
2214 incl %ebx
2215 movb %al, (%edx)
2216 incl %edx
2217 cmpb $0, %al
2218 je copystr_null /* null char */
2219 cmpl $0, %ecx
2220 jne copystr_loop
2221
2222 copystr_enametoolong:
2223 movl $ENAMETOOLONG, %eax
2224 jmp copystr_out
2225
2226 copystr_null:
2227 xorl %eax, %eax /* no error */
2228
2229 copystr_out:
2230 cmpl $0, ARG_LENCOPIED(%ebp) /* want length? */
2231 je copystr_done /* no */
2232 movl ARG_MAXLEN(%ebp), %edx
2233 subl %ecx, %edx /* compute length and store it */
2234 movl ARG_LENCOPIED(%ebp), %ecx
2235 movl %edx, (%ecx)
2236
2237 copystr_done:
2238 popl %edi
2239 movl %gs:CPU_THREAD, %ebx
2240 movl %edi, T_LOFAULT(%ebx) /* restore the original lofault */
2241
2242 popl %edi
2243 popl %ebx
2244 popl %ebp
2245 ret
2246 SET_SIZE(copystr)
2247
2248 #undef ARG_FROM
2249 #undef ARG_TO
2250 #undef ARG_MAXLEN
2251 #undef ARG_LENCOPIED
2252
2253 #endif /* __i386 */
2254 #endif /* __lint */
2255
2256 /*
2257 * Copy a null terminated string from the user address space into
2258 * the kernel address space.
2259 */
2260
2261 #if defined(__lint)
2262
2263 /* ARGSUSED */
2264 int
2265 copyinstr(const char *uaddr, char *kaddr, size_t maxlength,
2266 size_t *lencopied)
2267 { return (0); }
2268
2269 #else /* __lint */
2270
2271 #if defined(__amd64)
2272
2273 ENTRY(copyinstr)
2274 pushq %rbp
2275 movq %rsp, %rbp
2276 subq $32, %rsp
2277
2278 /*
2279 * save args in case we trap and need to rerun as a copyop
2280 */
2281 movq %rdi, (%rsp)
2282 movq %rsi, 0x8(%rsp)
2283 movq %rdx, 0x10(%rsp)
2284 movq %rcx, 0x18(%rsp)
2285
2286 movq kernelbase(%rip), %rax
2287 #ifdef DEBUG
2288 cmpq %rax, %rsi /* %rsi = kaddr */
2289 jnb 1f
2290 leaq .copyinstr_panic_msg(%rip), %rdi
2291 xorl %eax, %eax
2292 call panic
2293 1:
2294 #endif
2295 /*
2296	 * pass the lofault handler address as the 5th argument to do_copystr;
2297	 * %r10d (the non-ABI 6th arg) is set to 1 so SMAP is re-enabled on exit
2298 */
2299 leaq _copyinstr_error(%rip), %r8
2300 movl $1, %r10d
2301
2302 cmpq %rax, %rdi /* test uaddr < kernelbase */
2303 jae 4f
2304 SMAP_DISABLE_INSTR(6)
2305 jmp do_copystr
2306 4:
2307 movq %gs:CPU_THREAD, %r9
2308 jmp 3f
2309
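/*
 * We took a fault while SMAP was disabled: re-enable SMAP, restore the
 * lofault handler we displaced, and fall back to the installed copyops
 * vector (if any) with the original arguments; otherwise return EFAULT.
 */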
2310 _copyinstr_error:
2311 SMAP_ENABLE_INSTR(8)
2312 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
2313 3:
2314 movq T_COPYOPS(%r9), %rax
2315 cmpq $0, %rax
2316 jz 2f
2317
2318 /*
2319 * reload args for the copyop
2320 */
2321 movq (%rsp), %rdi
2322 movq 0x8(%rsp), %rsi
2323 movq 0x10(%rsp), %rdx
2324 movq 0x18(%rsp), %rcx
2325 leave
2326 jmp *CP_COPYINSTR(%rax)
2327
2328 2: movl $EFAULT, %eax /* return EFAULT */
2329 leave
2330 ret
2331 SET_SIZE(copyinstr)
2332
2333 #elif defined(__i386)
2334
2335 #define ARG_UADDR 4
2336 #define ARG_KADDR 8
2337
2338 ENTRY(copyinstr)
2339 movl kernelbase, %ecx
2340 #ifdef DEBUG
2341 cmpl %ecx, ARG_KADDR(%esp)
2342 jnb 1f
2343 pushl %ebp
2344 movl %esp, %ebp
2345 pushl $.copyinstr_panic_msg
2346 call panic
2347 1:
2348 #endif
2349 lea _copyinstr_error, %eax
2350 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
2351 jb do_copystr
2352 movl %gs:CPU_THREAD, %edx
2353 jmp 3f
2354
2355 _copyinstr_error:
2356 popl %edi
2357 movl %gs:CPU_THREAD, %edx
2358 movl %edi, T_LOFAULT(%edx) /* original lofault */
2359
2360 popl %edi
2361 popl %ebx
2362 popl %ebp
2363 3:
2364 movl T_COPYOPS(%edx), %eax
2365 cmpl $0, %eax
2366 jz 2f
2367 jmp *CP_COPYINSTR(%eax)
2368
2369 2: movl $EFAULT, %eax /* return EFAULT */
2370 ret
2371 SET_SIZE(copyinstr)
2372
2373 #undef ARG_UADDR
2374 #undef ARG_KADDR
2375
2376 #endif /* __i386 */
2377 #endif /* __lint */
2378
2379 /*
2380 * Copy a null terminated string from the kernel
2381 * address space to the user address space.
2382 */
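
/*
 * For illustration, a sketch of a typical copyoutstr() caller
 * (hypothetical names); when lencopied is non-NULL it is filled in with
 * the number of bytes copied, including the terminating NUL:
 *
 *	size_t len;
 *	int err;
 *
 *	if ((err = copyoutstr(kstr, ustr, umaxlen, &len)) != 0)
 *		return (err);
 */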
2383
2384 #if defined(__lint)
2385
2386 /* ARGSUSED */
2387 int
2388 copyoutstr(const char *kaddr, char *uaddr, size_t maxlength,
2389 size_t *lencopied)
2390 { return (0); }
2391
2392 #else /* __lint */
2393
2394 #if defined(__amd64)
2395
2396 ENTRY(copyoutstr)
2397 pushq %rbp
2398 movq %rsp, %rbp
2399 subq $32, %rsp
2400
2401 /*
2402 * save args in case we trap and need to rerun as a copyop
2403 */
2404 movq %rdi, (%rsp)
2405 movq %rsi, 0x8(%rsp)
2406 movq %rdx, 0x10(%rsp)
2407 movq %rcx, 0x18(%rsp)
2408
2409 movq kernelbase(%rip), %rax
2410 #ifdef DEBUG
2411 cmpq %rax, %rdi /* %rdi = kaddr */
2412 jnb 1f
2413 leaq .copyoutstr_panic_msg(%rip), %rdi
2414 jmp call_panic /* setup stack and call panic */
2415 1:
2416 #endif
2417 /*
2418	 * pass the lofault handler address as the 5th argument to do_copystr;
2419	 * %r10d (the non-ABI 6th arg) is set to 1 so SMAP is re-enabled on exit
2420 */
2421 leaq _copyoutstr_error(%rip), %r8
2422 movl $1, %r10d
2423
2424 cmpq %rax, %rsi /* test uaddr < kernelbase */
2425 jae 4f
2426 SMAP_DISABLE_INSTR(7)
2427 jmp do_copystr
2428 4:
2429 movq %gs:CPU_THREAD, %r9
2430 jmp 3f
2431
2432 _copyoutstr_error:
2433 SMAP_ENABLE_INSTR(9)
2434 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
2435 3:
2436 movq T_COPYOPS(%r9), %rax
2437 cmpq $0, %rax
2438 jz 2f
2439
2440 /*
2441 * reload args for the copyop
2442 */
2443 movq (%rsp), %rdi
2444 movq 0x8(%rsp), %rsi
2445 movq 0x10(%rsp), %rdx
2446 movq 0x18(%rsp), %rcx
2447 leave
2448 jmp *CP_COPYOUTSTR(%rax)
2449
2450 2: movl $EFAULT, %eax /* return EFAULT */
2451 leave
2452 ret
2453 SET_SIZE(copyoutstr)
2454
2455 #elif defined(__i386)
2456
2457 #define ARG_KADDR 4
2458 #define ARG_UADDR 8
2459
2460 ENTRY(copyoutstr)
2461 movl kernelbase, %ecx
2462 #ifdef DEBUG
2463 cmpl %ecx, ARG_KADDR(%esp)
2464 jnb 1f
2465 pushl %ebp
2466 movl %esp, %ebp
2467 pushl $.copyoutstr_panic_msg
2468 call panic
2469 1:
2470 #endif
2471 lea _copyoutstr_error, %eax
2472 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
2473 jb do_copystr
2474 movl %gs:CPU_THREAD, %edx
2475 jmp 3f
2476
2477 _copyoutstr_error:
2478 popl %edi
2479 movl %gs:CPU_THREAD, %edx
2480 movl %edi, T_LOFAULT(%edx) /* restore the original lofault */
2481
2482 popl %edi
2483 popl %ebx
2484 popl %ebp
2485 3:
2486 movl T_COPYOPS(%edx), %eax
2487 cmpl $0, %eax
2488 jz 2f
2489 jmp *CP_COPYOUTSTR(%eax)
2490
2491 2: movl $EFAULT, %eax /* return EFAULT */
2492 ret
2493 SET_SIZE(copyoutstr)
2494
2495 #undef ARG_KADDR
2496 #undef ARG_UADDR
2497
2498 #endif /* __i386 */
2499 #endif /* __lint */
2500
2501 /*
2502 * Since all of the fuword() variants are so similar, we have a macro to spit
2503 * them out. This allows us to create DTrace-unobservable functions easily.
2504 */
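
/*
 * For illustration, a minimal sketch of how the fuwordN() routines are
 * used (uaddr and val are hypothetical names); each returns 0 on success
 * and -1 on failure, first deferring to the thread's copyops vector if
 * one is installed.  The suwordN() stores below follow the same
 * convention:
 *
 *	uint32_t val;
 *
 *	if (fuword32(uaddr, &val) == -1)
 *		return (EFAULT);
 */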
2505
2506 #if defined(__lint)
2507
2508 #if defined(__amd64)
2509
2510 /* ARGSUSED */
2511 int
2512 fuword64(const void *addr, uint64_t *dst)
2513 { return (0); }
2514
2515 #endif
2516
2517 /* ARGSUSED */
2518 int
2519 fuword32(const void *addr, uint32_t *dst)
2520 { return (0); }
2521
2522 /* ARGSUSED */
2523 int
2524 fuword16(const void *addr, uint16_t *dst)
2525 { return (0); }
2526
2527 /* ARGSUSED */
2528 int
2529 fuword8(const void *addr, uint8_t *dst)
2530 { return (0); }
2531
2532 #else /* __lint */
2533
2534 #if defined(__amd64)
2535
2536 /*
2537 * Note that we don't save and reload the arguments here
2538 * because their values are not altered in the copy path.
2539	 * Additionally, on success we re-enable SMAP and return
2540	 * directly to our original caller.
2541 */
2542
2543 #define FUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \
2544 ENTRY(NAME) \
2545 movq %gs:CPU_THREAD, %r9; \
2546 cmpq kernelbase(%rip), %rdi; \
2547 jae 1f; \
2548 leaq _flt_/**/NAME, %rdx; \
2549 movq %rdx, T_LOFAULT(%r9); \
2550 SMAP_DISABLE_INSTR(DISNUM) \
2551 INSTR (%rdi), REG; \
2552 movq $0, T_LOFAULT(%r9); \
2553 INSTR REG, (%rsi); \
2554 xorl %eax, %eax; \
2555 SMAP_ENABLE_INSTR(EN1) \
2556 ret; \
2557 _flt_/**/NAME: \
2558 SMAP_ENABLE_INSTR(EN2) \
2559 movq $0, T_LOFAULT(%r9); \
2560 1: \
2561 movq T_COPYOPS(%r9), %rax; \
2562 cmpq $0, %rax; \
2563 jz 2f; \
2564 jmp *COPYOP(%rax); \
2565 2: \
2566 movl $-1, %eax; \
2567 ret; \
2568 SET_SIZE(NAME)
2569
2570 FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11)
2571 FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13)
2572 FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15)
2573 FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17)
2574
2575 #elif defined(__i386)
2576
2577 #define FUWORD(NAME, INSTR, REG, COPYOP) \
2578 ENTRY(NAME) \
2579 movl %gs:CPU_THREAD, %ecx; \
2580 movl kernelbase, %eax; \
2581 cmpl %eax, 4(%esp); \
2582 jae 1f; \
2583 lea _flt_/**/NAME, %edx; \
2584 movl %edx, T_LOFAULT(%ecx); \
2585 movl 4(%esp), %eax; \
2586 movl 8(%esp), %edx; \
2587 INSTR (%eax), REG; \
2588 movl $0, T_LOFAULT(%ecx); \
2589 INSTR REG, (%edx); \
2590 xorl %eax, %eax; \
2591 ret; \
2592 _flt_/**/NAME: \
2593 movl $0, T_LOFAULT(%ecx); \
2594 1: \
2595 movl T_COPYOPS(%ecx), %eax; \
2596 cmpl $0, %eax; \
2597 jz 2f; \
2598 jmp *COPYOP(%eax); \
2599 2: \
2600 movl $-1, %eax; \
2601 ret; \
2602 SET_SIZE(NAME)
2603
2604 FUWORD(fuword32, movl, %eax, CP_FUWORD32)
2605 FUWORD(fuword16, movw, %ax, CP_FUWORD16)
2606 FUWORD(fuword8, movb, %al, CP_FUWORD8)
2607
2608 #endif /* __i386 */
2609
2610 #undef FUWORD
2611
2612 #endif /* __lint */
2613
2614 /*
2615 * Set user word.
2616 */
2617
2618 #if defined(__lint)
2619
2620 #if defined(__amd64)
2621
2622 /* ARGSUSED */
2623 int
2624 suword64(void *addr, uint64_t value)
2625 { return (0); }
2626
2627 #endif
2628
2629 /* ARGSUSED */
2630 int
2631 suword32(void *addr, uint32_t value)
2632 { return (0); }
2633
2634 /* ARGSUSED */
2635 int
2636 suword16(void *addr, uint16_t value)
2637 { return (0); }
2638
2639 /* ARGSUSED */
2640 int
2641 suword8(void *addr, uint8_t value)
2642 { return (0); }
2643
2644 #else /* lint */
2645
2646 #if defined(__amd64)
2647
2648 /*
2649 * Note that we don't save and reload the arguments here
2650 * because their values are not altered in the copy path.
2651 */
2652
2653 #define SUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \
2654 ENTRY(NAME) \
2655 movq %gs:CPU_THREAD, %r9; \
2656 cmpq kernelbase(%rip), %rdi; \
2657 jae 1f; \
2658 leaq _flt_/**/NAME, %rdx; \
2659 SMAP_DISABLE_INSTR(DISNUM) \
2660 movq %rdx, T_LOFAULT(%r9); \
2661 INSTR REG, (%rdi); \
2662 movq $0, T_LOFAULT(%r9); \
2663 xorl %eax, %eax; \
2664 SMAP_ENABLE_INSTR(EN1) \
2665 ret; \
2666 _flt_/**/NAME: \
2667 SMAP_ENABLE_INSTR(EN2) \
2668 movq $0, T_LOFAULT(%r9); \
2669 1: \
2670 movq T_COPYOPS(%r9), %rax; \
2671 cmpq $0, %rax; \
2672 jz 3f; \
2673 jmp *COPYOP(%rax); \
2674 3: \
2675 movl $-1, %eax; \
2676 ret; \
2677 SET_SIZE(NAME)
2678
2679 SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19)
2680 SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21)
2681 SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23)
2682 SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25)
2683
2684 #elif defined(__i386)
2685
2686 #define SUWORD(NAME, INSTR, REG, COPYOP) \
2687 ENTRY(NAME) \
2688 movl %gs:CPU_THREAD, %ecx; \
2689 movl kernelbase, %eax; \
2690 cmpl %eax, 4(%esp); \
2691 jae 1f; \
2692 lea _flt_/**/NAME, %edx; \
2693 movl %edx, T_LOFAULT(%ecx); \
2694 movl 4(%esp), %eax; \
2695 movl 8(%esp), %edx; \
2696 INSTR REG, (%eax); \
2697 movl $0, T_LOFAULT(%ecx); \
2698 xorl %eax, %eax; \
2699 ret; \
2700 _flt_/**/NAME: \
2701 movl $0, T_LOFAULT(%ecx); \
2702 1: \
2703 movl T_COPYOPS(%ecx), %eax; \
2704 cmpl $0, %eax; \
2705 jz 3f; \
2706 movl COPYOP(%eax), %ecx; \
2707 jmp *%ecx; \
2708 3: \
2709 movl $-1, %eax; \
2710 ret; \
2711 SET_SIZE(NAME)
2712
2713 SUWORD(suword32, movl, %edx, CP_SUWORD32)
2714 SUWORD(suword16, movw, %dx, CP_SUWORD16)
2715 SUWORD(suword8, movb, %dl, CP_SUWORD8)
2716
2717 #endif /* __i386 */
2718
2719 #undef SUWORD
2720
2721 #endif /* __lint */
2722
2723 #if defined(__lint)
2724
2725 #if defined(__amd64)
2726
2727 /*ARGSUSED*/
2728 void
2729 fuword64_noerr(const void *addr, uint64_t *dst)
2730 {}
2731
2732 #endif
2733
2734 /*ARGSUSED*/
2735 void
2736 fuword32_noerr(const void *addr, uint32_t *dst)
2737 {}
2738
2739 /*ARGSUSED*/
2740 void
2741 fuword8_noerr(const void *addr, uint8_t *dst)
2742 {}
2743
2744 /*ARGSUSED*/
2745 void
2746 fuword16_noerr(const void *addr, uint16_t *dst)
2747 {}
2748
2749 #else /* __lint */
2750
2751 #if defined(__amd64)
2752
2753 #define FUWORD_NOERR(NAME, INSTR, REG) \
2754 ENTRY(NAME) \
2755 cmpq kernelbase(%rip), %rdi; \
2756 cmovnbq kernelbase(%rip), %rdi; \
2757 INSTR (%rdi), REG; \
2758 INSTR REG, (%rsi); \
2759 ret; \
2760 SET_SIZE(NAME)
2761
2762 FUWORD_NOERR(fuword64_noerr, movq, %rax)
2763 FUWORD_NOERR(fuword32_noerr, movl, %eax)
2764 FUWORD_NOERR(fuword16_noerr, movw, %ax)
2765 FUWORD_NOERR(fuword8_noerr, movb, %al)
2766
2767 #elif defined(__i386)
2768
2769 #define FUWORD_NOERR(NAME, INSTR, REG) \
2770 ENTRY(NAME) \
2771 movl 4(%esp), %eax; \
2772 cmpl kernelbase, %eax; \
2773 jb 1f; \
2774 movl kernelbase, %eax; \
2775 1: movl 8(%esp), %edx; \
2776 INSTR (%eax), REG; \
2777 INSTR REG, (%edx); \
2778 ret; \
2779 SET_SIZE(NAME)
2780
2781 FUWORD_NOERR(fuword32_noerr, movl, %ecx)
2782 FUWORD_NOERR(fuword16_noerr, movw, %cx)
2783 FUWORD_NOERR(fuword8_noerr, movb, %cl)
2784
2785 #endif /* __i386 */
2786
2787 #undef FUWORD_NOERR
2788
2789 #endif /* __lint */
2790
2791 #if defined(__lint)
2792
2793 #if defined(__amd64)
2794
2795 /*ARGSUSED*/
2796 void
2797 suword64_noerr(void *addr, uint64_t value)
2798 {}
2799
2800 #endif
2801
2802 /*ARGSUSED*/
2803 void
2804 suword32_noerr(void *addr, uint32_t value)
2805 {}
2806
2807 /*ARGSUSED*/
2808 void
2809 suword16_noerr(void *addr, uint16_t value)
2810 {}
2811
2812 /*ARGSUSED*/
2813 void
2814 suword8_noerr(void *addr, uint8_t value)
2815 {}
2816
2817 #else /* lint */
2818
2819 #if defined(__amd64)
2820
2821 #define SUWORD_NOERR(NAME, INSTR, REG) \
2822 ENTRY(NAME) \
2823 cmpq kernelbase(%rip), %rdi; \
2824 cmovnbq kernelbase(%rip), %rdi; \
2825 INSTR REG, (%rdi); \
2826 ret; \
2827 SET_SIZE(NAME)
2828
2829 SUWORD_NOERR(suword64_noerr, movq, %rsi)
2830 SUWORD_NOERR(suword32_noerr, movl, %esi)
2831 SUWORD_NOERR(suword16_noerr, movw, %si)
2832 SUWORD_NOERR(suword8_noerr, movb, %sil)
2833
2834 #elif defined(__i386)
2835
2836 #define SUWORD_NOERR(NAME, INSTR, REG) \
2837 ENTRY(NAME) \
2838 movl 4(%esp), %eax; \
2839 cmpl kernelbase, %eax; \
2840 jb 1f; \
2841 movl kernelbase, %eax; \
2842 1: \
2843 movl 8(%esp), %edx; \
2844 INSTR REG, (%eax); \
2845 ret; \
2846 SET_SIZE(NAME)
2847
2848 SUWORD_NOERR(suword32_noerr, movl, %edx)
2849 SUWORD_NOERR(suword16_noerr, movw, %dx)
2850 SUWORD_NOERR(suword8_noerr, movb, %dl)
2851
2852 #endif /* __i386 */
2853
2854 #undef SUWORD_NOERR
2855
2856 #endif /* lint */
2857
2858
2859 #if defined(__lint)
2860
2861 /*ARGSUSED*/
2862 int
2863 subyte(void *addr, uchar_t value)
2864 { return (0); }
2865
2866 /*ARGSUSED*/
2867 void
2868 subyte_noerr(void *addr, uchar_t value)
2869 {}
2870
2871 /*ARGSUSED*/
2872 int
2873 fulword(const void *addr, ulong_t *valuep)
2874 { return (0); }
2875
2876 /*ARGSUSED*/
2877 void
2878 fulword_noerr(const void *addr, ulong_t *valuep)
2879 {}
2880
2881 /*ARGSUSED*/
2882 int
2883 sulword(void *addr, ulong_t valuep)
2884 { return (0); }
2885
2886 /*ARGSUSED*/
2887 void
2888 sulword_noerr(void *addr, ulong_t valuep)
2889 {}
2890
2891 #else
2892
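/*
 * subyte() and the fulword()/sulword() routines are weak aliases onto the
 * fixed-width routines above; fulword/sulword track the kernel's natural
 * word size (64-bit on amd64, 32-bit on i386).
 */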
2893 .weak subyte
2894 subyte=suword8
2895 .weak subyte_noerr
2896 subyte_noerr=suword8_noerr
2897
2898 #if defined(__amd64)
2899
2900 .weak fulword
2901 fulword=fuword64
2902 .weak fulword_noerr
2903 fulword_noerr=fuword64_noerr
2904 .weak sulword
2905 sulword=suword64
2906 .weak sulword_noerr
2907 sulword_noerr=suword64_noerr
2908
2909 #elif defined(__i386)
2910
2911 .weak fulword
2912 fulword=fuword32
2913 .weak fulword_noerr
2914 fulword_noerr=fuword32_noerr
2915 .weak sulword
2916 sulword=suword32
2917 .weak sulword_noerr
2918 sulword_noerr=suword32_noerr
2919
2920 #endif /* __i386 */
2921
2922 #endif /* __lint */
2923
2924 #if defined(__lint)
2925
2926 /*
2927 * Copy a block of storage - must not overlap (from + len <= to).
2928 * No fault handler installed (to be called under on_fault())
2929 */
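
/*
 * For illustration, a minimal sketch of how a caller protects these
 * routines with on_fault()/no_fault() (ufrom, kto, count and ljb are
 * hypothetical names):
 *
 *	label_t ljb;
 *
 *	if (on_fault(&ljb)) {
 *		no_fault();
 *		return (EFAULT);
 *	}
 *	copyin_noerr(ufrom, kto, count);
 *	no_fault();
 */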
2930
2931 /* ARGSUSED */
2932 void
2933 copyout_noerr(const void *kfrom, void *uto, size_t count)
2934 {}
2935
2936 /* ARGSUSED */
2937 void
2938 copyin_noerr(const void *ufrom, void *kto, size_t count)
2939 {}
2940
2941 /*
2942 * Zero a block of storage in user space
2943 */
2944
2945 /* ARGSUSED */
2946 void
2947 uzero(void *addr, size_t count)
2948 {}
2949
2950 /*
2951 * copy a block of storage in user space
2952 */
2953
2954 /* ARGSUSED */
2955 void
2956 ucopy(const void *ufrom, void *uto, size_t ulength)
2957 {}
2958
2959 /*
2960 * copy a string in user space
2961 */
2962
2963 /* ARGSUSED */
2964 void
2965 ucopystr(const char *ufrom, char *uto, size_t umaxlength, size_t *lencopied)
2966 {}
2967
2968 #else /* __lint */
2969
2970 #if defined(__amd64)
2971
2972 ENTRY(copyin_noerr)
2973 movq kernelbase(%rip), %rax
2974 #ifdef DEBUG
2975 cmpq %rax, %rsi /* %rsi = kto */
2976 jae 1f
2977 leaq .cpyin_ne_pmsg(%rip), %rdi
2978 jmp call_panic /* setup stack and call panic */
2979 1:
2980 #endif
2981 cmpq %rax, %rdi /* ufrom < kernelbase */
2982 jb do_copy
2983 movq %rax, %rdi /* force fault at kernelbase */
2984 jmp do_copy
2985 SET_SIZE(copyin_noerr)
2986
2987 ENTRY(copyout_noerr)
2988 movq kernelbase(%rip), %rax
2989 #ifdef DEBUG
2990 cmpq %rax, %rdi /* %rdi = kfrom */
2991 jae 1f
2992 leaq .cpyout_ne_pmsg(%rip), %rdi
2993 jmp call_panic /* setup stack and call panic */
2994 1:
2995 #endif
2996 cmpq %rax, %rsi /* uto < kernelbase */
2997 jb do_copy
2998 movq %rax, %rsi /* force fault at kernelbase */
2999 jmp do_copy
3000 SET_SIZE(copyout_noerr)
3001
3002 ENTRY(uzero)
3003 movq kernelbase(%rip), %rax
3004 cmpq %rax, %rdi
3005 jb do_zero
3006 movq %rax, %rdi /* force fault at kernelbase */
3007 jmp do_zero
3008 SET_SIZE(uzero)
3009
3010 ENTRY(ucopy)
3011 movq kernelbase(%rip), %rax
3012 cmpq %rax, %rdi
3013 cmovaeq %rax, %rdi /* force fault at kernelbase */
3014 cmpq %rax, %rsi
3015 cmovaeq %rax, %rsi /* force fault at kernelbase */
3016 jmp do_copy
3017 SET_SIZE(ucopy)
3018
3019 /*
3020	 * Note, the frame pointer is required here because do_copystr expects
3021 * to be able to pop it off!
3022 */
3023 ENTRY(ucopystr)
3024 pushq %rbp
3025 movq %rsp, %rbp
3026 movq kernelbase(%rip), %rax
3027 cmpq %rax, %rdi
3028 cmovaeq %rax, %rdi /* force fault at kernelbase */
3029 cmpq %rax, %rsi
3030 cmovaeq %rax, %rsi /* force fault at kernelbase */
3031	/* do_copystr expects the lofault handler address in %r8 */
3032	/* and the SMAP restore flag (the non-ABI 6th arg) in %r10d */
3033 xorl %r10d, %r10d
3034 movq %gs:CPU_THREAD, %r8
3035 movq T_LOFAULT(%r8), %r8
3036 jmp do_copystr
3037 SET_SIZE(ucopystr)
3038
3039 #elif defined(__i386)
3040
3041 ENTRY(copyin_noerr)
3042 movl kernelbase, %eax
3043 #ifdef DEBUG
3044 cmpl %eax, 8(%esp)
3045 jae 1f
3046 pushl $.cpyin_ne_pmsg
3047 call panic
3048 1:
3049 #endif
3050 cmpl %eax, 4(%esp)
3051 jb do_copy
3052 movl %eax, 4(%esp) /* force fault at kernelbase */
3053 jmp do_copy
3054 SET_SIZE(copyin_noerr)
3055
3056 ENTRY(copyout_noerr)
3057 movl kernelbase, %eax
3058 #ifdef DEBUG
3059 cmpl %eax, 4(%esp)
3060 jae 1f
3061 pushl $.cpyout_ne_pmsg
3062 call panic
3063 1:
3064 #endif
3065 cmpl %eax, 8(%esp)
3066 jb do_copy
3067 movl %eax, 8(%esp) /* force fault at kernelbase */
3068 jmp do_copy
3069 SET_SIZE(copyout_noerr)
3070
3071 ENTRY(uzero)
3072 movl kernelbase, %eax
3073 cmpl %eax, 4(%esp)
3074 jb do_zero
3075 movl %eax, 4(%esp) /* force fault at kernelbase */
3076 jmp do_zero
3077 SET_SIZE(uzero)
3078
3079 ENTRY(ucopy)
3080 movl kernelbase, %eax
3081 cmpl %eax, 4(%esp)
3082 jb 1f
3083 movl %eax, 4(%esp) /* force fault at kernelbase */
3084 1:
3085 cmpl %eax, 8(%esp)
3086 jb do_copy
3087 movl %eax, 8(%esp) /* force fault at kernelbase */
3088 jmp do_copy
3089 SET_SIZE(ucopy)
3090
3091 ENTRY(ucopystr)
3092 movl kernelbase, %eax
3093 cmpl %eax, 4(%esp)
3094 jb 1f
3095 movl %eax, 4(%esp) /* force fault at kernelbase */
3096 1:
3097 cmpl %eax, 8(%esp)
3098 jb 2f
3099 movl %eax, 8(%esp) /* force fault at kernelbase */
3100 2:
3101 /* do_copystr expects the lofault address in %eax */
3102 movl %gs:CPU_THREAD, %eax
3103 movl T_LOFAULT(%eax), %eax
3104 jmp do_copystr
3105 SET_SIZE(ucopystr)
3106
3107 #endif /* __i386 */
3108
3109 #ifdef DEBUG
3110 .data
3111 .kcopy_panic_msg:
3112 .string "kcopy: arguments below kernelbase"
3113 .bcopy_panic_msg:
3114 .string "bcopy: arguments below kernelbase"
3115 .kzero_panic_msg:
3116 .string "kzero: arguments below kernelbase"
3117 .bzero_panic_msg:
3118 .string "bzero: arguments below kernelbase"
3119 .copyin_panic_msg:
3120 .string "copyin: kaddr argument below kernelbase"
3121 .xcopyin_panic_msg:
3122 .string "xcopyin: kaddr argument below kernelbase"
3123 .copyout_panic_msg:
3124 .string "copyout: kaddr argument below kernelbase"
3125 .xcopyout_panic_msg:
3126 .string "xcopyout: kaddr argument below kernelbase"
3127 .copystr_panic_msg:
3128 .string "copystr: arguments in user space"
3129 .copyinstr_panic_msg:
3130 .string "copyinstr: kaddr argument not in kernel address space"
3131 .copyoutstr_panic_msg:
3132 .string "copyoutstr: kaddr argument not in kernel address space"
3133 .cpyin_ne_pmsg:
3134 .string "copyin_noerr: argument not in kernel address space"
3135 .cpyout_ne_pmsg:
3136 .string "copyout_noerr: argument not in kernel address space"
3137 #endif
3138
3139 #endif /* __lint */
3140
3141 #ifndef __lint
3142
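/*
 * Counts of the SMAP_ENABLE_INSTR() and SMAP_DISABLE_INSTR() sites
 * annotated above, exported for the code that patches those sites.
 */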
3143 .data
3144 .align 4
3145 .globl _smap_enable_patch_count
3146 .type _smap_enable_patch_count,@object
3147 .size _smap_enable_patch_count, 4
3148 _smap_enable_patch_count:
3149 .long SMAP_ENABLE_COUNT
3150
3151 .globl _smap_disable_patch_count
3152 .type _smap_disable_patch_count,@object
3153 .size _smap_disable_patch_count, 4
3154 _smap_disable_patch_count:
3155 .long SMAP_DISABLE_COUNT
3156
3157 #endif /* __lint */
--- EOF ---