11787 Kernel needs to be built with retpolines
11788 Kernel needs to generally use RSB stuffing
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: John Levon <john.levon@joyent.com>
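
The change to copy.s in this webrev is mechanical: each indirect jump through a register (for example "jmpq *%r10") is replaced with the INDIRECT_JMP_REG macro, and jumps through a memory operand (such as "jmp *CP_COPYIN(%rax)") first load the target into a register so the same macro applies. The macro presumably comes from <sys/asm_linkage.h>, which copy.s already includes, and allows the build to emit a retpoline instead of a raw indirect branch. The thunk itself is not part of this file; the sketch below is illustrative only, showing the conventional retpoline construction for a %r10-based jump, and its label and symbol names are not taken from this webrev.

	/* Illustrative retpoline sketch -- not part of copy.s */
	__x86_indirect_thunk_r10:
		call	2f		/* push address of 1: and jump to 2: */
	1:	pause			/* speculation of the ret below lands here ... */
		lfence			/* ... and spins in a harmless fenced loop */
		jmp	1b
	2:	movq	%r10, (%rsp)	/* replace the saved return address with the real target */
		ret			/* architectural transfer to the original *%r10 target */

When the mitigation is not needed, the macro or thunk can be reduced back to an ordinary indirect jump; the point of the change is that copy.s itself no longer hard-codes one.
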
--- old/usr/src/uts/intel/ia32/ml/copy.s
+++ new/usr/src/uts/intel/ia32/ml/copy.s
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /*
27 27 * Copyright (c) 2009, Intel Corporation
28 28 * All rights reserved.
29 29 */
30 30
31 31 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
32 32 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
33 33 /* All Rights Reserved */
34 34
35 35 /* Copyright (c) 1987, 1988 Microsoft Corporation */
36 36 /* All Rights Reserved */
37 37
38 38 /*
39 - * Copyright (c) 2018 Joyent, Inc.
39 + * Copyright 2019 Joyent, Inc.
40 40 */
41 41
42 42 #include <sys/errno.h>
43 43 #include <sys/asm_linkage.h>
44 44
45 45 #if defined(__lint)
46 46 #include <sys/types.h>
47 47 #include <sys/systm.h>
48 48 #else /* __lint */
49 49 #include "assym.h"
50 50 #endif /* __lint */
51 51
52 52 #define KCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */
53 53 #define XCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */
54 54 /*
55 55 * Non-temporal access (NTA) alignment requirement
56 56 */
57 57 #define NTA_ALIGN_SIZE 4 /* Must be at least 4-byte aligned */
58 58 #define NTA_ALIGN_MASK _CONST(NTA_ALIGN_SIZE-1)
59 59 #define COUNT_ALIGN_SIZE 16 /* Must be at least 16-byte aligned */
60 60 #define COUNT_ALIGN_MASK _CONST(COUNT_ALIGN_SIZE-1)
61 61
62 62 /*
63 63 * With the introduction of Broadwell, Intel has introduced supervisor mode
64 64 * access protection -- SMAP. SMAP forces the kernel to set certain bits to
65 65 * enable access of user pages (AC in rflags, defined as PS_ACHK in
66 66 * <sys/psw.h>). One of the challenges is that the implementation of many of the
67 67 * userland copy routines directly use the kernel ones. For example, copyin and
68 68 * copyout simply go and jump to the do_copy_fault label and traditionally let
69 69 * those deal with the return for them. In fact, changing that is a can of frame
70 70 * pointers.
71 71 *
72 72 * Rules and Constraints:
73 73 *
74 74 * 1. For anything that's not in copy.s, we have it do explicit calls to the
75 75 * smap related code. It usually is in a position where it is able to. This is
76 76 * restricted to the following three places: DTrace, resume() in swtch.s and
77 77 * on_fault/no_fault. If you want to add it somewhere else, we should be
78 78 * thinking twice.
79 79 *
80 80 * 2. We try to toggle this at the smallest window possible. This means that if
81 81 * we take a fault, need to try to use a copyop in copyin() or copyout(), or any
82 82 * other function, we will always leave with SMAP enabled (the kernel cannot
83 83 * access user pages).
84 84 *
85 85 * 3. None of the *_noerr() or ucopy/uzero routines should toggle SMAP. They are
86 86 * explicitly only allowed to be called while in an on_fault()/no_fault() handler,
87 87 * which already takes care of ensuring that SMAP is enabled and disabled. Note
88 88 * this means that when under an on_fault()/no_fault() handler, one must not
89 89 * call the non-*_noerr() routines.
90 90 *
91 91 * 4. The first thing we should do after coming out of an lofault handler is to
92 92 * make sure that we call smap_enable again to ensure that we are safely
93 93 * protected, as more often than not, we will have disabled smap to get there.
94 94 *
95 95 * 5. The SMAP functions, smap_enable and smap_disable may not touch any
96 96 * registers beyond those done by the call and ret. These routines may be called
97 97 * from arbitrary contexts in copy.s where we have slightly more special ABIs in
98 98 * place.
99 99 *
100 100 * 6. For any inline user of SMAP, the appropriate SMAP_ENABLE_INSTR and
101 101 * SMAP_DISABLE_INSTR macro should be used (except for smap_enable() and
102 102 * smap_disable()). If the number of these is changed, you must update the
103 103 * constants SMAP_ENABLE_COUNT and SMAP_DISABLE_COUNT below.
104 104 *
105 105 * 7. Note, at this time SMAP is not implemented for the 32-bit kernel. There is
106 106 * no known technical reason preventing it from being enabled.
107 107 *
108 108 * 8. Generally this .s file is processed by a K&R style cpp. This means that it
109 109 * really has a lot of feelings about whitespace. In particular, if you have a
110 110 * macro FOO with the arguments FOO(1, 3), the second argument is in fact ' 3'.
111 111 *
112 112 * 9. The smap_enable and smap_disable functions should not generally be called.
113 113 * They exist such that DTrace and on_trap() may use them, that's it.
114 114 *
115 115 * 10. In general, the kernel has its own value for rflags that gets used. This
116 116 * is maintained in a few different places which vary based on how the thread
117 117 * comes into existence and whether it's a user thread. In general, when the
118 118 * kernel takes a trap, it always resets rflags to a known set of flags,
119 119 * mainly as part of ENABLE_INTR_FLAGS and F_OFF and F_ON. These ensure that
120 120 * PS_ACHK is cleared for us. In addition, when using the sysenter instruction,
121 121 * we mask PS_ACHK off via the AMD_SFMASK MSR. See init_cpu_syscall() for
122 122 * where that gets masked off.
123 123 */
124 124
125 125 /*
126 126 * The optimal 64-bit bcopy and kcopy for modern x86 processors uses
127 127 * "rep smovq" for large sizes. Performance data shows that many calls to
128 128 * bcopy/kcopy/bzero/kzero operate on small buffers. For best performance for
129 129 * these small sizes unrolled code is used. For medium sizes loops writing
130 130 * 64-bytes per loop are used. Transition points were determined experimentally.
131 - */
131 + */
132 132 #define BZERO_USE_REP (1024)
133 133 #define BCOPY_DFLT_REP (128)
134 134 #define BCOPY_NHM_REP (768)
135 135
136 136 /*
137 137 * Copy a block of storage, returning an error code if `from' or
138 138 * `to' takes a kernel pagefault which cannot be resolved.
139 139 * Returns errno value on pagefault error, 0 if all ok
140 140 */
141 141
142 142 /*
143 143 * I'm sorry about these macros, but copy.s is unsurprisingly sensitive to
144 144 * additional call instructions.
145 145 */
146 146 #if defined(__amd64)
147 147 #define SMAP_DISABLE_COUNT 16
148 148 #define SMAP_ENABLE_COUNT 26
149 149 #elif defined(__i386)
150 150 #define SMAP_DISABLE_COUNT 0
151 151 #define SMAP_ENABLE_COUNT 0
152 152 #endif
153 153
154 154 #define SMAP_DISABLE_INSTR(ITER) \
155 155 .globl _smap_disable_patch_/**/ITER; \
156 156 _smap_disable_patch_/**/ITER/**/:; \
157 157 nop; nop; nop;
158 158
159 159 #define SMAP_ENABLE_INSTR(ITER) \
160 160 .globl _smap_enable_patch_/**/ITER; \
161 161 _smap_enable_patch_/**/ITER/**/:; \
162 162 nop; nop; nop;
163 163
164 164 #if defined(__lint)
165 165
166 166 /* ARGSUSED */
167 167 int
168 168 kcopy(const void *from, void *to, size_t count)
169 169 { return (0); }
170 170
171 171 #else /* __lint */
172 172
173 173 .globl kernelbase
174 174 .globl postbootkernelbase
175 175
176 176 #if defined(__amd64)
177 177
178 178 ENTRY(kcopy)
179 179 pushq %rbp
180 180 movq %rsp, %rbp
181 181 #ifdef DEBUG
182 - cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */
182 + cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */
183 183 jb 0f
184 184 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */
185 185 jnb 1f
186 186 0: leaq .kcopy_panic_msg(%rip), %rdi
187 187 xorl %eax, %eax
188 188 call panic
189 189 1:
190 190 #endif
191 191 /*
192 192 * pass lofault value as 4th argument to do_copy_fault
193 193 */
194 194 leaq _kcopy_copyerr(%rip), %rcx
195 195 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
196 196
197 197 do_copy_fault:
198 198 movq T_LOFAULT(%r9), %r11 /* save the current lofault */
199 199 movq %rcx, T_LOFAULT(%r9) /* new lofault */
200 200 call bcopy_altentry
201 201 xorl %eax, %eax /* return 0 (success) */
202 202 SMAP_ENABLE_INSTR(0)
203 203
204 204 /*
205 205 * A fault during do_copy_fault is indicated through an errno value
206 206 * in %rax and we iretq from the trap handler to here.
207 207 */
208 208 _kcopy_copyerr:
209 209 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
210 210 leave
211 211 ret
212 212 SET_SIZE(kcopy)
213 213
214 214 #elif defined(__i386)
215 215
216 216 #define ARG_FROM 8
217 217 #define ARG_TO 12
218 218 #define ARG_COUNT 16
219 219
220 220 ENTRY(kcopy)
221 221 #ifdef DEBUG
222 222 pushl %ebp
223 223 movl %esp, %ebp
224 224 movl postbootkernelbase, %eax
225 225 cmpl %eax, ARG_FROM(%ebp)
226 226 jb 0f
227 227 cmpl %eax, ARG_TO(%ebp)
228 228 jnb 1f
229 229 0: pushl $.kcopy_panic_msg
230 230 call panic
231 231 1: popl %ebp
232 232 #endif
233 233 lea _kcopy_copyerr, %eax /* lofault value */
234 - movl %gs:CPU_THREAD, %edx
234 + movl %gs:CPU_THREAD, %edx
235 235
236 236 do_copy_fault:
237 237 pushl %ebp
238 238 movl %esp, %ebp /* setup stack frame */
239 239 pushl %esi
240 240 pushl %edi /* save registers */
241 241
242 242 movl T_LOFAULT(%edx), %edi
243 243 pushl %edi /* save the current lofault */
244 244 movl %eax, T_LOFAULT(%edx) /* new lofault */
245 245
246 246 movl ARG_COUNT(%ebp), %ecx
247 247 movl ARG_FROM(%ebp), %esi
248 248 movl ARG_TO(%ebp), %edi
249 249 shrl $2, %ecx /* word count */
250 250 rep
251 251 smovl
252 252 movl ARG_COUNT(%ebp), %ecx
253 253 andl $3, %ecx /* bytes left over */
254 254 rep
255 255 smovb
256 256 xorl %eax, %eax
257 257
258 258 /*
259 259 * A fault during do_copy_fault is indicated through an errno value
260 260 * in %eax and we iret from the trap handler to here.
261 261 */
262 262 _kcopy_copyerr:
263 263 popl %ecx
264 264 popl %edi
265 265 movl %ecx, T_LOFAULT(%edx) /* restore the original lofault */
266 266 popl %esi
267 267 popl %ebp
268 268 ret
269 269 SET_SIZE(kcopy)
270 270
271 271 #undef ARG_FROM
272 272 #undef ARG_TO
273 273 #undef ARG_COUNT
274 274
275 275 #endif /* __i386 */
276 276 #endif /* __lint */
277 277
278 278 #if defined(__lint)
279 279
280 280 /*
281 281 * Copy a block of storage. Similar to kcopy but uses non-temporal
282 282 * instructions.
283 283 */
284 284
285 285 /* ARGSUSED */
286 286 int
287 287 kcopy_nta(const void *from, void *to, size_t count, int copy_cached)
288 288 { return (0); }
289 289
290 290 #else /* __lint */
291 291
292 292 #if defined(__amd64)
293 293
294 294 #define COPY_LOOP_INIT(src, dst, cnt) \
295 295 addq cnt, src; \
296 296 addq cnt, dst; \
297 297 shrq $3, cnt; \
298 298 neg cnt
299 299
300 300 /* Copy 16 bytes per loop. Uses %rax and %r8 */
301 301 #define COPY_LOOP_BODY(src, dst, cnt) \
302 302 prefetchnta 0x100(src, cnt, 8); \
303 303 movq (src, cnt, 8), %rax; \
304 304 movq 0x8(src, cnt, 8), %r8; \
305 305 movnti %rax, (dst, cnt, 8); \
306 306 movnti %r8, 0x8(dst, cnt, 8); \
307 307 addq $2, cnt
308 308
309 309 ENTRY(kcopy_nta)
310 310 pushq %rbp
311 311 movq %rsp, %rbp
312 312 #ifdef DEBUG
313 - cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */
313 + cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */
314 314 jb 0f
315 315 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */
316 316 jnb 1f
317 317 0: leaq .kcopy_panic_msg(%rip), %rdi
318 318 xorl %eax, %eax
319 319 call panic
320 320 1:
321 321 #endif
322 322
323 323 movq %gs:CPU_THREAD, %r9
324 324 cmpq $0, %rcx /* No non-temporal access? */
325 325 /*
326 326 * pass lofault value as 4th argument to do_copy_fault
327 327 */
328 328 leaq _kcopy_nta_copyerr(%rip), %rcx /* doesn't set rflags */
329 329 jnz do_copy_fault /* use regular access */
330 330 /*
331 331 * Make sure cnt is >= KCOPY_MIN_SIZE
332 332 */
333 333 cmpq $KCOPY_MIN_SIZE, %rdx
334 334 jb do_copy_fault
335 335
336 336 /*
337 337 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
338 338 * count is COUNT_ALIGN_SIZE aligned.
339 339 */
340 340 movq %rdi, %r10
341 341 orq %rsi, %r10
342 342 andq $NTA_ALIGN_MASK, %r10
343 343 orq %rdx, %r10
344 344 andq $COUNT_ALIGN_MASK, %r10
345 345 jnz do_copy_fault
346 346
347 347 ALTENTRY(do_copy_fault_nta)
348 348 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
349 349 movq T_LOFAULT(%r9), %r11 /* save the current lofault */
350 350 movq %rcx, T_LOFAULT(%r9) /* new lofault */
351 351
352 352 /*
353 353 * COPY_LOOP_BODY uses %rax and %r8
354 354 */
355 355 COPY_LOOP_INIT(%rdi, %rsi, %rdx)
356 356 2: COPY_LOOP_BODY(%rdi, %rsi, %rdx)
357 357 jnz 2b
358 358
359 359 mfence
360 360 xorl %eax, %eax /* return 0 (success) */
361 361 SMAP_ENABLE_INSTR(1)
362 362
363 363 _kcopy_nta_copyerr:
364 364 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
365 365 leave
366 366 ret
367 367 SET_SIZE(do_copy_fault_nta)
368 368 SET_SIZE(kcopy_nta)
369 369
370 370 #elif defined(__i386)
371 371
372 372 #define ARG_FROM 8
373 373 #define ARG_TO 12
374 374 #define ARG_COUNT 16
375 375
376 376 #define COPY_LOOP_INIT(src, dst, cnt) \
377 377 addl cnt, src; \
378 378 addl cnt, dst; \
379 379 shrl $3, cnt; \
380 380 neg cnt
381 381
382 382 #define COPY_LOOP_BODY(src, dst, cnt) \
383 383 prefetchnta 0x100(src, cnt, 8); \
384 384 movl (src, cnt, 8), %esi; \
385 385 movnti %esi, (dst, cnt, 8); \
386 386 movl 0x4(src, cnt, 8), %esi; \
387 387 movnti %esi, 0x4(dst, cnt, 8); \
388 388 movl 0x8(src, cnt, 8), %esi; \
389 389 movnti %esi, 0x8(dst, cnt, 8); \
390 390 movl 0xc(src, cnt, 8), %esi; \
391 391 movnti %esi, 0xc(dst, cnt, 8); \
392 392 addl $2, cnt
393 393
394 394 /*
395 395 * kcopy_nta is not implemented for 32-bit as no performance
396 396 * improvement was shown. We simply jump directly to kcopy
397 397 * and discard the 4 arguments.
398 398 */
399 399 ENTRY(kcopy_nta)
400 400 jmp kcopy
401 401
402 402 lea _kcopy_nta_copyerr, %eax /* lofault value */
403 403 ALTENTRY(do_copy_fault_nta)
404 404 pushl %ebp
405 405 movl %esp, %ebp /* setup stack frame */
406 406 pushl %esi
407 407 pushl %edi
408 408
409 - movl %gs:CPU_THREAD, %edx
409 + movl %gs:CPU_THREAD, %edx
410 410 movl T_LOFAULT(%edx), %edi
411 411 pushl %edi /* save the current lofault */
412 412 movl %eax, T_LOFAULT(%edx) /* new lofault */
413 413
414 414 /* COPY_LOOP_BODY needs to use %esi */
415 415 movl ARG_COUNT(%ebp), %ecx
416 416 movl ARG_FROM(%ebp), %edi
417 417 movl ARG_TO(%ebp), %eax
418 418 COPY_LOOP_INIT(%edi, %eax, %ecx)
419 419 1: COPY_LOOP_BODY(%edi, %eax, %ecx)
420 420 jnz 1b
421 421 mfence
422 422
423 423 xorl %eax, %eax
424 424 _kcopy_nta_copyerr:
425 425 popl %ecx
426 426 popl %edi
427 427 movl %ecx, T_LOFAULT(%edx) /* restore the original lofault */
428 428 popl %esi
429 429 leave
430 430 ret
431 431 SET_SIZE(do_copy_fault_nta)
432 432 SET_SIZE(kcopy_nta)
433 433
434 434 #undef ARG_FROM
435 435 #undef ARG_TO
436 436 #undef ARG_COUNT
437 437
438 438 #endif /* __i386 */
439 439 #endif /* __lint */
440 440
441 441 #if defined(__lint)
442 442
443 443 /* ARGSUSED */
444 444 void
445 445 bcopy(const void *from, void *to, size_t count)
446 446 {}
447 447
448 448 #else /* __lint */
449 449
450 450 #if defined(__amd64)
451 451
452 452 ENTRY(bcopy)
453 453 #ifdef DEBUG
454 454 orq %rdx, %rdx /* %rdx = count */
455 455 jz 1f
456 456 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */
457 457 jb 0f
458 - cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */
458 + cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */
459 459 jnb 1f
460 460 0: leaq .bcopy_panic_msg(%rip), %rdi
461 461 jmp call_panic /* setup stack and call panic */
462 462 1:
463 463 #endif
464 464 /*
465 465 * bcopy_altentry() is called from kcopy, i.e., do_copy_fault.
466 466 * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy
467 467 * uses these registers in future they must be saved and restored.
468 468 */
469 469 ALTENTRY(bcopy_altentry)
470 470 do_copy:
471 471 #define L(s) .bcopy/**/s
472 472 cmpq $0x50, %rdx /* 80 */
473 473 jae bcopy_ck_size
474 474
475 475 /*
476 476 * Performance data shows many callers copy small buffers. So for
477 477 * best perf for these sizes unrolled code is used. Store data without
478 478 * worrying about alignment.
479 479 */
480 480 leaq L(fwdPxQx)(%rip), %r10
481 481 addq %rdx, %rdi
482 482 addq %rdx, %rsi
483 483 movslq (%r10,%rdx,4), %rcx
484 484 leaq (%rcx,%r10,1), %r10
485 - jmpq *%r10
485 + INDIRECT_JMP_REG(r10)
486 486
487 487 .p2align 4
488 488 L(fwdPxQx):
489 489 .int L(P0Q0)-L(fwdPxQx) /* 0 */
490 490 .int L(P1Q0)-L(fwdPxQx)
491 491 .int L(P2Q0)-L(fwdPxQx)
492 492 .int L(P3Q0)-L(fwdPxQx)
493 493 .int L(P4Q0)-L(fwdPxQx)
494 494 .int L(P5Q0)-L(fwdPxQx)
495 495 .int L(P6Q0)-L(fwdPxQx)
496 - .int L(P7Q0)-L(fwdPxQx)
496 + .int L(P7Q0)-L(fwdPxQx)
497 497
498 498 .int L(P0Q1)-L(fwdPxQx) /* 8 */
499 499 .int L(P1Q1)-L(fwdPxQx)
500 500 .int L(P2Q1)-L(fwdPxQx)
501 501 .int L(P3Q1)-L(fwdPxQx)
502 502 .int L(P4Q1)-L(fwdPxQx)
503 503 .int L(P5Q1)-L(fwdPxQx)
504 504 .int L(P6Q1)-L(fwdPxQx)
505 - .int L(P7Q1)-L(fwdPxQx)
505 + .int L(P7Q1)-L(fwdPxQx)
506 506
507 507 .int L(P0Q2)-L(fwdPxQx) /* 16 */
508 508 .int L(P1Q2)-L(fwdPxQx)
509 509 .int L(P2Q2)-L(fwdPxQx)
510 510 .int L(P3Q2)-L(fwdPxQx)
511 511 .int L(P4Q2)-L(fwdPxQx)
512 512 .int L(P5Q2)-L(fwdPxQx)
513 513 .int L(P6Q2)-L(fwdPxQx)
514 - .int L(P7Q2)-L(fwdPxQx)
514 + .int L(P7Q2)-L(fwdPxQx)
515 515
516 516 .int L(P0Q3)-L(fwdPxQx) /* 24 */
517 517 .int L(P1Q3)-L(fwdPxQx)
518 518 .int L(P2Q3)-L(fwdPxQx)
519 519 .int L(P3Q3)-L(fwdPxQx)
520 520 .int L(P4Q3)-L(fwdPxQx)
521 521 .int L(P5Q3)-L(fwdPxQx)
522 522 .int L(P6Q3)-L(fwdPxQx)
523 - .int L(P7Q3)-L(fwdPxQx)
523 + .int L(P7Q3)-L(fwdPxQx)
524 524
525 525 .int L(P0Q4)-L(fwdPxQx) /* 32 */
526 526 .int L(P1Q4)-L(fwdPxQx)
527 527 .int L(P2Q4)-L(fwdPxQx)
528 528 .int L(P3Q4)-L(fwdPxQx)
529 529 .int L(P4Q4)-L(fwdPxQx)
530 530 .int L(P5Q4)-L(fwdPxQx)
531 531 .int L(P6Q4)-L(fwdPxQx)
532 - .int L(P7Q4)-L(fwdPxQx)
532 + .int L(P7Q4)-L(fwdPxQx)
533 533
534 534 .int L(P0Q5)-L(fwdPxQx) /* 40 */
535 535 .int L(P1Q5)-L(fwdPxQx)
536 536 .int L(P2Q5)-L(fwdPxQx)
537 537 .int L(P3Q5)-L(fwdPxQx)
538 538 .int L(P4Q5)-L(fwdPxQx)
539 539 .int L(P5Q5)-L(fwdPxQx)
540 540 .int L(P6Q5)-L(fwdPxQx)
541 - .int L(P7Q5)-L(fwdPxQx)
541 + .int L(P7Q5)-L(fwdPxQx)
542 542
543 543 .int L(P0Q6)-L(fwdPxQx) /* 48 */
544 544 .int L(P1Q6)-L(fwdPxQx)
545 545 .int L(P2Q6)-L(fwdPxQx)
546 546 .int L(P3Q6)-L(fwdPxQx)
547 547 .int L(P4Q6)-L(fwdPxQx)
548 548 .int L(P5Q6)-L(fwdPxQx)
549 549 .int L(P6Q6)-L(fwdPxQx)
550 - .int L(P7Q6)-L(fwdPxQx)
550 + .int L(P7Q6)-L(fwdPxQx)
551 551
552 552 .int L(P0Q7)-L(fwdPxQx) /* 56 */
553 553 .int L(P1Q7)-L(fwdPxQx)
554 554 .int L(P2Q7)-L(fwdPxQx)
555 555 .int L(P3Q7)-L(fwdPxQx)
556 556 .int L(P4Q7)-L(fwdPxQx)
557 557 .int L(P5Q7)-L(fwdPxQx)
558 558 .int L(P6Q7)-L(fwdPxQx)
559 - .int L(P7Q7)-L(fwdPxQx)
559 + .int L(P7Q7)-L(fwdPxQx)
560 560
561 561 .int L(P0Q8)-L(fwdPxQx) /* 64 */
562 562 .int L(P1Q8)-L(fwdPxQx)
563 563 .int L(P2Q8)-L(fwdPxQx)
564 564 .int L(P3Q8)-L(fwdPxQx)
565 565 .int L(P4Q8)-L(fwdPxQx)
566 566 .int L(P5Q8)-L(fwdPxQx)
567 567 .int L(P6Q8)-L(fwdPxQx)
568 568 .int L(P7Q8)-L(fwdPxQx)
569 569
570 570 .int L(P0Q9)-L(fwdPxQx) /* 72 */
571 571 .int L(P1Q9)-L(fwdPxQx)
572 572 .int L(P2Q9)-L(fwdPxQx)
573 573 .int L(P3Q9)-L(fwdPxQx)
574 574 .int L(P4Q9)-L(fwdPxQx)
575 575 .int L(P5Q9)-L(fwdPxQx)
576 576 .int L(P6Q9)-L(fwdPxQx)
577 577 .int L(P7Q9)-L(fwdPxQx) /* 79 */
578 578
579 579 .p2align 4
580 580 L(P0Q9):
581 581 mov -0x48(%rdi), %rcx
582 582 mov %rcx, -0x48(%rsi)
583 583 L(P0Q8):
584 584 mov -0x40(%rdi), %r10
585 585 mov %r10, -0x40(%rsi)
586 586 L(P0Q7):
587 587 mov -0x38(%rdi), %r8
588 588 mov %r8, -0x38(%rsi)
589 589 L(P0Q6):
590 590 mov -0x30(%rdi), %rcx
591 591 mov %rcx, -0x30(%rsi)
592 592 L(P0Q5):
593 593 mov -0x28(%rdi), %r10
594 594 mov %r10, -0x28(%rsi)
595 595 L(P0Q4):
596 596 mov -0x20(%rdi), %r8
597 597 mov %r8, -0x20(%rsi)
598 598 L(P0Q3):
599 599 mov -0x18(%rdi), %rcx
600 600 mov %rcx, -0x18(%rsi)
601 601 L(P0Q2):
602 602 mov -0x10(%rdi), %r10
603 603 mov %r10, -0x10(%rsi)
604 604 L(P0Q1):
605 605 mov -0x8(%rdi), %r8
606 606 mov %r8, -0x8(%rsi)
607 -L(P0Q0):
608 - ret
607 +L(P0Q0):
608 + ret
609 609
610 610 .p2align 4
611 611 L(P1Q9):
612 612 mov -0x49(%rdi), %r8
613 613 mov %r8, -0x49(%rsi)
614 614 L(P1Q8):
615 615 mov -0x41(%rdi), %rcx
616 616 mov %rcx, -0x41(%rsi)
617 617 L(P1Q7):
618 618 mov -0x39(%rdi), %r10
619 619 mov %r10, -0x39(%rsi)
620 620 L(P1Q6):
621 621 mov -0x31(%rdi), %r8
622 622 mov %r8, -0x31(%rsi)
623 623 L(P1Q5):
624 624 mov -0x29(%rdi), %rcx
625 625 mov %rcx, -0x29(%rsi)
626 626 L(P1Q4):
627 627 mov -0x21(%rdi), %r10
628 628 mov %r10, -0x21(%rsi)
629 629 L(P1Q3):
630 630 mov -0x19(%rdi), %r8
631 631 mov %r8, -0x19(%rsi)
632 632 L(P1Q2):
633 633 mov -0x11(%rdi), %rcx
634 634 mov %rcx, -0x11(%rsi)
635 635 L(P1Q1):
636 636 mov -0x9(%rdi), %r10
637 637 mov %r10, -0x9(%rsi)
638 638 L(P1Q0):
639 639 movzbq -0x1(%rdi), %r8
640 640 mov %r8b, -0x1(%rsi)
641 - ret
641 + ret
642 642
643 643 .p2align 4
644 644 L(P2Q9):
645 645 mov -0x4a(%rdi), %r8
646 646 mov %r8, -0x4a(%rsi)
647 647 L(P2Q8):
648 648 mov -0x42(%rdi), %rcx
649 649 mov %rcx, -0x42(%rsi)
650 650 L(P2Q7):
651 651 mov -0x3a(%rdi), %r10
652 652 mov %r10, -0x3a(%rsi)
653 653 L(P2Q6):
654 654 mov -0x32(%rdi), %r8
655 655 mov %r8, -0x32(%rsi)
656 656 L(P2Q5):
657 657 mov -0x2a(%rdi), %rcx
658 658 mov %rcx, -0x2a(%rsi)
659 659 L(P2Q4):
660 660 mov -0x22(%rdi), %r10
661 661 mov %r10, -0x22(%rsi)
662 662 L(P2Q3):
663 663 mov -0x1a(%rdi), %r8
664 664 mov %r8, -0x1a(%rsi)
665 665 L(P2Q2):
666 666 mov -0x12(%rdi), %rcx
667 667 mov %rcx, -0x12(%rsi)
668 668 L(P2Q1):
669 669 mov -0xa(%rdi), %r10
670 670 mov %r10, -0xa(%rsi)
671 671 L(P2Q0):
672 672 movzwq -0x2(%rdi), %r8
673 673 mov %r8w, -0x2(%rsi)
674 - ret
674 + ret
675 675
676 676 .p2align 4
677 677 L(P3Q9):
678 678 mov -0x4b(%rdi), %r8
679 679 mov %r8, -0x4b(%rsi)
680 680 L(P3Q8):
681 681 mov -0x43(%rdi), %rcx
682 682 mov %rcx, -0x43(%rsi)
683 683 L(P3Q7):
684 684 mov -0x3b(%rdi), %r10
685 685 mov %r10, -0x3b(%rsi)
686 686 L(P3Q6):
687 687 mov -0x33(%rdi), %r8
688 688 mov %r8, -0x33(%rsi)
689 689 L(P3Q5):
690 690 mov -0x2b(%rdi), %rcx
691 691 mov %rcx, -0x2b(%rsi)
692 692 L(P3Q4):
693 693 mov -0x23(%rdi), %r10
694 694 mov %r10, -0x23(%rsi)
695 695 L(P3Q3):
696 696 mov -0x1b(%rdi), %r8
697 697 mov %r8, -0x1b(%rsi)
698 698 L(P3Q2):
699 699 mov -0x13(%rdi), %rcx
700 700 mov %rcx, -0x13(%rsi)
701 701 L(P3Q1):
702 702 mov -0xb(%rdi), %r10
703 703 mov %r10, -0xb(%rsi)
704 704 /*
705 - * These trailing loads/stores have to do all their loads 1st,
705 + * These trailing loads/stores have to do all their loads 1st,
706 706 * then do the stores.
707 707 */
708 708 L(P3Q0):
709 709 movzwq -0x3(%rdi), %r8
710 710 movzbq -0x1(%rdi), %r10
711 711 mov %r8w, -0x3(%rsi)
712 712 mov %r10b, -0x1(%rsi)
713 - ret
713 + ret
714 714
715 715 .p2align 4
716 716 L(P4Q9):
717 717 mov -0x4c(%rdi), %r8
718 718 mov %r8, -0x4c(%rsi)
719 719 L(P4Q8):
720 720 mov -0x44(%rdi), %rcx
721 721 mov %rcx, -0x44(%rsi)
722 722 L(P4Q7):
723 723 mov -0x3c(%rdi), %r10
724 724 mov %r10, -0x3c(%rsi)
725 725 L(P4Q6):
726 726 mov -0x34(%rdi), %r8
727 727 mov %r8, -0x34(%rsi)
728 728 L(P4Q5):
729 729 mov -0x2c(%rdi), %rcx
730 730 mov %rcx, -0x2c(%rsi)
731 731 L(P4Q4):
732 732 mov -0x24(%rdi), %r10
733 733 mov %r10, -0x24(%rsi)
734 734 L(P4Q3):
735 735 mov -0x1c(%rdi), %r8
736 736 mov %r8, -0x1c(%rsi)
737 737 L(P4Q2):
738 738 mov -0x14(%rdi), %rcx
739 739 mov %rcx, -0x14(%rsi)
740 740 L(P4Q1):
741 741 mov -0xc(%rdi), %r10
742 742 mov %r10, -0xc(%rsi)
743 743 L(P4Q0):
744 744 mov -0x4(%rdi), %r8d
745 745 mov %r8d, -0x4(%rsi)
746 - ret
746 + ret
747 747
748 748 .p2align 4
749 749 L(P5Q9):
750 750 mov -0x4d(%rdi), %r8
751 751 mov %r8, -0x4d(%rsi)
752 752 L(P5Q8):
753 753 mov -0x45(%rdi), %rcx
754 754 mov %rcx, -0x45(%rsi)
755 755 L(P5Q7):
756 756 mov -0x3d(%rdi), %r10
757 757 mov %r10, -0x3d(%rsi)
758 758 L(P5Q6):
759 759 mov -0x35(%rdi), %r8
760 760 mov %r8, -0x35(%rsi)
761 761 L(P5Q5):
762 762 mov -0x2d(%rdi), %rcx
763 763 mov %rcx, -0x2d(%rsi)
764 764 L(P5Q4):
765 765 mov -0x25(%rdi), %r10
766 766 mov %r10, -0x25(%rsi)
767 767 L(P5Q3):
768 768 mov -0x1d(%rdi), %r8
769 769 mov %r8, -0x1d(%rsi)
770 770 L(P5Q2):
771 771 mov -0x15(%rdi), %rcx
772 772 mov %rcx, -0x15(%rsi)
773 773 L(P5Q1):
774 774 mov -0xd(%rdi), %r10
775 775 mov %r10, -0xd(%rsi)
776 776 L(P5Q0):
777 777 mov -0x5(%rdi), %r8d
778 778 movzbq -0x1(%rdi), %r10
779 779 mov %r8d, -0x5(%rsi)
780 780 mov %r10b, -0x1(%rsi)
781 - ret
781 + ret
782 782
783 783 .p2align 4
784 784 L(P6Q9):
785 785 mov -0x4e(%rdi), %r8
786 786 mov %r8, -0x4e(%rsi)
787 787 L(P6Q8):
788 788 mov -0x46(%rdi), %rcx
789 789 mov %rcx, -0x46(%rsi)
790 790 L(P6Q7):
791 791 mov -0x3e(%rdi), %r10
792 792 mov %r10, -0x3e(%rsi)
793 793 L(P6Q6):
794 794 mov -0x36(%rdi), %r8
795 795 mov %r8, -0x36(%rsi)
796 796 L(P6Q5):
797 797 mov -0x2e(%rdi), %rcx
798 798 mov %rcx, -0x2e(%rsi)
799 799 L(P6Q4):
800 800 mov -0x26(%rdi), %r10
801 801 mov %r10, -0x26(%rsi)
802 802 L(P6Q3):
803 803 mov -0x1e(%rdi), %r8
804 804 mov %r8, -0x1e(%rsi)
805 805 L(P6Q2):
806 806 mov -0x16(%rdi), %rcx
807 807 mov %rcx, -0x16(%rsi)
808 808 L(P6Q1):
809 809 mov -0xe(%rdi), %r10
810 810 mov %r10, -0xe(%rsi)
811 811 L(P6Q0):
812 812 mov -0x6(%rdi), %r8d
813 813 movzwq -0x2(%rdi), %r10
814 814 mov %r8d, -0x6(%rsi)
815 815 mov %r10w, -0x2(%rsi)
816 - ret
816 + ret
817 817
818 818 .p2align 4
819 819 L(P7Q9):
820 820 mov -0x4f(%rdi), %r8
821 821 mov %r8, -0x4f(%rsi)
822 822 L(P7Q8):
823 823 mov -0x47(%rdi), %rcx
824 824 mov %rcx, -0x47(%rsi)
825 825 L(P7Q7):
826 826 mov -0x3f(%rdi), %r10
827 827 mov %r10, -0x3f(%rsi)
828 828 L(P7Q6):
829 829 mov -0x37(%rdi), %r8
830 830 mov %r8, -0x37(%rsi)
831 831 L(P7Q5):
832 832 mov -0x2f(%rdi), %rcx
833 833 mov %rcx, -0x2f(%rsi)
834 834 L(P7Q4):
835 835 mov -0x27(%rdi), %r10
836 836 mov %r10, -0x27(%rsi)
837 837 L(P7Q3):
838 838 mov -0x1f(%rdi), %r8
839 839 mov %r8, -0x1f(%rsi)
840 840 L(P7Q2):
841 841 mov -0x17(%rdi), %rcx
842 842 mov %rcx, -0x17(%rsi)
843 843 L(P7Q1):
844 844 mov -0xf(%rdi), %r10
845 845 mov %r10, -0xf(%rsi)
846 846 L(P7Q0):
847 847 mov -0x7(%rdi), %r8d
848 848 movzwq -0x3(%rdi), %r10
849 849 movzbq -0x1(%rdi), %rcx
850 850 mov %r8d, -0x7(%rsi)
851 851 mov %r10w, -0x3(%rsi)
852 852 mov %cl, -0x1(%rsi)
853 - ret
853 + ret
854 854
855 855 /*
856 856 * For large sizes rep smovq is fastest.
857 857 * Transition point determined experimentally as measured on
858 858 * Intel Xeon processors (incl. Nehalem and previous generations) and
859 859 * AMD Opteron. The transition value is patched at boot time to avoid
860 860 * memory reference hit.
861 861 */
862 862 .globl bcopy_patch_start
863 863 bcopy_patch_start:
864 864 cmpq $BCOPY_NHM_REP, %rdx
865 865 .globl bcopy_patch_end
866 866 bcopy_patch_end:
867 867
868 868 .p2align 4
869 869 ALTENTRY(bcopy_ck_size)
870 870
871 871 cmpq $BCOPY_DFLT_REP, %rdx
872 872 jae L(use_rep)
873 873
874 874 /*
875 875 * Align to a 8-byte boundary. Avoids penalties from unaligned stores
876 876 * as well as from stores spanning cachelines.
877 877 */
878 878 test $0x7, %rsi
879 879 jz L(aligned_loop)
880 880 test $0x1, %rsi
881 881 jz 2f
882 882 movzbq (%rdi), %r8
883 883 dec %rdx
884 884 inc %rdi
885 885 mov %r8b, (%rsi)
886 886 inc %rsi
887 887 2:
888 888 test $0x2, %rsi
889 889 jz 4f
890 890 movzwq (%rdi), %r8
891 891 sub $0x2, %rdx
892 892 add $0x2, %rdi
893 893 mov %r8w, (%rsi)
894 894 add $0x2, %rsi
895 895 4:
896 896 test $0x4, %rsi
897 897 jz L(aligned_loop)
898 898 mov (%rdi), %r8d
899 899 sub $0x4, %rdx
900 900 add $0x4, %rdi
901 901 mov %r8d, (%rsi)
902 902 add $0x4, %rsi
903 903
904 904 /*
905 905 * Copy 64-bytes per loop
906 906 */
907 907 .p2align 4
908 908 L(aligned_loop):
909 909 mov (%rdi), %r8
910 910 mov 0x8(%rdi), %r10
911 911 lea -0x40(%rdx), %rdx
912 912 mov %r8, (%rsi)
913 913 mov %r10, 0x8(%rsi)
914 914 mov 0x10(%rdi), %rcx
915 915 mov 0x18(%rdi), %r8
916 916 mov %rcx, 0x10(%rsi)
917 917 mov %r8, 0x18(%rsi)
918 918
919 919 cmp $0x40, %rdx
920 920 mov 0x20(%rdi), %r10
921 921 mov 0x28(%rdi), %rcx
922 922 mov %r10, 0x20(%rsi)
923 923 mov %rcx, 0x28(%rsi)
924 924 mov 0x30(%rdi), %r8
925 925 mov 0x38(%rdi), %r10
926 926 lea 0x40(%rdi), %rdi
927 927 mov %r8, 0x30(%rsi)
928 928 mov %r10, 0x38(%rsi)
929 929 lea 0x40(%rsi), %rsi
930 930 jae L(aligned_loop)
931 931
932 932 /*
933 933 * Copy remaining bytes (0-63)
934 934 */
935 935 L(do_remainder):
936 936 leaq L(fwdPxQx)(%rip), %r10
937 937 addq %rdx, %rdi
938 938 addq %rdx, %rsi
939 939 movslq (%r10,%rdx,4), %rcx
940 940 leaq (%rcx,%r10,1), %r10
941 - jmpq *%r10
941 + INDIRECT_JMP_REG(r10)
942 942
943 943 /*
944 944 * Use rep smovq. Clear remainder via unrolled code
945 945 */
946 946 .p2align 4
947 947 L(use_rep):
948 948 xchgq %rdi, %rsi /* %rsi = source, %rdi = destination */
949 949 movq %rdx, %rcx /* %rcx = count */
950 950 shrq $3, %rcx /* 8-byte word count */
951 951 rep
952 952 smovq
953 953
954 954 xchgq %rsi, %rdi /* %rdi = src, %rsi = destination */
955 955 andq $7, %rdx /* remainder */
956 956 jnz L(do_remainder)
957 957 ret
958 958 #undef L
959 959 SET_SIZE(bcopy_ck_size)
960 960
961 961 #ifdef DEBUG
962 962 /*
963 963 * Setup frame on the run-time stack. The end of the input argument
964 964 * area must be aligned on a 16 byte boundary. The stack pointer %rsp,
965 965 * always points to the end of the latest allocated stack frame.
966 966 * panic(const char *format, ...) is a varargs function. When a
967 967 * function taking variable arguments is called, %rax must be set
968 968 * to eight times the number of floating point parameters passed
969 969 * to the function in SSE registers.
970 970 */
971 971 call_panic:
972 972 pushq %rbp /* align stack properly */
973 973 movq %rsp, %rbp
974 974 xorl %eax, %eax /* no variable arguments */
975 975 call panic /* %rdi = format string */
976 976 #endif
977 977 SET_SIZE(bcopy_altentry)
978 978 SET_SIZE(bcopy)
979 979
980 980 #elif defined(__i386)
981 981
982 982 #define ARG_FROM 4
983 983 #define ARG_TO 8
984 984 #define ARG_COUNT 12
985 985
986 986 ENTRY(bcopy)
987 987 #ifdef DEBUG
988 988 movl ARG_COUNT(%esp), %eax
989 989 orl %eax, %eax
990 990 jz 1f
991 991 movl postbootkernelbase, %eax
992 992 cmpl %eax, ARG_FROM(%esp)
993 993 jb 0f
994 994 cmpl %eax, ARG_TO(%esp)
995 995 jnb 1f
996 996 0: pushl %ebp
997 997 movl %esp, %ebp
998 998 pushl $.bcopy_panic_msg
999 999 call panic
1000 1000 1:
1001 1001 #endif
1002 1002 do_copy:
1003 1003 movl %esi, %eax /* save registers */
1004 1004 movl %edi, %edx
1005 1005 movl ARG_COUNT(%esp), %ecx
1006 1006 movl ARG_FROM(%esp), %esi
1007 1007 movl ARG_TO(%esp), %edi
1008 1008
1009 1009 shrl $2, %ecx /* word count */
1010 1010 rep
1011 1011 smovl
1012 1012 movl ARG_COUNT(%esp), %ecx
1013 1013 andl $3, %ecx /* bytes left over */
1014 1014 rep
1015 1015 smovb
1016 1016 movl %eax, %esi /* restore registers */
1017 1017 movl %edx, %edi
1018 1018 ret
1019 1019 SET_SIZE(bcopy)
1020 1020
1021 1021 #undef ARG_COUNT
1022 1022 #undef ARG_FROM
1023 1023 #undef ARG_TO
1024 1024
1025 1025 #endif /* __i386 */
1026 1026 #endif /* __lint */
1027 1027
1028 1028
1029 1029 /*
1030 1030 * Zero a block of storage, returning an error code if we
1031 1031 * take a kernel pagefault which cannot be resolved.
1032 1032 * Returns errno value on pagefault error, 0 if all ok
1033 1033 */
1034 1034
1035 1035 #if defined(__lint)
1036 1036
1037 1037 /* ARGSUSED */
1038 1038 int
1039 1039 kzero(void *addr, size_t count)
1040 1040 { return (0); }
1041 1041
1042 1042 #else /* __lint */
1043 1043
1044 1044 #if defined(__amd64)
1045 1045
1046 1046 ENTRY(kzero)
1047 1047 #ifdef DEBUG
1048 1048 cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */
1049 1049 jnb 0f
1050 1050 leaq .kzero_panic_msg(%rip), %rdi
1051 1051 jmp call_panic /* setup stack and call panic */
1052 1052 0:
1053 1053 #endif
1054 1054 /*
1055 - * pass lofault value as 3rd argument for fault return
1055 + * pass lofault value as 3rd argument for fault return
1056 1056 */
1057 1057 leaq _kzeroerr(%rip), %rdx
1058 1058
1059 1059 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
1060 1060 movq T_LOFAULT(%r9), %r11 /* save the current lofault */
1061 1061 movq %rdx, T_LOFAULT(%r9) /* new lofault */
1062 1062 call bzero_altentry
1063 1063 xorl %eax, %eax
1064 1064 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
1065 1065 ret
1066 1066 /*
1067 1067 * A fault during bzero is indicated through an errno value
1068 1068 * in %rax when we iretq to here.
1069 1069 */
1070 1070 _kzeroerr:
1071 1071 addq $8, %rsp /* pop bzero_altentry call ret addr */
1072 1072 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
1073 1073 ret
1074 1074 SET_SIZE(kzero)
1075 1075
1076 1076 #elif defined(__i386)
1077 1077
1078 1078 #define ARG_ADDR 8
1079 1079 #define ARG_COUNT 12
1080 1080
1081 1081 ENTRY(kzero)
1082 1082 #ifdef DEBUG
1083 1083 pushl %ebp
1084 1084 movl %esp, %ebp
1085 1085 movl postbootkernelbase, %eax
1086 1086 cmpl %eax, ARG_ADDR(%ebp)
1087 1087 jnb 0f
1088 1088 pushl $.kzero_panic_msg
1089 1089 call panic
1090 1090 0: popl %ebp
1091 1091 #endif
1092 1092 lea _kzeroerr, %eax /* kzeroerr is lofault value */
1093 1093
1094 1094 pushl %ebp /* save stack base */
1095 1095 movl %esp, %ebp /* set new stack base */
1096 1096 pushl %edi /* save %edi */
1097 1097
1098 - mov %gs:CPU_THREAD, %edx
1098 + mov %gs:CPU_THREAD, %edx
1099 1099 movl T_LOFAULT(%edx), %edi
1100 1100 pushl %edi /* save the current lofault */
1101 1101 movl %eax, T_LOFAULT(%edx) /* new lofault */
1102 1102
1103 1103 movl ARG_COUNT(%ebp), %ecx /* get size in bytes */
1104 1104 movl ARG_ADDR(%ebp), %edi /* %edi <- address of bytes to clear */
1105 1105 shrl $2, %ecx /* Count of double words to zero */
1106 1106 xorl %eax, %eax /* sstol val */
1107 1107 rep
1108 1108 sstol /* %ecx contains words to clear (%eax=0) */
1109 1109
1110 1110 movl ARG_COUNT(%ebp), %ecx /* get size in bytes */
1111 1111 andl $3, %ecx /* do mod 4 */
1112 1112 rep
1113 1113 sstob /* %ecx contains residual bytes to clear */
1114 1114
1115 1115 /*
1116 1116 * A fault during kzero is indicated through an errno value
1117 1117 * in %eax when we iret to here.
1118 1118 */
1119 1119 _kzeroerr:
1120 1120 popl %edi
1121 1121 movl %edi, T_LOFAULT(%edx) /* restore the original lofault */
1122 1122 popl %edi
1123 1123 popl %ebp
1124 1124 ret
1125 1125 SET_SIZE(kzero)
1126 1126
1127 1127 #undef ARG_ADDR
1128 1128 #undef ARG_COUNT
1129 1129
1130 1130 #endif /* __i386 */
1131 1131 #endif /* __lint */
1132 1132
1133 1133 /*
1134 1134 * Zero a block of storage.
1135 1135 */
1136 1136
1137 1137 #if defined(__lint)
1138 1138
1139 1139 /* ARGSUSED */
1140 1140 void
1141 1141 bzero(void *addr, size_t count)
1142 1142 {}
1143 1143
1144 1144 #else /* __lint */
1145 1145
1146 1146 #if defined(__amd64)
1147 1147
1148 1148 ENTRY(bzero)
1149 1149 #ifdef DEBUG
1150 1150 cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */
1151 1151 jnb 0f
1152 1152 leaq .bzero_panic_msg(%rip), %rdi
1153 1153 jmp call_panic /* setup stack and call panic */
1154 1154 0:
1155 1155 #endif
1156 1156 ALTENTRY(bzero_altentry)
1157 1157 do_zero:
1158 1158 #define L(s) .bzero/**/s
1159 1159 xorl %eax, %eax
1160 1160
1161 1161 cmpq $0x50, %rsi /* 80 */
1162 1162 jae L(ck_align)
1163 1163
1164 1164 /*
1165 1165 * Performance data shows many callers are zeroing small buffers. So
1166 1166 * for best perf for these sizes unrolled code is used. Store zeros
1167 1167 * without worrying about alignment.
1168 1168 */
1169 1169 leaq L(setPxQx)(%rip), %r10
1170 1170 addq %rsi, %rdi
1171 1171 movslq (%r10,%rsi,4), %rcx
1172 1172 leaq (%rcx,%r10,1), %r10
1173 - jmpq *%r10
1173 + INDIRECT_JMP_REG(r10)
1174 1174
1175 1175 .p2align 4
1176 1176 L(setPxQx):
1177 1177 .int L(P0Q0)-L(setPxQx) /* 0 */
1178 1178 .int L(P1Q0)-L(setPxQx)
1179 1179 .int L(P2Q0)-L(setPxQx)
1180 1180 .int L(P3Q0)-L(setPxQx)
1181 1181 .int L(P4Q0)-L(setPxQx)
1182 1182 .int L(P5Q0)-L(setPxQx)
1183 1183 .int L(P6Q0)-L(setPxQx)
1184 - .int L(P7Q0)-L(setPxQx)
1184 + .int L(P7Q0)-L(setPxQx)
1185 1185
1186 1186 .int L(P0Q1)-L(setPxQx) /* 8 */
1187 1187 .int L(P1Q1)-L(setPxQx)
1188 1188 .int L(P2Q1)-L(setPxQx)
1189 1189 .int L(P3Q1)-L(setPxQx)
1190 1190 .int L(P4Q1)-L(setPxQx)
1191 1191 .int L(P5Q1)-L(setPxQx)
1192 1192 .int L(P6Q1)-L(setPxQx)
1193 - .int L(P7Q1)-L(setPxQx)
1193 + .int L(P7Q1)-L(setPxQx)
1194 1194
1195 1195 .int L(P0Q2)-L(setPxQx) /* 16 */
1196 1196 .int L(P1Q2)-L(setPxQx)
1197 1197 .int L(P2Q2)-L(setPxQx)
1198 1198 .int L(P3Q2)-L(setPxQx)
1199 1199 .int L(P4Q2)-L(setPxQx)
1200 1200 .int L(P5Q2)-L(setPxQx)
1201 1201 .int L(P6Q2)-L(setPxQx)
1202 - .int L(P7Q2)-L(setPxQx)
1202 + .int L(P7Q2)-L(setPxQx)
1203 1203
1204 1204 .int L(P0Q3)-L(setPxQx) /* 24 */
1205 1205 .int L(P1Q3)-L(setPxQx)
1206 1206 .int L(P2Q3)-L(setPxQx)
1207 1207 .int L(P3Q3)-L(setPxQx)
1208 1208 .int L(P4Q3)-L(setPxQx)
1209 1209 .int L(P5Q3)-L(setPxQx)
1210 1210 .int L(P6Q3)-L(setPxQx)
1211 - .int L(P7Q3)-L(setPxQx)
1211 + .int L(P7Q3)-L(setPxQx)
1212 1212
1213 1213 .int L(P0Q4)-L(setPxQx) /* 32 */
1214 1214 .int L(P1Q4)-L(setPxQx)
1215 1215 .int L(P2Q4)-L(setPxQx)
1216 1216 .int L(P3Q4)-L(setPxQx)
1217 1217 .int L(P4Q4)-L(setPxQx)
1218 1218 .int L(P5Q4)-L(setPxQx)
1219 1219 .int L(P6Q4)-L(setPxQx)
1220 - .int L(P7Q4)-L(setPxQx)
1220 + .int L(P7Q4)-L(setPxQx)
1221 1221
1222 1222 .int L(P0Q5)-L(setPxQx) /* 40 */
1223 1223 .int L(P1Q5)-L(setPxQx)
1224 1224 .int L(P2Q5)-L(setPxQx)
1225 1225 .int L(P3Q5)-L(setPxQx)
1226 1226 .int L(P4Q5)-L(setPxQx)
1227 1227 .int L(P5Q5)-L(setPxQx)
1228 1228 .int L(P6Q5)-L(setPxQx)
1229 - .int L(P7Q5)-L(setPxQx)
1229 + .int L(P7Q5)-L(setPxQx)
1230 1230
1231 1231 .int L(P0Q6)-L(setPxQx) /* 48 */
1232 1232 .int L(P1Q6)-L(setPxQx)
1233 1233 .int L(P2Q6)-L(setPxQx)
1234 1234 .int L(P3Q6)-L(setPxQx)
1235 1235 .int L(P4Q6)-L(setPxQx)
1236 1236 .int L(P5Q6)-L(setPxQx)
1237 1237 .int L(P6Q6)-L(setPxQx)
1238 - .int L(P7Q6)-L(setPxQx)
1238 + .int L(P7Q6)-L(setPxQx)
1239 1239
1240 1240 .int L(P0Q7)-L(setPxQx) /* 56 */
1241 1241 .int L(P1Q7)-L(setPxQx)
1242 1242 .int L(P2Q7)-L(setPxQx)
1243 1243 .int L(P3Q7)-L(setPxQx)
1244 1244 .int L(P4Q7)-L(setPxQx)
1245 1245 .int L(P5Q7)-L(setPxQx)
1246 1246 .int L(P6Q7)-L(setPxQx)
1247 - .int L(P7Q7)-L(setPxQx)
1247 + .int L(P7Q7)-L(setPxQx)
1248 1248
1249 1249 .int L(P0Q8)-L(setPxQx) /* 64 */
1250 1250 .int L(P1Q8)-L(setPxQx)
1251 1251 .int L(P2Q8)-L(setPxQx)
1252 1252 .int L(P3Q8)-L(setPxQx)
1253 1253 .int L(P4Q8)-L(setPxQx)
1254 1254 .int L(P5Q8)-L(setPxQx)
1255 1255 .int L(P6Q8)-L(setPxQx)
1256 1256 .int L(P7Q8)-L(setPxQx)
1257 1257
1258 1258 .int L(P0Q9)-L(setPxQx) /* 72 */
1259 1259 .int L(P1Q9)-L(setPxQx)
1260 1260 .int L(P2Q9)-L(setPxQx)
1261 1261 .int L(P3Q9)-L(setPxQx)
1262 1262 .int L(P4Q9)-L(setPxQx)
1263 1263 .int L(P5Q9)-L(setPxQx)
1264 1264 .int L(P6Q9)-L(setPxQx)
1265 1265 .int L(P7Q9)-L(setPxQx) /* 79 */
1266 1266
1267 1267 .p2align 4
1268 1268 L(P0Q9): mov %rax, -0x48(%rdi)
1269 1269 L(P0Q8): mov %rax, -0x40(%rdi)
1270 1270 L(P0Q7): mov %rax, -0x38(%rdi)
1271 1271 L(P0Q6): mov %rax, -0x30(%rdi)
1272 1272 L(P0Q5): mov %rax, -0x28(%rdi)
1273 1273 L(P0Q4): mov %rax, -0x20(%rdi)
1274 1274 L(P0Q3): mov %rax, -0x18(%rdi)
1275 1275 L(P0Q2): mov %rax, -0x10(%rdi)
1276 1276 L(P0Q1): mov %rax, -0x8(%rdi)
1277 -L(P0Q0):
1277 +L(P0Q0):
1278 1278 ret
1279 1279
1280 1280 .p2align 4
1281 1281 L(P1Q9): mov %rax, -0x49(%rdi)
1282 1282 L(P1Q8): mov %rax, -0x41(%rdi)
1283 1283 L(P1Q7): mov %rax, -0x39(%rdi)
1284 1284 L(P1Q6): mov %rax, -0x31(%rdi)
1285 1285 L(P1Q5): mov %rax, -0x29(%rdi)
1286 1286 L(P1Q4): mov %rax, -0x21(%rdi)
1287 1287 L(P1Q3): mov %rax, -0x19(%rdi)
1288 1288 L(P1Q2): mov %rax, -0x11(%rdi)
1289 1289 L(P1Q1): mov %rax, -0x9(%rdi)
1290 1290 L(P1Q0): mov %al, -0x1(%rdi)
1291 1291 ret
1292 1292
1293 1293 .p2align 4
1294 1294 L(P2Q9): mov %rax, -0x4a(%rdi)
1295 1295 L(P2Q8): mov %rax, -0x42(%rdi)
1296 1296 L(P2Q7): mov %rax, -0x3a(%rdi)
1297 1297 L(P2Q6): mov %rax, -0x32(%rdi)
1298 1298 L(P2Q5): mov %rax, -0x2a(%rdi)
1299 1299 L(P2Q4): mov %rax, -0x22(%rdi)
1300 1300 L(P2Q3): mov %rax, -0x1a(%rdi)
1301 1301 L(P2Q2): mov %rax, -0x12(%rdi)
1302 1302 L(P2Q1): mov %rax, -0xa(%rdi)
1303 1303 L(P2Q0): mov %ax, -0x2(%rdi)
1304 1304 ret
1305 1305
1306 1306 .p2align 4
1307 1307 L(P3Q9): mov %rax, -0x4b(%rdi)
1308 1308 L(P3Q8): mov %rax, -0x43(%rdi)
1309 1309 L(P3Q7): mov %rax, -0x3b(%rdi)
1310 1310 L(P3Q6): mov %rax, -0x33(%rdi)
1311 1311 L(P3Q5): mov %rax, -0x2b(%rdi)
1312 1312 L(P3Q4): mov %rax, -0x23(%rdi)
1313 1313 L(P3Q3): mov %rax, -0x1b(%rdi)
1314 1314 L(P3Q2): mov %rax, -0x13(%rdi)
1315 1315 L(P3Q1): mov %rax, -0xb(%rdi)
1316 1316 L(P3Q0): mov %ax, -0x3(%rdi)
1317 1317 mov %al, -0x1(%rdi)
1318 1318 ret
1319 1319
1320 1320 .p2align 4
1321 1321 L(P4Q9): mov %rax, -0x4c(%rdi)
1322 1322 L(P4Q8): mov %rax, -0x44(%rdi)
1323 1323 L(P4Q7): mov %rax, -0x3c(%rdi)
1324 1324 L(P4Q6): mov %rax, -0x34(%rdi)
1325 1325 L(P4Q5): mov %rax, -0x2c(%rdi)
1326 1326 L(P4Q4): mov %rax, -0x24(%rdi)
1327 1327 L(P4Q3): mov %rax, -0x1c(%rdi)
1328 1328 L(P4Q2): mov %rax, -0x14(%rdi)
1329 1329 L(P4Q1): mov %rax, -0xc(%rdi)
1330 1330 L(P4Q0): mov %eax, -0x4(%rdi)
1331 1331 ret
1332 1332
1333 1333 .p2align 4
1334 1334 L(P5Q9): mov %rax, -0x4d(%rdi)
1335 1335 L(P5Q8): mov %rax, -0x45(%rdi)
1336 1336 L(P5Q7): mov %rax, -0x3d(%rdi)
1337 1337 L(P5Q6): mov %rax, -0x35(%rdi)
1338 1338 L(P5Q5): mov %rax, -0x2d(%rdi)
1339 1339 L(P5Q4): mov %rax, -0x25(%rdi)
1340 1340 L(P5Q3): mov %rax, -0x1d(%rdi)
1341 1341 L(P5Q2): mov %rax, -0x15(%rdi)
1342 1342 L(P5Q1): mov %rax, -0xd(%rdi)
1343 1343 L(P5Q0): mov %eax, -0x5(%rdi)
1344 1344 mov %al, -0x1(%rdi)
1345 1345 ret
1346 1346
1347 1347 .p2align 4
1348 1348 L(P6Q9): mov %rax, -0x4e(%rdi)
1349 1349 L(P6Q8): mov %rax, -0x46(%rdi)
1350 1350 L(P6Q7): mov %rax, -0x3e(%rdi)
1351 1351 L(P6Q6): mov %rax, -0x36(%rdi)
1352 1352 L(P6Q5): mov %rax, -0x2e(%rdi)
1353 1353 L(P6Q4): mov %rax, -0x26(%rdi)
1354 1354 L(P6Q3): mov %rax, -0x1e(%rdi)
1355 1355 L(P6Q2): mov %rax, -0x16(%rdi)
1356 1356 L(P6Q1): mov %rax, -0xe(%rdi)
1357 1357 L(P6Q0): mov %eax, -0x6(%rdi)
1358 1358 mov %ax, -0x2(%rdi)
1359 1359 ret
1360 1360
1361 1361 .p2align 4
1362 1362 L(P7Q9): mov %rax, -0x4f(%rdi)
1363 1363 L(P7Q8): mov %rax, -0x47(%rdi)
1364 1364 L(P7Q7): mov %rax, -0x3f(%rdi)
1365 1365 L(P7Q6): mov %rax, -0x37(%rdi)
1366 1366 L(P7Q5): mov %rax, -0x2f(%rdi)
1367 1367 L(P7Q4): mov %rax, -0x27(%rdi)
1368 1368 L(P7Q3): mov %rax, -0x1f(%rdi)
1369 1369 L(P7Q2): mov %rax, -0x17(%rdi)
1370 1370 L(P7Q1): mov %rax, -0xf(%rdi)
1371 1371 L(P7Q0): mov %eax, -0x7(%rdi)
1372 1372 mov %ax, -0x3(%rdi)
1373 1373 mov %al, -0x1(%rdi)
1374 1374 ret
1375 1375
1376 1376 /*
1377 1377 * Align to a 16-byte boundary. Avoids penalties from unaligned stores
1378 1378 * as well as from stores spanning cachelines. Note 16-byte alignment
1379 1379 * is better in case where rep sstosq is used.
1380 1380 */
1381 1381 .p2align 4
1382 1382 L(ck_align):
1383 1383 test $0xf, %rdi
1384 1384 jz L(aligned_now)
1385 1385 test $1, %rdi
1386 1386 jz 2f
1387 1387 mov %al, (%rdi)
1388 1388 dec %rsi
1389 1389 lea 1(%rdi),%rdi
1390 1390 2:
1391 1391 test $2, %rdi
1392 1392 jz 4f
1393 1393 mov %ax, (%rdi)
1394 1394 sub $2, %rsi
1395 1395 lea 2(%rdi),%rdi
1396 1396 4:
1397 1397 test $4, %rdi
1398 1398 jz 8f
1399 1399 mov %eax, (%rdi)
1400 1400 sub $4, %rsi
1401 1401 lea 4(%rdi),%rdi
1402 1402 8:
1403 1403 test $8, %rdi
1404 1404 jz L(aligned_now)
1405 1405 mov %rax, (%rdi)
1406 1406 sub $8, %rsi
1407 1407 lea 8(%rdi),%rdi
1408 1408
1409 1409 /*
1410 1410 * For large sizes rep sstoq is fastest.
1411 1411 * Transition point determined experimentally as measured on
1412 1412 * Intel Xeon processors (incl. Nehalem) and AMD Opteron.
1413 1413 */
1414 1414 L(aligned_now):
1415 1415 cmp $BZERO_USE_REP, %rsi
1416 1416 ja L(use_rep)
1417 1417
1418 1418 /*
1419 1419 * zero 64-bytes per loop
1420 1420 */
1421 1421 .p2align 4
1422 1422 L(bzero_loop):
1423 1423 leaq -0x40(%rsi), %rsi
1424 1424 cmpq $0x40, %rsi
1425 - movq %rax, (%rdi)
1426 - movq %rax, 0x8(%rdi)
1427 - movq %rax, 0x10(%rdi)
1428 - movq %rax, 0x18(%rdi)
1429 - movq %rax, 0x20(%rdi)
1430 - movq %rax, 0x28(%rdi)
1431 - movq %rax, 0x30(%rdi)
1432 - movq %rax, 0x38(%rdi)
1425 + movq %rax, (%rdi)
1426 + movq %rax, 0x8(%rdi)
1427 + movq %rax, 0x10(%rdi)
1428 + movq %rax, 0x18(%rdi)
1429 + movq %rax, 0x20(%rdi)
1430 + movq %rax, 0x28(%rdi)
1431 + movq %rax, 0x30(%rdi)
1432 + movq %rax, 0x38(%rdi)
1433 1433 leaq 0x40(%rdi), %rdi
1434 1434 jae L(bzero_loop)
1435 1435
1436 1436 /*
1437 1437 * Clear any remaining bytes..
1438 1438 */
1439 1439 9:
1440 1440 leaq L(setPxQx)(%rip), %r10
1441 1441 addq %rsi, %rdi
1442 1442 movslq (%r10,%rsi,4), %rcx
1443 1443 leaq (%rcx,%r10,1), %r10
1444 - jmpq *%r10
1444 + INDIRECT_JMP_REG(r10)
1445 1445
1446 1446 /*
1447 1447 * Use rep sstoq. Clear any remainder via unrolled code
1448 1448 */
1449 1449 .p2align 4
1450 1450 L(use_rep):
1451 1451 movq %rsi, %rcx /* get size in bytes */
1452 1452 shrq $3, %rcx /* count of 8-byte words to zero */
1453 1453 rep
1454 1454 sstoq /* %rcx = words to clear (%rax=0) */
1455 1455 andq $7, %rsi /* remaining bytes */
1456 1456 jnz 9b
1457 1457 ret
1458 1458 #undef L
1459 1459 SET_SIZE(bzero_altentry)
1460 1460 SET_SIZE(bzero)
1461 1461
1462 1462 #elif defined(__i386)
1463 1463
1464 1464 #define ARG_ADDR 4
1465 1465 #define ARG_COUNT 8
1466 1466
1467 1467 ENTRY(bzero)
1468 1468 #ifdef DEBUG
1469 1469 movl postbootkernelbase, %eax
1470 1470 cmpl %eax, ARG_ADDR(%esp)
1471 1471 jnb 0f
1472 1472 pushl %ebp
1473 1473 movl %esp, %ebp
1474 1474 pushl $.bzero_panic_msg
1475 1475 call panic
1476 1476 0:
1477 1477 #endif
1478 1478 do_zero:
1479 1479 movl %edi, %edx
1480 1480 movl ARG_COUNT(%esp), %ecx
1481 1481 movl ARG_ADDR(%esp), %edi
1482 1482 shrl $2, %ecx
1483 1483 xorl %eax, %eax
1484 1484 rep
1485 1485 sstol
1486 1486 movl ARG_COUNT(%esp), %ecx
1487 1487 andl $3, %ecx
1488 1488 rep
1489 1489 sstob
1490 1490 movl %edx, %edi
1491 1491 ret
1492 1492 SET_SIZE(bzero)
1493 1493
1494 1494 #undef ARG_ADDR
1495 1495 #undef ARG_COUNT
1496 1496
1497 1497 #endif /* __i386 */
1498 1498 #endif /* __lint */
1499 1499
1500 1500 /*
1501 1501 * Transfer data to and from user space -
1502 1502 * Note that these routines can cause faults
1503 1503 * It is assumed that the kernel has nothing at
1504 1504 * less than KERNELBASE in the virtual address space.
1505 1505 *
1506 1506 * Note that copyin(9F) and copyout(9F) are part of the
1507 1507 * DDI/DKI which specifies that they return '-1' on "errors."
1508 1508 *
1509 1509 * Sigh.
1510 1510 *
1511 1511 * So there's two extremely similar routines - xcopyin_nta() and
1512 1512 * xcopyout_nta() which return the errno that we've faithfully computed.
1513 1513 * This allows other callers (e.g. uiomove(9F)) to work correctly.
1514 1514 * Given that these are used pretty heavily, we expand the calling
1515 1515 * sequences inline for all flavours (rather than making wrappers).
1516 1516 */
1517 1517
1518 1518 /*
1519 1519 * Copy user data to kernel space.
1520 1520 */
1521 1521
1522 1522 #if defined(__lint)
1523 1523
1524 1524 /* ARGSUSED */
1525 1525 int
1526 1526 copyin(const void *uaddr, void *kaddr, size_t count)
1527 1527 { return (0); }
1528 1528
1529 1529 #else /* lint */
1530 1530
1531 1531 #if defined(__amd64)
1532 1532
1533 1533 ENTRY(copyin)
1534 1534 pushq %rbp
1535 1535 movq %rsp, %rbp
1536 1536 subq $24, %rsp
1537 1537
1538 1538 /*
1539 1539 * save args in case we trap and need to rerun as a copyop
1540 1540 */
1541 1541 movq %rdi, (%rsp)
1542 1542 movq %rsi, 0x8(%rsp)
1543 1543 movq %rdx, 0x10(%rsp)
1544 1544
1545 1545 movq kernelbase(%rip), %rax
1546 1546 #ifdef DEBUG
1547 1547 cmpq %rax, %rsi /* %rsi = kaddr */
1548 1548 jnb 1f
1549 1549 leaq .copyin_panic_msg(%rip), %rdi
1550 1550 xorl %eax, %eax
1551 1551 call panic
1552 1552 1:
1553 1553 #endif
1554 1554 /*
1555 1555 * pass lofault value as 4th argument to do_copy_fault
1556 1556 */
1557 1557 leaq _copyin_err(%rip), %rcx
1558 1558
1559 1559 movq %gs:CPU_THREAD, %r9
1560 1560 cmpq %rax, %rdi /* test uaddr < kernelbase */
1561 1561 jae 3f /* take copyop if uaddr > kernelbase */
1562 1562 SMAP_DISABLE_INSTR(0)
1563 1563 jmp do_copy_fault /* Takes care of leave for us */
1564 1564
1565 1565 _copyin_err:
1566 1566 SMAP_ENABLE_INSTR(2)
1567 - movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1567 + movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1568 1568 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1569 1569 3:
1570 1570 movq T_COPYOPS(%r9), %rax
1571 1571 cmpq $0, %rax
1572 1572 jz 2f
1573 1573 /*
1574 1574 * reload args for the copyop
1575 1575 */
1576 1576 movq (%rsp), %rdi
1577 1577 movq 0x8(%rsp), %rsi
1578 1578 movq 0x10(%rsp), %rdx
1579 1579 leave
1580 - jmp *CP_COPYIN(%rax)
1580 + movq CP_COPYIN(%rax), %rax
1581 + INDIRECT_JMP_REG(rax)
1581 1582
1582 -2: movl $-1, %eax
1583 +2: movl $-1, %eax
1583 1584 leave
1584 1585 ret
1585 1586 SET_SIZE(copyin)
1586 1587
1587 1588 #elif defined(__i386)
1588 1589
1589 1590 #define ARG_UADDR 4
1590 1591 #define ARG_KADDR 8
1591 1592
1592 1593 ENTRY(copyin)
1593 1594 movl kernelbase, %ecx
1594 1595 #ifdef DEBUG
1595 1596 cmpl %ecx, ARG_KADDR(%esp)
1596 1597 jnb 1f
1597 1598 pushl %ebp
1598 1599 movl %esp, %ebp
1599 1600 pushl $.copyin_panic_msg
1600 1601 call panic
1601 1602 1:
1602 1603 #endif
1603 1604 lea _copyin_err, %eax
1604 1605
1605 1606 movl %gs:CPU_THREAD, %edx
1606 1607 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
1607 1608 jb do_copy_fault
1608 1609 jmp 3f
1609 1610
1610 1611 _copyin_err:
1611 1612 popl %ecx
1612 1613 popl %edi
1613 1614 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */
1614 1615 popl %esi
1615 1616 popl %ebp
1616 1617 3:
1617 1618 movl T_COPYOPS(%edx), %eax
1618 1619 cmpl $0, %eax
1619 1620 jz 2f
1620 1621 jmp *CP_COPYIN(%eax)
1621 1622
1622 1623 2: movl $-1, %eax
1623 1624 ret
1624 1625 SET_SIZE(copyin)
1625 1626
1626 1627 #undef ARG_UADDR
1627 1628 #undef ARG_KADDR
1628 1629
1629 1630 #endif /* __i386 */
1630 1631 #endif /* __lint */
1631 1632
1632 1633 #if defined(__lint)
1633 1634
1634 1635 /* ARGSUSED */
1635 1636 int
1636 1637 xcopyin_nta(const void *uaddr, void *kaddr, size_t count, int copy_cached)
1637 1638 { return (0); }
1638 1639
1639 1640 #else /* __lint */
1640 1641
1641 1642 #if defined(__amd64)
1642 1643
1643 1644 ENTRY(xcopyin_nta)
1644 1645 pushq %rbp
1645 1646 movq %rsp, %rbp
1646 1647 subq $24, %rsp
1647 1648
1648 1649 /*
1649 1650 * save args in case we trap and need to rerun as a copyop
1650 1651 * %rcx is consumed in this routine so we don't need to save
1651 1652 * it.
1652 1653 */
1653 1654 movq %rdi, (%rsp)
1654 1655 movq %rsi, 0x8(%rsp)
1655 1656 movq %rdx, 0x10(%rsp)
1656 1657
1657 1658 movq kernelbase(%rip), %rax
1658 1659 #ifdef DEBUG
1659 1660 cmpq %rax, %rsi /* %rsi = kaddr */
1660 1661 jnb 1f
1661 1662 leaq .xcopyin_panic_msg(%rip), %rdi
1662 1663 xorl %eax, %eax
1663 1664 call panic
1664 1665 1:
1665 1666 #endif
1666 1667 movq %gs:CPU_THREAD, %r9
1667 1668 cmpq %rax, %rdi /* test uaddr < kernelbase */
1668 1669 jae 4f
1669 1670 cmpq $0, %rcx /* No non-temporal access? */
1670 1671 /*
1671 1672 * pass lofault value as 4th argument to do_copy_fault
1672 1673 */
1673 1674 leaq _xcopyin_err(%rip), %rcx /* doesn't set rflags */
1674 1675 jnz 6f /* use regular access */
1675 1676 /*
1676 1677 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1677 1678 */
1678 1679 cmpq $XCOPY_MIN_SIZE, %rdx
1679 1680 jae 5f
1680 1681 6:
1681 1682 SMAP_DISABLE_INSTR(1)
1682 1683 jmp do_copy_fault
1683 -
1684 +
1684 1685 /*
1685 1686 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1686 1687 * count is COUNT_ALIGN_SIZE aligned.
1687 1688 */
1688 1689 5:
1689 1690 movq %rdi, %r10
1690 1691 orq %rsi, %r10
1691 1692 andq $NTA_ALIGN_MASK, %r10
1692 1693 orq %rdx, %r10
1693 1694 andq $COUNT_ALIGN_MASK, %r10
1694 - jnz 6b
1695 + jnz 6b
1695 1696 leaq _xcopyin_nta_err(%rip), %rcx /* doesn't set rflags */
1696 1697 SMAP_DISABLE_INSTR(2)
1697 1698 jmp do_copy_fault_nta /* use non-temporal access */
1698 -
1699 +
1699 1700 4:
1700 1701 movl $EFAULT, %eax
1701 1702 jmp 3f
1702 1703
1703 1704 /*
1704 1705 * A fault during do_copy_fault or do_copy_fault_nta is
1705 1706 * indicated through an errno value in %rax and we iret from the
1706 1707 * trap handler to here.
1707 1708 */
1708 1709 _xcopyin_err:
1709 1710 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1710 1711 _xcopyin_nta_err:
1711 1712 SMAP_ENABLE_INSTR(3)
1712 1713 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1713 1714 3:
1714 1715 movq T_COPYOPS(%r9), %r8
1715 1716 cmpq $0, %r8
1716 1717 jz 2f
1717 1718
1718 1719 /*
1719 1720 * reload args for the copyop
1720 1721 */
1721 1722 movq (%rsp), %rdi
1722 1723 movq 0x8(%rsp), %rsi
1723 1724 movq 0x10(%rsp), %rdx
1724 1725 leave
1725 - jmp *CP_XCOPYIN(%r8)
1726 + movq CP_XCOPYIN(%r8), %r8
1727 + INDIRECT_JMP_REG(r8)
1726 1728
1727 1729 2: leave
1728 1730 ret
1729 1731 SET_SIZE(xcopyin_nta)
1730 1732
1731 1733 #elif defined(__i386)
1732 1734
1733 1735 #define ARG_UADDR 4
1734 1736 #define ARG_KADDR 8
1735 1737 #define ARG_COUNT 12
1736 1738 #define ARG_CACHED 16
1737 1739
1738 1740 .globl use_sse_copy
1739 1741
1740 1742 ENTRY(xcopyin_nta)
1741 1743 movl kernelbase, %ecx
1742 1744 lea _xcopyin_err, %eax
1743 1745 movl %gs:CPU_THREAD, %edx
1744 1746 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
1745 1747 jae 4f
1746 1748
1747 1749 cmpl $0, use_sse_copy /* no sse support */
1748 1750 jz do_copy_fault
1749 1751
1750 1752 cmpl $0, ARG_CACHED(%esp) /* copy_cached hint set? */
1751 1753 jnz do_copy_fault
1752 1754
1753 1755 /*
1754 1756 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1755 1757 */
1756 1758 cmpl $XCOPY_MIN_SIZE, ARG_COUNT(%esp)
1757 1759 jb do_copy_fault
1758 -
1760 +
1759 1761 /*
1760 1762 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1761 1763 * count is COUNT_ALIGN_SIZE aligned.
1762 1764 */
1763 1765 movl ARG_UADDR(%esp), %ecx
1764 1766 orl ARG_KADDR(%esp), %ecx
1765 1767 andl $NTA_ALIGN_MASK, %ecx
1766 1768 orl ARG_COUNT(%esp), %ecx
1767 1769 andl $COUNT_ALIGN_MASK, %ecx
1768 1770 jnz do_copy_fault
1769 1771
1770 1772 jmp do_copy_fault_nta /* use regular access */
1771 1773
1772 1774 4:
1773 1775 movl $EFAULT, %eax
1774 1776 jmp 3f
1775 1777
1776 1778 /*
1777 1779 * A fault during do_copy_fault or do_copy_fault_nta is
1778 1780 * indicated through an errno value in %eax and we iret from the
1779 1781 * trap handler to here.
1780 1782 */
1781 1783 _xcopyin_err:
1782 1784 popl %ecx
1783 1785 popl %edi
1784 1786 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */
1785 1787 popl %esi
1786 1788 popl %ebp
1787 1789 3:
1788 1790 cmpl $0, T_COPYOPS(%edx)
1789 1791 jz 2f
1790 1792 movl T_COPYOPS(%edx), %eax
1791 1793 jmp *CP_XCOPYIN(%eax)
1792 1794
1793 -2: rep; ret /* use 2 byte return instruction when branch target */
1795 +2: rep; ret /* use 2 byte return instruction when branch target */
1794 1796 /* AMD Software Optimization Guide - Section 6.2 */
1795 1797 SET_SIZE(xcopyin_nta)
1796 1798
1797 1799 #undef ARG_UADDR
1798 1800 #undef ARG_KADDR
1799 1801 #undef ARG_COUNT
1800 1802 #undef ARG_CACHED
1801 1803
1802 1804 #endif /* __i386 */
1803 1805 #endif /* __lint */
1804 1806
1805 1807 /*
1806 1808 * Copy kernel data to user space.
1807 1809 */
1808 1810
1809 1811 #if defined(__lint)
1810 1812
1811 1813 /* ARGSUSED */
1812 1814 int
1813 1815 copyout(const void *kaddr, void *uaddr, size_t count)
1814 1816 { return (0); }
1815 1817
1816 1818 #else /* __lint */
1817 1819
1818 1820 #if defined(__amd64)
1819 1821
1820 1822 ENTRY(copyout)
1821 1823 pushq %rbp
1822 1824 movq %rsp, %rbp
1823 1825 subq $24, %rsp
1824 1826
1825 1827 /*
1826 1828 * save args in case we trap and need to rerun as a copyop
1827 1829 */
1828 1830 movq %rdi, (%rsp)
1829 1831 movq %rsi, 0x8(%rsp)
1830 1832 movq %rdx, 0x10(%rsp)
1831 1833
1832 1834 movq kernelbase(%rip), %rax
1833 1835 #ifdef DEBUG
1834 1836 cmpq %rax, %rdi /* %rdi = kaddr */
1835 1837 jnb 1f
1836 1838 leaq .copyout_panic_msg(%rip), %rdi
1837 1839 xorl %eax, %eax
1838 1840 call panic
1839 1841 1:
1840 1842 #endif
1841 1843 /*
1842 1844 * pass lofault value as 4th argument to do_copy_fault
1843 1845 */
1844 1846 leaq _copyout_err(%rip), %rcx
1845 1847
1846 1848 movq %gs:CPU_THREAD, %r9
1847 1849 cmpq %rax, %rsi /* test uaddr < kernelbase */
1848 1850 jae 3f /* take copyop if uaddr > kernelbase */
1849 1851 SMAP_DISABLE_INSTR(3)
1850 1852 jmp do_copy_fault /* Calls leave for us */
1851 1853
1852 1854 _copyout_err:
1853 1855 SMAP_ENABLE_INSTR(4)
1854 1856 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1855 1857 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1856 1858 3:
1857 1859 movq T_COPYOPS(%r9), %rax
1858 1860 cmpq $0, %rax
1859 1861 jz 2f
1860 1862
1861 1863 /*
1862 1864 * reload args for the copyop
1863 1865 */
1864 1866 movq (%rsp), %rdi
1865 1867 movq 0x8(%rsp), %rsi
1866 1868 movq 0x10(%rsp), %rdx
1867 1869 leave
1868 - jmp *CP_COPYOUT(%rax)
1870 + movq CP_COPYOUT(%rax), %rax
1871 + INDIRECT_JMP_REG(rax)
1869 1872
1870 1873 2: movl $-1, %eax
1871 1874 leave
1872 1875 ret
1873 1876 SET_SIZE(copyout)
1874 1877
1875 1878 #elif defined(__i386)
1876 1879
1877 1880 #define ARG_KADDR 4
1878 1881 #define ARG_UADDR 8
1879 1882
1880 1883 ENTRY(copyout)
1881 1884 movl kernelbase, %ecx
1882 1885 #ifdef DEBUG
1883 1886 cmpl %ecx, ARG_KADDR(%esp)
1884 1887 jnb 1f
1885 1888 pushl %ebp
1886 1889 movl %esp, %ebp
1887 1890 pushl $.copyout_panic_msg
1888 1891 call panic
1889 1892 1:
1890 1893 #endif
1891 1894 lea _copyout_err, %eax
1892 1895 movl %gs:CPU_THREAD, %edx
1893 1896 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
1894 1897 jb do_copy_fault
1895 1898 jmp 3f
1896 -
1899 +
1897 1900 _copyout_err:
1898 1901 popl %ecx
1899 1902 popl %edi
1900 1903 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */
1901 1904 popl %esi
1902 1905 popl %ebp
1903 1906 3:
1904 1907 movl T_COPYOPS(%edx), %eax
1905 1908 cmpl $0, %eax
1906 1909 jz 2f
1907 1910 jmp *CP_COPYOUT(%eax)
1908 1911
1909 1912 2: movl $-1, %eax
1910 1913 ret
1911 1914 SET_SIZE(copyout)
1912 1915
1913 1916 #undef ARG_UADDR
1914 1917 #undef ARG_KADDR
1915 1918
1916 1919 #endif /* __i386 */
1917 1920 #endif /* __lint */
1918 1921
1919 1922 #if defined(__lint)
1920 1923
1921 1924 /* ARGSUSED */
1922 1925 int
1923 1926 xcopyout_nta(const void *kaddr, void *uaddr, size_t count, int copy_cached)
1924 1927 { return (0); }
1925 1928
1926 1929 #else /* __lint */
1927 1930
1928 1931 #if defined(__amd64)
1929 1932
1930 1933 ENTRY(xcopyout_nta)
1931 1934 pushq %rbp
1932 1935 movq %rsp, %rbp
1933 1936 subq $24, %rsp
1934 1937
1935 1938 /*
1936 1939 * save args in case we trap and need to rerun as a copyop
1937 1940 */
1938 1941 movq %rdi, (%rsp)
1939 1942 movq %rsi, 0x8(%rsp)
1940 1943 movq %rdx, 0x10(%rsp)
1941 1944
1942 1945 movq kernelbase(%rip), %rax
1943 1946 #ifdef DEBUG
1944 1947 cmpq %rax, %rdi /* %rdi = kaddr */
1945 1948 jnb 1f
1946 1949 leaq .xcopyout_panic_msg(%rip), %rdi
1947 1950 xorl %eax, %eax
1948 1951 call panic
1949 1952 1:
1950 1953 #endif
1951 1954 movq %gs:CPU_THREAD, %r9
1952 1955 cmpq %rax, %rsi /* test uaddr < kernelbase */
1953 1956 jae 4f
1954 1957
1955 1958 cmpq $0, %rcx /* No non-temporal access? */
1956 1959 /*
1957 1960 * pass lofault value as 4th argument to do_copy_fault
1958 1961 */
1959 1962 leaq _xcopyout_err(%rip), %rcx
1960 1963 jnz 6f
1961 1964 /*
1962 1965 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1963 1966 */
1964 1967 cmpq $XCOPY_MIN_SIZE, %rdx
1965 1968 jae 5f
1966 1969 6:
1967 1970 SMAP_DISABLE_INSTR(4)
1968 1971 jmp do_copy_fault
1969 -
1972 +
1970 1973 /*
1971 1974 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1972 1975 * count is COUNT_ALIGN_SIZE aligned.
1973 1976 */
1974 1977 5:
1975 1978 movq %rdi, %r10
1976 1979 orq %rsi, %r10
1977 1980 andq $NTA_ALIGN_MASK, %r10
1978 1981 orq %rdx, %r10
1979 1982 andq $COUNT_ALIGN_MASK, %r10
1980 - jnz 6b
1983 + jnz 6b
1981 1984 leaq _xcopyout_nta_err(%rip), %rcx
1982 1985 SMAP_DISABLE_INSTR(5)
1983 1986 call do_copy_fault_nta
1984 1987 SMAP_ENABLE_INSTR(5)
1985 1988 ret
1986 1989
1987 1990 4:
1988 1991 movl $EFAULT, %eax
1989 1992 jmp 3f
1990 1993
1991 1994 /*
1992 1995 * A fault during do_copy_fault or do_copy_fault_nta is
1993 1996 * indicated through an errno value in %rax and we iret from the
1994 1997 * trap handler to here.
1995 1998 */
1996 1999 _xcopyout_err:
1997 2000 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1998 2001 _xcopyout_nta_err:
1999 2002 SMAP_ENABLE_INSTR(6)
2000 2003 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
2001 2004 3:
2002 2005 movq T_COPYOPS(%r9), %r8
2003 2006 cmpq $0, %r8
2004 2007 jz 2f
2005 2008
2006 2009 /*
2007 2010 * reload args for the copyop
2008 2011 */
2009 2012 movq (%rsp), %rdi
2010 2013 movq 0x8(%rsp), %rsi
2011 2014 movq 0x10(%rsp), %rdx
2012 2015 leave
2013 - jmp *CP_XCOPYOUT(%r8)
2016 + movq CP_XCOPYOUT(%r8), %r8
2017 + INDIRECT_JMP_REG(r8)
2014 2018
2015 2019 2: leave
2016 2020 ret
2017 2021 SET_SIZE(xcopyout_nta)
2018 2022
2019 2023 #elif defined(__i386)
2020 2024
2021 2025 #define ARG_KADDR 4
2022 2026 #define ARG_UADDR 8
2023 2027 #define ARG_COUNT 12
2024 2028 #define ARG_CACHED 16
2025 2029
2026 2030 ENTRY(xcopyout_nta)
2027 2031 movl kernelbase, %ecx
2028 2032 lea _xcopyout_err, %eax
2029 2033 movl %gs:CPU_THREAD, %edx
2030 2034 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
2031 2035 jae 4f
2032 2036
2033 2037 cmpl $0, use_sse_copy /* no sse support */
2034 2038 jz do_copy_fault
2035 2039
2036 2040 cmpl $0, ARG_CACHED(%esp) /* copy_cached hint set? */
2037 2041 jnz do_copy_fault
2038 2042
2039 2043 /*
2040 2044 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
2041 2045 */
2042 2046 cmpl $XCOPY_MIN_SIZE, %edx
2043 2047 jb do_copy_fault
2044 -
2048 +
2045 2049 /*
2046 2050 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
2047 2051 * count is COUNT_ALIGN_SIZE aligned.
2048 2052 */
2049 2053 movl ARG_UADDR(%esp), %ecx
2050 2054 orl ARG_KADDR(%esp), %ecx
2051 2055 andl $NTA_ALIGN_MASK, %ecx
2052 2056 orl ARG_COUNT(%esp), %ecx
2053 2057 andl $COUNT_ALIGN_MASK, %ecx
2054 2058 jnz do_copy_fault
2055 2059 jmp do_copy_fault_nta
2056 2060
2057 2061 4:
2058 2062 movl $EFAULT, %eax
2059 2063 jmp 3f
2060 2064
2061 2065 /*
2062 2066 * A fault during do_copy_fault or do_copy_fault_nta is
2063 2067 * indicated through an errno value in %eax and we iret from the
2064 2068 * trap handler to here.
2065 2069 */
2066 2070 _xcopyout_err:
2067 2071 / restore the original lofault
2068 2072 popl %ecx
2069 2073 popl %edi
2070 2074 movl %ecx, T_LOFAULT(%edx) / original lofault
2071 2075 popl %esi
2072 2076 popl %ebp
2073 2077 3:
2074 2078 cmpl $0, T_COPYOPS(%edx)
2075 2079 jz 2f
2076 2080 movl T_COPYOPS(%edx), %eax
2077 2081 jmp *CP_XCOPYOUT(%eax)
2078 2082
2079 2083 2: rep; ret /* use 2 byte return instruction when branch target */
2080 2084 /* AMD Software Optimization Guide - Section 6.2 */
2081 2085 SET_SIZE(xcopyout_nta)
2082 2086
2083 2087 #undef ARG_UADDR
2084 2088 #undef ARG_KADDR
2085 2089 #undef ARG_COUNT
2086 2090 #undef ARG_CACHED
2087 2091
2088 2092 #endif /* __i386 */
2089 2093 #endif /* __lint */
2090 2094
2091 2095 /*
2092 2096 * Copy a null terminated string from one point to another in
2093 2097 * the kernel address space.
2094 2098 */
2095 2099
2096 2100 #if defined(__lint)
2097 2101
2098 2102 /* ARGSUSED */
2099 2103 int
2100 2104 copystr(const char *from, char *to, size_t maxlength, size_t *lencopied)
2101 2105 { return (0); }
2102 2106
2103 2107 #else /* __lint */
2104 2108
2105 2109 #if defined(__amd64)
2106 2110
2107 2111 ENTRY(copystr)
2108 2112 pushq %rbp
2109 2113 movq %rsp, %rbp
2110 2114 #ifdef DEBUG
2111 2115 movq kernelbase(%rip), %rax
2112 2116 cmpq %rax, %rdi /* %rdi = from */
2113 2117 jb 0f
2114 2118 cmpq %rax, %rsi /* %rsi = to */
2115 2119 jnb 1f
2116 2120 0: leaq .copystr_panic_msg(%rip), %rdi
2117 2121 xorl %eax, %eax
2118 2122 call panic
2119 2123 1:
2120 2124 #endif
2121 2125 movq %gs:CPU_THREAD, %r9
2122 2126 movq T_LOFAULT(%r9), %r8 /* pass current lofault value as */
2123 2127 /* 5th argument to do_copystr */
2124 2128 xorl %r10d,%r10d /* pass smap restore need in %r10d */
2125 2129 /* as a non-ABI 6th arg */
2126 2130 do_copystr:
2127 2131 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
2128 2132 movq T_LOFAULT(%r9), %r11 /* save the current lofault */
2129 2133 movq %r8, T_LOFAULT(%r9) /* new lofault */
2130 2134
2131 2135 movq %rdx, %r8 /* save maxlength */
2132 2136
2133 2137 cmpq $0, %rdx /* %rdx = maxlength */
2134 2138 je copystr_enametoolong /* maxlength == 0 */
2135 2139
2136 2140 copystr_loop:
2137 2141 decq %r8
2138 2142 movb (%rdi), %al
2139 2143 incq %rdi
2140 2144 movb %al, (%rsi)
2141 2145 incq %rsi
2142 2146 cmpb $0, %al
2143 2147 je copystr_null /* null char */
2144 2148 cmpq $0, %r8
2145 2149 jne copystr_loop
2146 2150
2147 2151 copystr_enametoolong:
2148 2152 movl $ENAMETOOLONG, %eax
2149 2153 jmp copystr_out
2150 2154
2151 2155 copystr_null:
2152 2156 xorl %eax, %eax /* no error */
2153 2157
2154 2158 copystr_out:
2155 2159 cmpq $0, %rcx /* want length? */
2156 2160 je copystr_smap /* no */
2157 2161 subq %r8, %rdx /* compute length and store it */
2158 2162 movq %rdx, (%rcx)
2159 2163
2160 2164 copystr_smap:
2161 2165 cmpl $0, %r10d
2162 2166 jz copystr_done
2163 2167 SMAP_ENABLE_INSTR(7)
2164 2168
2165 2169 copystr_done:
2166 2170 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
2167 2171 leave
2168 2172 ret
2169 2173 SET_SIZE(copystr)
2170 2174
2171 2175 #elif defined(__i386)
2172 2176
2173 2177 #define ARG_FROM 8
2174 2178 #define ARG_TO 12
2175 2179 #define ARG_MAXLEN 16
2176 2180 #define ARG_LENCOPIED 20
2177 2181
2178 2182 ENTRY(copystr)
2179 2183 #ifdef DEBUG
2180 2184 pushl %ebp
2181 2185 movl %esp, %ebp
2182 2186 movl kernelbase, %eax
2183 2187 cmpl %eax, ARG_FROM(%esp)
2184 2188 jb 0f
2185 2189 cmpl %eax, ARG_TO(%esp)
2186 2190 jnb 1f
2187 2191 0: pushl $.copystr_panic_msg
2188 2192 call panic
2189 2193 1: popl %ebp
2190 2194 #endif
2191 2195 /* get the current lofault address */
2192 2196 movl %gs:CPU_THREAD, %eax
2193 2197 movl T_LOFAULT(%eax), %eax
2194 2198 do_copystr:
2195 2199 pushl %ebp /* setup stack frame */
2196 2200 movl %esp, %ebp
2197 2201 pushl %ebx /* save registers */
2198 2202 pushl %edi
2199 2203
2200 - movl %gs:CPU_THREAD, %ebx
2204 + movl %gs:CPU_THREAD, %ebx
2201 2205 movl T_LOFAULT(%ebx), %edi
2202 2206 pushl %edi /* save the current lofault */
2203 2207 movl %eax, T_LOFAULT(%ebx) /* new lofault */
2204 2208
2205 2209 movl ARG_MAXLEN(%ebp), %ecx
2206 2210 cmpl $0, %ecx
2207 2211 je copystr_enametoolong /* maxlength == 0 */
2208 2212
2209 2213 movl ARG_FROM(%ebp), %ebx /* source address */
2210 2214 movl ARG_TO(%ebp), %edx /* destination address */
2211 2215
2212 2216 copystr_loop:
2213 2217 decl %ecx
2214 2218 movb (%ebx), %al
2215 - incl %ebx
2219 + incl %ebx
2216 2220 movb %al, (%edx)
2217 2221 incl %edx
2218 2222 cmpb $0, %al
2219 2223 je copystr_null /* null char */
2220 2224 cmpl $0, %ecx
2221 2225 jne copystr_loop
2222 2226
2223 2227 copystr_enametoolong:
2224 2228 movl $ENAMETOOLONG, %eax
2225 2229 jmp copystr_out
2226 2230
2227 2231 copystr_null:
2228 2232 xorl %eax, %eax /* no error */
2229 2233
2230 2234 copystr_out:
2231 2235 cmpl $0, ARG_LENCOPIED(%ebp) /* want length? */
2232 2236 je copystr_done /* no */
2233 2237 movl ARG_MAXLEN(%ebp), %edx
2234 2238 subl %ecx, %edx /* compute length and store it */
2235 2239 movl ARG_LENCOPIED(%ebp), %ecx
2236 2240 movl %edx, (%ecx)
2237 2241
2238 2242 copystr_done:
2239 2243 popl %edi
2240 - movl %gs:CPU_THREAD, %ebx
2244 + movl %gs:CPU_THREAD, %ebx
2241 2245 movl %edi, T_LOFAULT(%ebx) /* restore the original lofault */
2242 2246
2243 2247 popl %edi
2244 2248 popl %ebx
2245 2249 popl %ebp
2246 - ret
2250 + ret
2247 2251 SET_SIZE(copystr)
2248 2252
2249 2253 #undef ARG_FROM
2250 2254 #undef ARG_TO
2251 2255 #undef ARG_MAXLEN
2252 2256 #undef ARG_LENCOPIED
2253 2257
2254 2258 #endif /* __i386 */
2255 2259 #endif /* __lint */
2256 2260
2257 2261 /*
2258 2262 * Copy a null terminated string from the user address space into
2259 2263 * the kernel address space.
2260 2264 */
2261 2265
2262 2266 #if defined(__lint)
2263 2267
2264 2268 /* ARGSUSED */
2265 2269 int
2266 2270 copyinstr(const char *uaddr, char *kaddr, size_t maxlength,
2267 2271 size_t *lencopied)
2268 2272 { return (0); }
2269 2273
2270 2274 #else /* __lint */
2271 2275
2272 2276 #if defined(__amd64)
2273 2277
2274 2278 ENTRY(copyinstr)
2275 2279 pushq %rbp
2276 2280 movq %rsp, %rbp
2277 2281 subq $32, %rsp
2278 2282
2279 2283 /*
2280 2284 * save args in case we trap and need to rerun as a copyop
2281 2285 */
2282 2286 movq %rdi, (%rsp)
2283 2287 movq %rsi, 0x8(%rsp)
2284 2288 movq %rdx, 0x10(%rsp)
2285 2289 movq %rcx, 0x18(%rsp)
2286 2290
2287 2291 movq kernelbase(%rip), %rax
2288 2292 #ifdef DEBUG
2289 2293 cmpq %rax, %rsi /* %rsi = kaddr */
2290 2294 jnb 1f
2291 2295 leaq .copyinstr_panic_msg(%rip), %rdi
2292 2296 xorl %eax, %eax
2293 2297 call panic
2294 2298 1:
2295 2299 #endif
2296 2300 /*
2297 2301 * pass lofault value as 5th argument to do_copystr
2298 2302 * do_copystr expects whether or not we need smap in %r10d
2299 2303 */
2300 2304 leaq _copyinstr_error(%rip), %r8
2301 2305 movl $1, %r10d
2302 2306
2303 2307 cmpq %rax, %rdi /* test uaddr < kernelbase */
2304 2308 jae 4f
2305 2309 SMAP_DISABLE_INSTR(6)
2306 2310 jmp do_copystr
2307 2311 4:
2308 2312 movq %gs:CPU_THREAD, %r9
2309 2313 jmp 3f
2310 2314
2311 2315 _copyinstr_error:
2312 2316 SMAP_ENABLE_INSTR(8)
2313 2317 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
2314 2318 3:
2315 2319 movq T_COPYOPS(%r9), %rax
2316 2320 cmpq $0, %rax
2317 2321 jz 2f
2318 2322
2319 2323 /*
2320 2324 * reload args for the copyop
2321 2325 */
2322 2326 movq (%rsp), %rdi
2323 2327 movq 0x8(%rsp), %rsi
2324 2328 movq 0x10(%rsp), %rdx
2325 2329 movq 0x18(%rsp), %rcx
2326 2330 leave
2327 - jmp *CP_COPYINSTR(%rax)
2328 -
2331 + movq CP_COPYINSTR(%rax), %rax
2332 + INDIRECT_JMP_REG(rax)
2333 +
2329 2334 2: movl $EFAULT, %eax /* return EFAULT */
2330 2335 leave
2331 2336 ret
2332 2337 SET_SIZE(copyinstr)
2333 2338
2334 2339 #elif defined(__i386)
2335 2340
2336 2341 #define ARG_UADDR 4
2337 2342 #define ARG_KADDR 8
2338 2343
2339 2344 ENTRY(copyinstr)
2340 2345 movl kernelbase, %ecx
2341 2346 #ifdef DEBUG
2342 2347 cmpl %ecx, ARG_KADDR(%esp)
2343 2348 jnb 1f
2344 2349 pushl %ebp
2345 2350 movl %esp, %ebp
2346 2351 pushl $.copyinstr_panic_msg
2347 2352 call panic
2348 2353 1:
2349 2354 #endif
2350 2355 lea _copyinstr_error, %eax
2351 2356 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
2352 2357 jb do_copystr
2353 2358 movl %gs:CPU_THREAD, %edx
2354 2359 jmp 3f
2355 2360
2356 2361 _copyinstr_error:
2357 2362 popl %edi
2358 - movl %gs:CPU_THREAD, %edx
2363 + movl %gs:CPU_THREAD, %edx
2359 2364 movl %edi, T_LOFAULT(%edx) /* original lofault */
2360 2365
2361 2366 popl %edi
2362 2367 popl %ebx
2363 2368 popl %ebp
2364 2369 3:
2365 2370 movl T_COPYOPS(%edx), %eax
2366 2371 cmpl $0, %eax
2367 2372 jz 2f
2368 2373 jmp *CP_COPYINSTR(%eax)
2369 -
2374 +
2370 2375 2: movl $EFAULT, %eax /* return EFAULT */
2371 2376 ret
2372 2377 SET_SIZE(copyinstr)
2373 2378
2374 2379 #undef ARG_UADDR
2375 2380 #undef ARG_KADDR
2376 2381
2377 2382 #endif /* __i386 */
2378 2383 #endif /* __lint */
2379 2384
2380 2385 /*
2381 2386 * Copy a null terminated string from the kernel
2382 2387 * address space to the user address space.
2383 2388 */
2384 2389
2385 2390 #if defined(__lint)
2386 2391
2387 2392 /* ARGSUSED */
2388 2393 int
2389 2394 copyoutstr(const char *kaddr, char *uaddr, size_t maxlength,
2390 2395 size_t *lencopied)
2391 2396 { return (0); }
2392 2397
2393 2398 #else /* __lint */
2394 2399
2395 2400 #if defined(__amd64)
2396 2401
2397 2402 ENTRY(copyoutstr)
2398 2403 pushq %rbp
2399 2404 movq %rsp, %rbp
2400 2405 subq $32, %rsp
2401 2406
2402 2407 /*
2403 2408 * save args in case we trap and need to rerun as a copyop
2404 2409 */
2405 2410 movq %rdi, (%rsp)
2406 2411 movq %rsi, 0x8(%rsp)
2407 2412 movq %rdx, 0x10(%rsp)
2408 2413 movq %rcx, 0x18(%rsp)
2409 2414
2410 2415 movq kernelbase(%rip), %rax
2411 2416 #ifdef DEBUG
2412 2417 cmpq %rax, %rdi /* %rdi = kaddr */
2413 2418 jnb 1f
2414 2419 leaq .copyoutstr_panic_msg(%rip), %rdi
2415 2420 jmp call_panic /* setup stack and call panic */
2416 2421 1:
2417 2422 #endif
2418 2423 /*
2419 2424 * pass lofault value as 5th argument to do_copystr
2420 2425 * pass one as 6th argument to do_copystr in %r10d
2421 2426 */
2422 2427 leaq _copyoutstr_error(%rip), %r8
2423 2428 movl $1, %r10d
2424 2429
2425 2430 cmpq %rax, %rsi /* test uaddr < kernelbase */
2426 2431 jae 4f
2427 2432 SMAP_DISABLE_INSTR(7)
2428 2433 jmp do_copystr
2429 2434 4:
2430 2435 movq %gs:CPU_THREAD, %r9
2431 2436 jmp 3f
2432 2437
2433 2438 _copyoutstr_error:
2434 2439 SMAP_ENABLE_INSTR(9)
2435 2440 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
2436 2441 3:
2437 2442 movq T_COPYOPS(%r9), %rax
2438 2443 cmpq $0, %rax
2439 2444 jz 2f
2440 2445
2441 2446 /*
2442 2447 * reload args for the copyop
2443 2448 */
2444 2449 movq (%rsp), %rdi
2445 2450 movq 0x8(%rsp), %rsi
2446 2451 movq 0x10(%rsp), %rdx
2447 2452 movq 0x18(%rsp), %rcx
2448 2453 leave
2449 - jmp *CP_COPYOUTSTR(%rax)
2450 -
2454 + movq CP_COPYOUTSTR(%rax), %rax
2455 + INDIRECT_JMP_REG(rax)
2456 +
2451 2457 2: movl $EFAULT, %eax /* return EFAULT */
2452 2458 leave
2453 2459 ret
2454 - SET_SIZE(copyoutstr)
2455 -
2460 + SET_SIZE(copyoutstr)
2461 +
2456 2462 #elif defined(__i386)
2457 2463
2458 2464 #define ARG_KADDR 4
2459 2465 #define ARG_UADDR 8
2460 2466
2461 2467 ENTRY(copyoutstr)
2462 2468 movl kernelbase, %ecx
2463 2469 #ifdef DEBUG
2464 2470 cmpl %ecx, ARG_KADDR(%esp)
2465 2471 jnb 1f
2466 2472 pushl %ebp
2467 2473 movl %esp, %ebp
2468 2474 pushl $.copyoutstr_panic_msg
2469 2475 call panic
2470 2476 1:
2471 2477 #endif
2472 2478 lea _copyoutstr_error, %eax
2473 2479 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
2474 2480 jb do_copystr
2475 2481 movl %gs:CPU_THREAD, %edx
2476 2482 jmp 3f
2477 2483
2478 2484 _copyoutstr_error:
2479 2485 popl %edi
2480 - movl %gs:CPU_THREAD, %edx
2486 + movl %gs:CPU_THREAD, %edx
2481 2487 movl %edi, T_LOFAULT(%edx) /* restore the original lofault */
2482 2488
2483 2489 popl %edi
2484 2490 popl %ebx
2485 2491 popl %ebp
2486 2492 3:
2487 2493 movl T_COPYOPS(%edx), %eax
2488 2494 cmpl $0, %eax
2489 2495 jz 2f
2490 2496 jmp *CP_COPYOUTSTR(%eax)
2491 2497
2492 2498 2: movl $EFAULT, %eax /* return EFAULT */
2493 2499 ret
2494 2500 SET_SIZE(copyoutstr)
2495 -
2501 +
2496 2502 #undef ARG_KADDR
2497 2503 #undef ARG_UADDR
2498 2504
2499 2505 #endif /* __i386 */
2500 2506 #endif /* __lint */
2501 2507
2502 2508 /*
2503 2509 * Since all of the fuword() variants are so similar, we have a macro to spit
2504 2510 * them out. This allows us to create DTrace-unobservable functions easily.
2505 2511 */
2506 -
2512 +
2507 2513 #if defined(__lint)
2508 2514
2509 2515 #if defined(__amd64)
2510 2516
2511 2517 /* ARGSUSED */
2512 2518 int
2513 2519 fuword64(const void *addr, uint64_t *dst)
2514 2520 { return (0); }
2515 2521
2516 2522 #endif
2517 2523
2518 2524 /* ARGSUSED */
2519 2525 int
2520 2526 fuword32(const void *addr, uint32_t *dst)
2521 2527 { return (0); }
2522 2528
2523 2529 /* ARGSUSED */
2524 2530 int
2525 2531 fuword16(const void *addr, uint16_t *dst)
2526 2532 { return (0); }
2527 2533
2528 2534 /* ARGSUSED */
2529 2535 int
2530 2536 fuword8(const void *addr, uint8_t *dst)
2531 2537 { return (0); }
2532 2538
2533 2539 #else /* __lint */
2534 2540
2535 2541 #if defined(__amd64)
2536 2542
2537 2543 /*
2538 2544 * Note that we don't save and reload the arguments here
2539 2545 * because their values are not altered in the copy path.
2540 2546 * Additionally, when successful, the smap_enable jmp will
2541 2547 * actually return us to our original caller.
2542 2548 */
2543 2549
2544 2550 #define FUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \
2545 2551 ENTRY(NAME) \
2546 2552 movq %gs:CPU_THREAD, %r9; \
2547 2553 cmpq kernelbase(%rip), %rdi; \
2548 2554 jae 1f; \
2549 2555 leaq _flt_/**/NAME, %rdx; \
2550 2556 movq %rdx, T_LOFAULT(%r9); \
2551 2557 SMAP_DISABLE_INSTR(DISNUM) \
2552 2558 INSTR (%rdi), REG; \
2553 2559 movq $0, T_LOFAULT(%r9); \
2554 2560 INSTR REG, (%rsi); \
2555 2561 xorl %eax, %eax; \
2556 2562 SMAP_ENABLE_INSTR(EN1) \
2557 2563 ret; \
2558 2564 _flt_/**/NAME: \
2559 2565 SMAP_ENABLE_INSTR(EN2) \
2560 2566 movq $0, T_LOFAULT(%r9); \
2561 2567 1: \
2562 2568 movq T_COPYOPS(%r9), %rax; \
2563 2569 cmpq $0, %rax; \
2564 2570 jz 2f; \
2565 - jmp *COPYOP(%rax); \
2571 + movq COPYOP(%rax), %rax; \
2572 + INDIRECT_JMP_REG(rax); \
2566 2573 2: \
2567 2574 movl $-1, %eax; \
2568 2575 ret; \
2569 2576 SET_SIZE(NAME)
2570 -
2577 +
2571 2578 FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11)
2572 2579 FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13)
2573 2580 FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15)
2574 2581 FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17)
2575 2582
2576 2583 #elif defined(__i386)
2577 2584
2578 2585 #define FUWORD(NAME, INSTR, REG, COPYOP) \
2579 2586 ENTRY(NAME) \
2580 2587 movl %gs:CPU_THREAD, %ecx; \
2581 2588 movl kernelbase, %eax; \
2582 2589 cmpl %eax, 4(%esp); \
2583 2590 jae 1f; \
2584 2591 lea _flt_/**/NAME, %edx; \
2585 2592 movl %edx, T_LOFAULT(%ecx); \
2586 2593 movl 4(%esp), %eax; \
2587 2594 movl 8(%esp), %edx; \
2588 2595 INSTR (%eax), REG; \
2589 2596 movl $0, T_LOFAULT(%ecx); \
2590 2597 INSTR REG, (%edx); \
2591 2598 xorl %eax, %eax; \
2592 2599 ret; \
2593 2600 _flt_/**/NAME: \
2594 2601 movl $0, T_LOFAULT(%ecx); \
2595 2602 1: \
2596 2603 movl T_COPYOPS(%ecx), %eax; \
2597 2604 cmpl $0, %eax; \
2598 2605 jz 2f; \
2599 2606 jmp *COPYOP(%eax); \
2600 2607 2: \
2601 2608 movl $-1, %eax; \
2602 2609 ret; \
2603 2610 SET_SIZE(NAME)
2604 2611
2605 2612 FUWORD(fuword32, movl, %eax, CP_FUWORD32)
2606 2613 FUWORD(fuword16, movw, %ax, CP_FUWORD16)
2607 2614 FUWORD(fuword8, movb, %al, CP_FUWORD8)
2608 2615
2609 2616 #endif /* __i386 */
2610 2617
2611 2618 #undef FUWORD
2612 2619
2613 2620 #endif /* __lint */
2614 2621
2615 2622 /*
2616 2623 * Set user word.
2617 2624 */
2618 2625
2619 2626 #if defined(__lint)
2620 2627
2621 2628 #if defined(__amd64)
2622 2629
2623 2630 /* ARGSUSED */
2624 2631 int
2625 2632 suword64(void *addr, uint64_t value)
2626 2633 { return (0); }
2627 2634
2628 2635 #endif
2629 2636
2630 2637 /* ARGSUSED */
2631 2638 int
2632 2639 suword32(void *addr, uint32_t value)
2633 2640 { return (0); }
2634 2641
2635 2642 /* ARGSUSED */
2636 2643 int
2637 2644 suword16(void *addr, uint16_t value)
2638 2645 { return (0); }
2639 2646
2640 2647 /* ARGSUSED */
2641 2648 int
2642 2649 suword8(void *addr, uint8_t value)
2643 2650 { return (0); }
2644 2651
2645 2652 #else /* lint */
2646 2653
2647 2654 #if defined(__amd64)
2648 2655
2649 2656 /*
2650 2657 * Note that we don't save and reload the arguments here
2651 2658 * because their values are not altered in the copy path.
2652 2659 */
2653 2660
2654 2661 #define SUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \
2655 2662 ENTRY(NAME) \
2656 2663 movq %gs:CPU_THREAD, %r9; \
2657 2664 cmpq kernelbase(%rip), %rdi; \
2658 2665 jae 1f; \
2659 2666 leaq _flt_/**/NAME, %rdx; \
2660 2667 SMAP_DISABLE_INSTR(DISNUM) \
2661 2668 movq %rdx, T_LOFAULT(%r9); \
2662 2669 INSTR REG, (%rdi); \
2663 2670 movq $0, T_LOFAULT(%r9); \
2664 2671 xorl %eax, %eax; \
2665 2672 SMAP_ENABLE_INSTR(EN1) \
2666 2673 ret; \
2667 2674 _flt_/**/NAME: \
2668 2675 SMAP_ENABLE_INSTR(EN2) \
2669 2676 movq $0, T_LOFAULT(%r9); \
2670 2677 1: \
2671 2678 movq T_COPYOPS(%r9), %rax; \
2672 2679 cmpq $0, %rax; \
2673 2680 jz 3f; \
2674 - jmp *COPYOP(%rax); \
2681 + movq COPYOP(%rax), %rax; \
2682 + INDIRECT_JMP_REG(rax); \
2675 2683 3: \
2676 2684 movl $-1, %eax; \
2677 2685 ret; \
2678 2686 SET_SIZE(NAME)
2679 2687
2680 2688 SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19)
2681 2689 SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21)
2682 2690 SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23)
2683 2691 SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25)
2684 2692
2685 2693 #elif defined(__i386)
2686 2694
2687 2695 #define SUWORD(NAME, INSTR, REG, COPYOP) \
2688 2696 ENTRY(NAME) \
2689 2697 movl %gs:CPU_THREAD, %ecx; \
2690 2698 movl kernelbase, %eax; \
2691 2699 cmpl %eax, 4(%esp); \
2692 2700 jae 1f; \
2693 2701 lea _flt_/**/NAME, %edx; \
2694 2702 movl %edx, T_LOFAULT(%ecx); \
2695 2703 movl 4(%esp), %eax; \
2696 2704 movl 8(%esp), %edx; \
2697 2705 INSTR REG, (%eax); \
2698 2706 movl $0, T_LOFAULT(%ecx); \
2699 2707 xorl %eax, %eax; \
2700 2708 ret; \
2701 2709 _flt_/**/NAME: \
2702 2710 movl $0, T_LOFAULT(%ecx); \
2703 2711 1: \
2704 2712 movl T_COPYOPS(%ecx), %eax; \
2705 2713 cmpl $0, %eax; \
2706 2714 jz 3f; \
2707 2715 movl COPYOP(%eax), %ecx; \
2708 2716 jmp *%ecx; \
2709 2717 3: \
2710 2718 movl $-1, %eax; \
2711 2719 ret; \
2712 2720 SET_SIZE(NAME)
2713 2721
2714 2722 SUWORD(suword32, movl, %edx, CP_SUWORD32)
2715 2723 SUWORD(suword16, movw, %dx, CP_SUWORD16)
2716 2724 SUWORD(suword8, movb, %dl, CP_SUWORD8)
2717 2725
2718 2726 #endif /* __i386 */
2719 2727
2720 2728 #undef SUWORD
2721 2729
2722 2730 #endif /* __lint */
2723 2731
2724 2732 #if defined(__lint)
2725 2733
2726 2734 #if defined(__amd64)
2727 2735
2728 2736 /*ARGSUSED*/
2729 2737 void
2730 2738 fuword64_noerr(const void *addr, uint64_t *dst)
2731 2739 {}
2732 2740
2733 2741 #endif
2734 2742
2735 2743 /*ARGSUSED*/
2736 2744 void
2737 2745 fuword32_noerr(const void *addr, uint32_t *dst)
2738 2746 {}
2739 2747
2740 2748 /*ARGSUSED*/
2741 2749 void
2742 2750 fuword8_noerr(const void *addr, uint8_t *dst)
2743 2751 {}
2744 2752
2745 2753 /*ARGSUSED*/
2746 2754 void
2747 2755 fuword16_noerr(const void *addr, uint16_t *dst)
2748 2756 {}
2749 2757
2750 2758 #else /* __lint */
2751 2759
2752 2760 #if defined(__amd64)
2753 2761
2754 2762 #define FUWORD_NOERR(NAME, INSTR, REG) \
2755 2763 ENTRY(NAME) \
2756 2764 cmpq kernelbase(%rip), %rdi; \
2757 2765 cmovnbq kernelbase(%rip), %rdi; \
2758 2766 INSTR (%rdi), REG; \
2759 2767 INSTR REG, (%rsi); \
2760 2768 ret; \
2761 2769 SET_SIZE(NAME)
2762 2770
2763 2771 FUWORD_NOERR(fuword64_noerr, movq, %rax)
2764 2772 FUWORD_NOERR(fuword32_noerr, movl, %eax)
2765 2773 FUWORD_NOERR(fuword16_noerr, movw, %ax)
2766 2774 FUWORD_NOERR(fuword8_noerr, movb, %al)
2767 2775
2768 2776 #elif defined(__i386)
2769 2777
2770 2778 #define FUWORD_NOERR(NAME, INSTR, REG) \
2771 2779 ENTRY(NAME) \
2772 2780 movl 4(%esp), %eax; \
2773 2781 cmpl kernelbase, %eax; \
2774 2782 jb 1f; \
2775 2783 movl kernelbase, %eax; \
2776 2784 1: movl 8(%esp), %edx; \
2777 2785 INSTR (%eax), REG; \
2778 2786 INSTR REG, (%edx); \
2779 2787 ret; \
2780 2788 SET_SIZE(NAME)
2781 2789
2782 2790 FUWORD_NOERR(fuword32_noerr, movl, %ecx)
2783 2791 FUWORD_NOERR(fuword16_noerr, movw, %cx)
2784 2792 FUWORD_NOERR(fuword8_noerr, movb, %cl)
2785 2793
2786 2794 #endif /* __i386 */
2787 2795
2788 2796 #undef FUWORD_NOERR
2789 2797
2790 2798 #endif /* __lint */
2791 2799
2792 2800 #if defined(__lint)
2793 2801
2794 2802 #if defined(__amd64)
2795 2803
2796 2804 /*ARGSUSED*/
2797 2805 void
2798 2806 suword64_noerr(void *addr, uint64_t value)
2799 2807 {}
2800 2808
2801 2809 #endif
2802 2810
2803 2811 /*ARGSUSED*/
2804 2812 void
2805 2813 suword32_noerr(void *addr, uint32_t value)
2806 2814 {}
2807 2815
2808 2816 /*ARGSUSED*/
2809 2817 void
2810 2818 suword16_noerr(void *addr, uint16_t value)
2811 2819 {}
2812 2820
2813 2821 /*ARGSUSED*/
2814 2822 void
2815 2823 suword8_noerr(void *addr, uint8_t value)
2816 2824 {}
2817 2825
2818 2826 #else /* lint */
2819 2827
2820 2828 #if defined(__amd64)
2821 2829
2822 2830 #define SUWORD_NOERR(NAME, INSTR, REG) \
2823 2831 ENTRY(NAME) \
2824 2832 cmpq kernelbase(%rip), %rdi; \
2825 2833 cmovnbq kernelbase(%rip), %rdi; \
2826 2834 INSTR REG, (%rdi); \
2827 2835 ret; \
2828 2836 SET_SIZE(NAME)
2829 2837
2830 2838 SUWORD_NOERR(suword64_noerr, movq, %rsi)
2831 2839 SUWORD_NOERR(suword32_noerr, movl, %esi)
2832 2840 SUWORD_NOERR(suword16_noerr, movw, %si)
2833 2841 SUWORD_NOERR(suword8_noerr, movb, %sil)
2834 2842
2835 2843 #elif defined(__i386)
2836 2844
2837 2845 #define SUWORD_NOERR(NAME, INSTR, REG) \
2838 2846 ENTRY(NAME) \
2839 2847 movl 4(%esp), %eax; \
2840 2848 cmpl kernelbase, %eax; \
2841 2849 jb 1f; \
2842 2850 movl kernelbase, %eax; \
2843 2851 1: \
2844 2852 movl 8(%esp), %edx; \
2845 2853 INSTR REG, (%eax); \
2846 2854 ret; \
2847 2855 SET_SIZE(NAME)
2848 2856
2849 2857 SUWORD_NOERR(suword32_noerr, movl, %edx)
2850 2858 SUWORD_NOERR(suword16_noerr, movw, %dx)
2851 2859 SUWORD_NOERR(suword8_noerr, movb, %dl)
2852 2860
2853 2861 #endif /* __i386 */
2854 2862
2855 2863 #undef SUWORD_NOERR
2856 2864
2857 2865 #endif /* lint */
2858 2866
2859 2867
2860 2868 #if defined(__lint)
2861 2869
2862 2870 /*ARGSUSED*/
2863 2871 int
2864 2872 subyte(void *addr, uchar_t value)
2865 2873 { return (0); }
2866 2874
2867 2875 /*ARGSUSED*/
2868 2876 void
2869 2877 subyte_noerr(void *addr, uchar_t value)
2870 2878 {}
2871 2879
2872 2880 /*ARGSUSED*/
2873 2881 int
2874 2882 fulword(const void *addr, ulong_t *valuep)
2875 2883 { return (0); }
2876 2884
2877 2885 /*ARGSUSED*/
2878 2886 void
2879 2887 fulword_noerr(const void *addr, ulong_t *valuep)
2880 2888 {}
2881 2889
2882 2890 /*ARGSUSED*/
2883 2891 int
2884 2892 sulword(void *addr, ulong_t valuep)
2885 2893 { return (0); }
2886 2894
2887 2895 /*ARGSUSED*/
2888 2896 void
2889 2897 sulword_noerr(void *addr, ulong_t valuep)
2890 2898 {}
2891 2899
2892 2900 #else
2893 2901
2894 2902 .weak subyte
2895 2903 subyte=suword8
2896 2904 .weak subyte_noerr
2897 2905 subyte_noerr=suword8_noerr
2898 2906
2899 2907 #if defined(__amd64)
2900 2908
2901 2909 .weak fulword
2902 2910 fulword=fuword64
2903 2911 .weak fulword_noerr
2904 2912 fulword_noerr=fuword64_noerr
2905 2913 .weak sulword
2906 2914 sulword=suword64
2907 2915 .weak sulword_noerr
2908 2916 sulword_noerr=suword64_noerr
2909 2917
2910 2918 #elif defined(__i386)
2911 2919
2912 2920 .weak fulword
2913 2921 fulword=fuword32
2914 2922 .weak fulword_noerr
2915 2923 fulword_noerr=fuword32_noerr
2916 2924 .weak sulword
2917 2925 sulword=suword32
2918 2926 .weak sulword_noerr
2919 2927 sulword_noerr=suword32_noerr
2920 2928
2921 2929 #endif /* __i386 */
2922 2930
2923 2931 #endif /* __lint */
2924 2932
2925 2933 #if defined(__lint)
2926 2934
2927 2935 /*
2928 2936 * Copy a block of storage - must not overlap (from + len <= to).
2929 2937 * No fault handler installed (to be called under on_fault())
2930 2938 */
2931 2939
2932 2940 /* ARGSUSED */
2933 2941 void
2934 2942 copyout_noerr(const void *kfrom, void *uto, size_t count)
2935 2943 {}
2936 2944
2937 2945 /* ARGSUSED */
2938 2946 void
2939 2947 copyin_noerr(const void *ufrom, void *kto, size_t count)
2940 2948 {}
2941 2949
2942 2950 /*
2943 2951 * Zero a block of storage in user space
2944 2952 */
2945 2953
2946 2954 /* ARGSUSED */
2947 2955 void
2948 2956 uzero(void *addr, size_t count)
2949 2957 {}
2950 2958
2951 2959 /*
2952 2960 * copy a block of storage in user space
2953 2961 */
2954 2962
2955 2963 /* ARGSUSED */
2956 2964 void
2957 2965 ucopy(const void *ufrom, void *uto, size_t ulength)
2958 2966 {}
2959 2967
2960 2968 /*
2961 2969 * copy a string in user space
2962 2970 */
2963 2971
2964 2972 /* ARGSUSED */
2965 2973 void
2966 2974 ucopystr(const char *ufrom, char *uto, size_t umaxlength, size_t *lencopied)
2967 2975 {}
2968 2976
2969 2977 #else /* __lint */
2970 2978
2971 2979 #if defined(__amd64)
2972 2980
2973 2981 ENTRY(copyin_noerr)
2974 2982 movq kernelbase(%rip), %rax
2975 2983 #ifdef DEBUG
2976 2984 cmpq %rax, %rsi /* %rsi = kto */
2977 2985 jae 1f
2978 2986 leaq .cpyin_ne_pmsg(%rip), %rdi
2979 2987 jmp call_panic /* setup stack and call panic */
2980 2988 1:
2981 2989 #endif
2982 2990 cmpq %rax, %rdi /* ufrom < kernelbase */
2983 2991 jb do_copy
2984 2992 movq %rax, %rdi /* force fault at kernelbase */
2985 2993 jmp do_copy
2986 2994 SET_SIZE(copyin_noerr)
2987 2995
2988 2996 ENTRY(copyout_noerr)
2989 2997 movq kernelbase(%rip), %rax
2990 2998 #ifdef DEBUG
2991 2999 cmpq %rax, %rdi /* %rdi = kfrom */
2992 3000 jae 1f
2993 3001 leaq .cpyout_ne_pmsg(%rip), %rdi
2994 3002 jmp call_panic /* setup stack and call panic */
2995 3003 1:
2996 3004 #endif
2997 3005 cmpq %rax, %rsi /* uto < kernelbase */
2998 3006 jb do_copy
2999 3007 movq %rax, %rsi /* force fault at kernelbase */
3000 3008 jmp do_copy
3001 3009 SET_SIZE(copyout_noerr)
3002 3010
3003 3011 ENTRY(uzero)
3004 3012 movq kernelbase(%rip), %rax
3005 3013 cmpq %rax, %rdi
3006 3014 jb do_zero
3007 3015 movq %rax, %rdi /* force fault at kernelbase */
3008 3016 jmp do_zero
3009 3017 SET_SIZE(uzero)
3010 3018
3011 3019 ENTRY(ucopy)
3012 3020 movq kernelbase(%rip), %rax
3013 3021 cmpq %rax, %rdi
3014 3022 cmovaeq %rax, %rdi /* force fault at kernelbase */
3015 3023 cmpq %rax, %rsi
3016 3024 cmovaeq %rax, %rsi /* force fault at kernelbase */
3017 3025 jmp do_copy
3018 3026 SET_SIZE(ucopy)
3019 3027
3020 3028 /*
3021 3029  * Note, the frame pointer is required here because do_copystr expects
3022 3030 * to be able to pop it off!
3023 3031 */
3024 3032 ENTRY(ucopystr)
3025 3033 pushq %rbp
3026 3034 movq %rsp, %rbp
3027 3035 movq kernelbase(%rip), %rax
3028 3036 cmpq %rax, %rdi
3029 3037 cmovaeq %rax, %rdi /* force fault at kernelbase */
3030 3038 cmpq %rax, %rsi
3031 3039 cmovaeq %rax, %rsi /* force fault at kernelbase */
3032 3040 /* do_copystr expects lofault address in %r8 */
3033 3041 /* do_copystr expects whether or not we need smap in %r10 */
3034 3042 xorl %r10d, %r10d
3035 3043 movq %gs:CPU_THREAD, %r8
3036 3044 movq T_LOFAULT(%r8), %r8
3037 3045 jmp do_copystr
3038 3046 SET_SIZE(ucopystr)
3039 3047
3040 3048 #elif defined(__i386)
3041 3049
3042 3050 ENTRY(copyin_noerr)
3043 3051 movl kernelbase, %eax
3044 3052 #ifdef DEBUG
3045 3053 cmpl %eax, 8(%esp)
3046 3054 jae 1f
3047 3055 pushl $.cpyin_ne_pmsg
3048 3056 call panic
3049 3057 1:
3050 3058 #endif
3051 3059 cmpl %eax, 4(%esp)
3052 3060 jb do_copy
3053 3061 movl %eax, 4(%esp) /* force fault at kernelbase */
3054 3062 jmp do_copy
3055 3063 SET_SIZE(copyin_noerr)
3056 3064
3057 3065 ENTRY(copyout_noerr)
3058 3066 movl kernelbase, %eax
3059 3067 #ifdef DEBUG
3060 3068 cmpl %eax, 4(%esp)
3061 3069 jae 1f
3062 3070 pushl $.cpyout_ne_pmsg
3063 3071 call panic
3064 3072 1:
3065 3073 #endif
3066 3074 cmpl %eax, 8(%esp)
3067 3075 jb do_copy
3068 3076 movl %eax, 8(%esp) /* force fault at kernelbase */
3069 3077 jmp do_copy
3070 3078 SET_SIZE(copyout_noerr)
3071 3079
3072 3080 ENTRY(uzero)
3073 3081 movl kernelbase, %eax
3074 3082 cmpl %eax, 4(%esp)
3075 3083 jb do_zero
3076 3084 movl %eax, 4(%esp) /* force fault at kernelbase */
3077 3085 jmp do_zero
3078 3086 SET_SIZE(uzero)
3079 3087
3080 3088 ENTRY(ucopy)
3081 3089 movl kernelbase, %eax
3082 3090 cmpl %eax, 4(%esp)
3083 3091 jb 1f
3084 3092 movl %eax, 4(%esp) /* force fault at kernelbase */
3085 3093 1:
3086 3094 cmpl %eax, 8(%esp)
3087 3095 jb do_copy
3088 3096 movl %eax, 8(%esp) /* force fault at kernelbase */
3089 3097 jmp do_copy
3090 3098 SET_SIZE(ucopy)
3091 3099
3092 3100 ENTRY(ucopystr)
3093 3101 movl kernelbase, %eax
3094 3102 cmpl %eax, 4(%esp)
3095 3103 jb 1f
3096 3104 movl %eax, 4(%esp) /* force fault at kernelbase */
3097 3105 1:
3098 3106 cmpl %eax, 8(%esp)
3099 3107 jb 2f
3100 3108 movl %eax, 8(%esp) /* force fault at kernelbase */
3101 3109 2:
3102 3110 /* do_copystr expects the lofault address in %eax */
3103 3111 movl %gs:CPU_THREAD, %eax
3104 3112 movl T_LOFAULT(%eax), %eax
3105 3113 jmp do_copystr
3106 3114 SET_SIZE(ucopystr)
3107 3115
3108 3116 #endif /* __i386 */
3109 3117
3110 3118 #ifdef DEBUG
3111 3119 .data
3112 3120 .kcopy_panic_msg:
3113 3121 .string "kcopy: arguments below kernelbase"
3114 3122 .bcopy_panic_msg:
3115 3123 .string "bcopy: arguments below kernelbase"
3116 3124 .kzero_panic_msg:
3117 3125 .string "kzero: arguments below kernelbase"
3118 3126 .bzero_panic_msg:
3119 3127 .string "bzero: arguments below kernelbase"
3120 3128 .copyin_panic_msg:
3121 3129 .string "copyin: kaddr argument below kernelbase"
3122 3130 .xcopyin_panic_msg:
3123 3131 .string "xcopyin: kaddr argument below kernelbase"
3124 3132 .copyout_panic_msg:
3125 3133 .string "copyout: kaddr argument below kernelbase"
3126 3134 .xcopyout_panic_msg:
3127 3135 .string "xcopyout: kaddr argument below kernelbase"
3128 3136 .copystr_panic_msg:
3129 3137 .string "copystr: arguments in user space"
3130 3138 .copyinstr_panic_msg:
3131 3139 .string "copyinstr: kaddr argument not in kernel address space"
3132 3140 .copyoutstr_panic_msg:
3133 3141 .string "copyoutstr: kaddr argument not in kernel address space"
3134 3142 .cpyin_ne_pmsg:
3135 3143 .string "copyin_noerr: argument not in kernel address space"
3136 3144 .cpyout_ne_pmsg:
3137 3145 .string "copyout_noerr: argument not in kernel address space"
3138 3146 #endif
3139 3147
3140 3148 #endif /* __lint */
3141 3149
3142 3150 /*
3143 3151 * These functions are used for SMAP, supervisor mode access protection. They
3144 3152 * are hotpatched to become real instructions when the system starts up which is
3145 3153 * done in mlsetup() as a part of enabling the other CR4 related features.
3146 3154 *
3147 3155 * Generally speaking, smap_disable() is a stac instruction and smap_enable is a
3148 3156 * clac instruction. It's safe to call these any number of times, and in fact,
3149 3157 * out of paranoia, the kernel will likely call it at several points.
3150 3158 */
3151 3159
3152 3160 #if defined(__lint)
3153 3161
3154 3162 void
3155 3163 smap_enable(void)
3156 3164 {}
3157 3165
3158 3166 void
3159 3167 smap_disable(void)
3160 3168 {}
3161 3169
3162 3170 #else
3163 3171
3164 3172 #if defined (__amd64) || defined(__i386)
3165 3173 ENTRY(smap_disable)
3166 3174 nop
3167 3175 nop
3168 3176 nop
3169 3177 ret
3170 3178 SET_SIZE(smap_disable)
3171 3179
3172 3180 ENTRY(smap_enable)
3173 3181 nop
3174 3182 nop
3175 3183 nop
3176 3184 ret
3177 3185 SET_SIZE(smap_enable)
3178 3186
3179 3187 #endif /* __amd64 || __i386 */
3180 3188
3181 3189 #endif /* __lint */
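Since smap_enable() and smap_disable() above are each a three-byte nop sled, the hotpatch described in the comment has exactly enough room for the real instruction: clac and stac are both three-byte opcodes. A hedged sketch of what the patched routines are expected to look like once startup has run (the encodings are from the Intel SDM; the patched form itself is not part of this diff):

	/* smap_enable() after patching: clear rflags.AC, so supervisor
	 * accesses to user pages fault again */
	clac			/* 0f 01 ca, overwriting the three nops */
	ret

	/* smap_disable() after patching: set rflags.AC, so supervisor
	 * accesses to user pages are allowed */
	stac			/* 0f 01 cb, overwriting the three nops */
	ret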
3182 3190
3183 3191 #ifndef __lint
3184 3192
3185 3193 .data
3186 -.align 4
3194 +.align 4
3187 3195 .globl _smap_enable_patch_count
3188 3196 .type _smap_enable_patch_count,@object
3189 3197 .size _smap_enable_patch_count, 4
3190 3198 _smap_enable_patch_count:
3191 3199 .long SMAP_ENABLE_COUNT
3192 3200
3193 3201 .globl _smap_disable_patch_count
3194 3202 .type _smap_disable_patch_count,@object
3195 3203 .size _smap_disable_patch_count, 4
3196 3204 _smap_disable_patch_count:
3197 3205 .long SMAP_DISABLE_COUNT
3198 3206
3199 3207 #endif /* __lint */