10908 Simplify SMAP relocations with krtld
--- old/usr/src/uts/intel/ia32/ml/copy.s
+++ new/usr/src/uts/intel/ia32/ml/copy.s
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /*
27 27 * Copyright (c) 2009, Intel Corporation
28 28 * All rights reserved.
29 29 */
30 30
31 31 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
32 32 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
33 33 /* All Rights Reserved */
34 34
35 35 /* Copyright (c) 1987, 1988 Microsoft Corporation */
36 36 /* All Rights Reserved */
37 37
38 38 /*
39 - * Copyright 2016 Joyent, Inc.
39 + * Copyright (c) 2017 Joyent, Inc.
40 40 */
41 41
42 42 #include <sys/errno.h>
43 43 #include <sys/asm_linkage.h>
44 44
45 45 #if defined(__lint)
46 46 #include <sys/types.h>
47 47 #include <sys/systm.h>
48 48 #else /* __lint */
49 49 #include "assym.h"
50 50 #endif /* __lint */
51 51
52 52 #define KCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */
53 53 #define XCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */
54 54 /*
55 55  * Non-temporal access (NTA) alignment requirement
56 56 */
57 57 #define NTA_ALIGN_SIZE 4 /* Must be at least 4-byte aligned */
58 58 #define NTA_ALIGN_MASK _CONST(NTA_ALIGN_SIZE-1)
59 59 #define COUNT_ALIGN_SIZE 16 /* Must be at least 16-byte aligned */
60 60 #define COUNT_ALIGN_MASK _CONST(COUNT_ALIGN_SIZE-1)
61 61
62 62 /*
63 63  * With the introduction of Broadwell, Intel added supervisor mode access
64 64  * prevention -- SMAP. SMAP forces the kernel to set certain bits to
65 65  * enable access to user pages (AC in rflags, defined as PS_ACHK in
66 66  * <sys/psw.h>). One of the challenges is that the implementations of many of the
67 67 * userland copy routines directly use the kernel ones. For example, copyin and
68 68 * copyout simply go and jump to the do_copy_fault label and traditionally let
69 69 * those deal with the return for them. In fact, changing that is a can of frame
70 70 * pointers.
71 71 *
72 72 * Rules and Constraints:
73 73 *
74 74 * 1. For anything that's not in copy.s, we have it do explicit calls to the
75 75 * smap related code. It usually is in a position where it is able to. This is
76 76 * restricted to the following three places: DTrace, resume() in swtch.s and
77 77 * on_fault/no_fault. If you want to add it somewhere else, we should be
78 78 * thinking twice.
79 79 *
80 80  * 2. We try to toggle this over the smallest window possible. This means that if
81 81  * we take a fault, or need to try to use a copyop in copyin() or copyout(), or any
82 82 * other function, we will always leave with SMAP enabled (the kernel cannot
83 83 * access user pages).
84 84 *
85 85 * 3. None of the *_noerr() or ucopy/uzero routines should toggle SMAP. They are
86 86 * explicitly only allowed to be called while in an on_fault()/no_fault() handler,
87 87 * which already takes care of ensuring that SMAP is enabled and disabled. Note
88 88 * this means that when under an on_fault()/no_fault() handler, one must not
89 89  * call the non-*_noerr() routines.
90 90 *
91 91 * 4. The first thing we should do after coming out of an lofault handler is to
92 92 * make sure that we call smap_enable again to ensure that we are safely
93 93 * protected, as more often than not, we will have disabled smap to get there.
94 94 *
95 95 * 5. The SMAP functions, smap_enable and smap_disable may not touch any
96 96  * registers beyond those used by the call and ret. These routines may be called
97 97 * from arbitrary contexts in copy.s where we have slightly more special ABIs in
98 98 * place.
99 99 *
100 100 * 6. For any inline user of SMAP, the appropriate SMAP_ENABLE_INSTR and
101 101 * SMAP_DISABLE_INSTR macro should be used (except for smap_enable() and
102 102 * smap_disable()). If the number of these is changed, you must update the
103 103 * constants SMAP_ENABLE_COUNT and SMAP_DISABLE_COUNT below.
104 104 *
105 105 * 7. Note, at this time SMAP is not implemented for the 32-bit kernel. There is
106 106 * no known technical reason preventing it from being enabled.
107 107 *
108 108 * 8. Generally this .s file is processed by a K&R style cpp. This means that it
109 109 * really has a lot of feelings about whitespace. In particular, if you have a
110 110 * macro FOO with the arguments FOO(1, 3), the second argument is in fact ' 3'.
111 111 *
112 112 * 9. The smap_enable and smap_disable functions should not generally be called.
113 113  * They exist such that DTrace and on_trap() may use them; that's it.
114 114 *
115 115 * 10. In general, the kernel has its own value for rflags that gets used. This
116 116 * is maintained in a few different places which vary based on how the thread
117 117 * comes into existence and whether it's a user thread. In general, when the
118 118  * kernel takes a trap, it will always set rflags to a known set of flags,
119 119 * mainly as part of ENABLE_INTR_FLAGS and F_OFF and F_ON. These ensure that
120 120 * PS_ACHK is cleared for us. In addition, when using the sysenter instruction,
121 121  * we mask PS_ACHK off via the AMD_SFMASK MSR. See init_cpu_syscall() for
122 122 * where that gets masked off.
123 123 */
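As a concrete illustration of rules 3 and 4 above, a caller that wants the
*_noerr() flavour has to sit inside an on_fault()/no_fault() window, which is
what toggles SMAP and recovers from the fault on its behalf. A minimal sketch
(hypothetical caller, not part of copy.s, assuming the usual kernel prototypes
for on_fault(), no_fault() and copyin_noerr()):

    #include <sys/types.h>
    #include <sys/errno.h>
    #include <sys/systm.h>

    /* Illustrative only -- not part of copy.s. */
    static int
    fetch_user_word(const void *uaddr, uint32_t *valp)
    {
            label_t ljb;

            if (on_fault(&ljb)) {
                    /* Faulted: the lofault path already re-enabled SMAP. */
                    no_fault();
                    return (EFAULT);
            }
            copyin_noerr(uaddr, valp, sizeof (*valp));      /* rule 3 */
            no_fault();
            return (0);
    }

Conversely, per the same rule, the non-*_noerr() routines (e.g. plain
copyin()) must not be called from inside such a window.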
124 124
125 125 /*
126 126  * The optimal 64-bit bcopy and kcopy for modern x86 processors use
127 127  * "rep smovq" for large sizes. Performance data shows that many calls to
128 128  * bcopy/kcopy/bzero/kzero operate on small buffers. For best performance at
129 129  * these small sizes, unrolled code is used. For medium sizes, loops writing
130 130  * 64 bytes per iteration are used. Transition points were determined experimentally.
131 131 */
132 132 #define BZERO_USE_REP (1024)
133 133 #define BCOPY_DFLT_REP (128)
134 134 #define BCOPY_NHM_REP (768)
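The tiering these constants select can be pictured in C roughly as follows.
This is an illustrative sketch only, not the generated code; the real routines
below are hand-written assembly and the large-copy threshold is patched at
boot for the CPU at hand (BCOPY_DFLT_REP vs. BCOPY_NHM_REP).

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    void
    tiered_copy(const void *from, void *to, size_t n)
    {
            const uint8_t *s = from;
            uint8_t *d = to;

            if (n < 0x50) {                 /* small: unrolled in the asm */
                    while (n--)
                            *d++ = *s++;
            } else if (n < 128) {           /* medium: 64 bytes/iteration */
                    do {
                            memcpy(d, s, 64);
                            s += 64; d += 64; n -= 64;
                    } while (n >= 64);
                    while (n--)             /* 0-63 byte tail */
                            *d++ = *s++;
            } else {                        /* large: "rep smovq" in the asm */
                    memcpy(d, s, n);
            }
    }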
135 135
136 136 /*
137 137 * Copy a block of storage, returning an error code if `from' or
138 138 * `to' takes a kernel pagefault which cannot be resolved.
139 139 * Returns errno value on pagefault error, 0 if all ok
140 140 */
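A hedged usage sketch (the caller is hypothetical, not from copy.s): because
kcopy() reports an unresolvable pagefault as an errno rather than panicking,
callers simply propagate its return value.

    #include <sys/types.h>
    #include <sys/systm.h>

    static int
    copy_mapped_buffer(const void *src, void *dst, size_t len)
    {
            int err;

            /* 0 on success, an errno (typically EFAULT) on a bad fault */
            if ((err = kcopy(src, dst, len)) != 0)
                    return (err);
            return (0);
    }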
141 141
142 142 /*
143 143 * I'm sorry about these macros, but copy.s is unsurprisingly sensitive to
144 144 * additional call instructions.
145 145 */
146 146 #if defined(__amd64)
147 147 #define SMAP_DISABLE_COUNT 16
148 148 #define SMAP_ENABLE_COUNT 26
149 149 #elif defined(__i386)
150 150 #define SMAP_DISABLE_COUNT 0
151 151 #define SMAP_ENABLE_COUNT 0
152 152 #endif
153 153
154 154 #define SMAP_DISABLE_INSTR(ITER) \
155 155 .globl _smap_disable_patch_/**/ITER; \
156 156 _smap_disable_patch_/**/ITER/**/:; \
157 157 nop; nop; nop;
158 158
159 159 #define SMAP_ENABLE_INSTR(ITER) \
160 160 .globl _smap_enable_patch_/**/ITER; \
161 161 _smap_enable_patch_/**/ITER/**/:; \
162 162 nop; nop; nop;
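These macros only lay down a labelled three-byte nop sled; nothing here emits
STAC/CLAC directly. On SMAP-capable CPUs each sled is rewritten at boot with
the matching three-byte instruction -- STAC (0x0f 0x01 0xcb) at the disable
sites so the kernel may touch user pages, CLAC (0x0f 0x01 0xca) at the enable
sites to turn protection back on; on CPUs without SMAP the nops stay in place.
A conceptual sketch of that pass (names here are illustrative; patch_text()
stands in for the kernel's text-patching primitive and is not an API defined
in this file):

    #include <stddef.h>
    #include <stdint.h>

    extern void patch_text(void *addr, const void *new_bytes, size_t len);

    static const uint8_t stac_insn[3] = { 0x0f, 0x01, 0xcb };
    static const uint8_t clac_insn[3] = { 0x0f, 0x01, 0xca };

    static void
    patch_smap_site(void *site, int is_disable_site)
    {
            /* overwrite the nop;nop;nop sled left by the macro above */
            patch_text(site, is_disable_site ? stac_insn : clac_insn, 3);
    }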
163 163
164 164 #if defined(__lint)
165 165
166 166 /* ARGSUSED */
167 167 int
168 168 kcopy(const void *from, void *to, size_t count)
169 169 { return (0); }
170 170
171 171 #else /* __lint */
172 172
173 173 .globl kernelbase
174 174 .globl postbootkernelbase
175 175
176 176 #if defined(__amd64)
177 177
178 178 ENTRY(kcopy)
179 179 pushq %rbp
180 180 movq %rsp, %rbp
181 181 #ifdef DEBUG
182 182 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */
183 183 jb 0f
184 184 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */
185 185 jnb 1f
186 186 0: leaq .kcopy_panic_msg(%rip), %rdi
187 187 xorl %eax, %eax
188 188 call panic
189 189 1:
190 190 #endif
191 191 /*
192 192 * pass lofault value as 4th argument to do_copy_fault
193 193 */
194 194 leaq _kcopy_copyerr(%rip), %rcx
195 195 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
196 196
197 197 do_copy_fault:
198 198 movq T_LOFAULT(%r9), %r11 /* save the current lofault */
199 199 movq %rcx, T_LOFAULT(%r9) /* new lofault */
200 200 call bcopy_altentry
201 201 xorl %eax, %eax /* return 0 (success) */
202 202 SMAP_ENABLE_INSTR(0)
203 203
204 204 /*
205 205 * A fault during do_copy_fault is indicated through an errno value
206 206 * in %rax and we iretq from the trap handler to here.
207 207 */
208 208 _kcopy_copyerr:
209 209 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
210 210 leave
211 211 ret
212 212 SET_SIZE(kcopy)
213 213
214 214 #elif defined(__i386)
215 215
216 216 #define ARG_FROM 8
217 217 #define ARG_TO 12
218 218 #define ARG_COUNT 16
219 219
220 220 ENTRY(kcopy)
221 221 #ifdef DEBUG
222 222 pushl %ebp
223 223 movl %esp, %ebp
224 224 movl postbootkernelbase, %eax
225 225 cmpl %eax, ARG_FROM(%ebp)
226 226 jb 0f
227 227 cmpl %eax, ARG_TO(%ebp)
228 228 jnb 1f
229 229 0: pushl $.kcopy_panic_msg
230 230 call panic
231 231 1: popl %ebp
232 232 #endif
233 233 lea _kcopy_copyerr, %eax /* lofault value */
234 234 movl %gs:CPU_THREAD, %edx
235 235
236 236 do_copy_fault:
237 237 pushl %ebp
238 238 movl %esp, %ebp /* setup stack frame */
239 239 pushl %esi
240 240 pushl %edi /* save registers */
241 241
242 242 movl T_LOFAULT(%edx), %edi
243 243 pushl %edi /* save the current lofault */
244 244 movl %eax, T_LOFAULT(%edx) /* new lofault */
245 245
246 246 movl ARG_COUNT(%ebp), %ecx
247 247 movl ARG_FROM(%ebp), %esi
248 248 movl ARG_TO(%ebp), %edi
249 249 shrl $2, %ecx /* word count */
250 250 rep
251 251 smovl
252 252 movl ARG_COUNT(%ebp), %ecx
253 253 andl $3, %ecx /* bytes left over */
254 254 rep
255 255 smovb
256 256 xorl %eax, %eax
257 257
258 258 /*
259 259 * A fault during do_copy_fault is indicated through an errno value
260 260 * in %eax and we iret from the trap handler to here.
261 261 */
262 262 _kcopy_copyerr:
263 263 popl %ecx
264 264 popl %edi
265 265 movl %ecx, T_LOFAULT(%edx) /* restore the original lofault */
266 266 popl %esi
267 267 popl %ebp
268 268 ret
269 269 SET_SIZE(kcopy)
270 270
271 271 #undef ARG_FROM
272 272 #undef ARG_TO
273 273 #undef ARG_COUNT
274 274
275 275 #endif /* __i386 */
276 276 #endif /* __lint */
277 277
278 278 #if defined(__lint)
279 279
280 280 /*
281 281 * Copy a block of storage. Similar to kcopy but uses non-temporal
282 282 * instructions.
283 283 */
284 284
285 285 /* ARGSUSED */
286 286 int
287 287 kcopy_nta(const void *from, void *to, size_t count, int copy_cached)
288 288 { return (0); }
289 289
290 290 #else /* __lint */
291 291
292 292 #if defined(__amd64)
293 293
294 294 #define COPY_LOOP_INIT(src, dst, cnt) \
295 295 addq cnt, src; \
296 296 addq cnt, dst; \
297 297 shrq $3, cnt; \
298 298 neg cnt
299 299
300 300 /* Copy 16 bytes per loop. Uses %rax and %r8 */
301 301 #define COPY_LOOP_BODY(src, dst, cnt) \
302 302 prefetchnta 0x100(src, cnt, 8); \
303 303 movq (src, cnt, 8), %rax; \
304 304 movq 0x8(src, cnt, 8), %r8; \
305 305 movnti %rax, (dst, cnt, 8); \
306 306 movnti %r8, 0x8(dst, cnt, 8); \
307 307 addq $2, cnt
308 308
309 309 ENTRY(kcopy_nta)
310 310 pushq %rbp
311 311 movq %rsp, %rbp
312 312 #ifdef DEBUG
313 313 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */
314 314 jb 0f
315 315 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */
316 316 jnb 1f
317 317 0: leaq .kcopy_panic_msg(%rip), %rdi
318 318 xorl %eax, %eax
319 319 call panic
320 320 1:
321 321 #endif
322 322
323 323 movq %gs:CPU_THREAD, %r9
324 324 cmpq $0, %rcx /* No non-temporal access? */
325 325 /*
326 326 * pass lofault value as 4th argument to do_copy_fault
327 327 */
328 328 leaq _kcopy_nta_copyerr(%rip), %rcx /* doesn't set rflags */
329 329 jnz do_copy_fault /* use regular access */
330 330 /*
331 331 * Make sure cnt is >= KCOPY_MIN_SIZE
332 332 */
333 333 cmpq $KCOPY_MIN_SIZE, %rdx
334 334 jb do_copy_fault
335 335
336 336 /*
337 337 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
338 338 * count is COUNT_ALIGN_SIZE aligned.
339 339 */
340 340 movq %rdi, %r10
341 341 orq %rsi, %r10
342 342 andq $NTA_ALIGN_MASK, %r10
343 343 orq %rdx, %r10
344 344 andq $COUNT_ALIGN_MASK, %r10
345 345 jnz do_copy_fault
346 346
347 347 ALTENTRY(do_copy_fault_nta)
348 348 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
349 349 movq T_LOFAULT(%r9), %r11 /* save the current lofault */
350 350 movq %rcx, T_LOFAULT(%r9) /* new lofault */
351 351
352 352 /*
353 353 * COPY_LOOP_BODY uses %rax and %r8
354 354 */
355 355 COPY_LOOP_INIT(%rdi, %rsi, %rdx)
356 356 2: COPY_LOOP_BODY(%rdi, %rsi, %rdx)
357 357 jnz 2b
358 358
359 359 mfence
360 360 xorl %eax, %eax /* return 0 (success) */
361 361 SMAP_ENABLE_INSTR(1)
362 362
363 363 _kcopy_nta_copyerr:
364 364 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
365 365 leave
366 366 ret
367 367 SET_SIZE(do_copy_fault_nta)
368 368 SET_SIZE(kcopy_nta)
369 369
370 370 #elif defined(__i386)
371 371
372 372 #define ARG_FROM 8
373 373 #define ARG_TO 12
374 374 #define ARG_COUNT 16
375 375
376 376 #define COPY_LOOP_INIT(src, dst, cnt) \
377 377 addl cnt, src; \
378 378 addl cnt, dst; \
379 379 shrl $3, cnt; \
380 380 neg cnt
381 381
382 382 #define COPY_LOOP_BODY(src, dst, cnt) \
383 383 prefetchnta 0x100(src, cnt, 8); \
384 384 movl (src, cnt, 8), %esi; \
385 385 movnti %esi, (dst, cnt, 8); \
386 386 movl 0x4(src, cnt, 8), %esi; \
387 387 movnti %esi, 0x4(dst, cnt, 8); \
388 388 movl 0x8(src, cnt, 8), %esi; \
389 389 movnti %esi, 0x8(dst, cnt, 8); \
390 390 movl 0xc(src, cnt, 8), %esi; \
391 391 movnti %esi, 0xc(dst, cnt, 8); \
392 392 addl $2, cnt
393 393
394 394 /*
395 395 * kcopy_nta is not implemented for 32-bit as no performance
396 396 * improvement was shown. We simply jump directly to kcopy
397 397 * and discard the 4 arguments.
398 398 */
399 399 ENTRY(kcopy_nta)
400 400 jmp kcopy
401 401
402 402 lea _kcopy_nta_copyerr, %eax /* lofault value */
403 403 ALTENTRY(do_copy_fault_nta)
404 404 pushl %ebp
405 405 movl %esp, %ebp /* setup stack frame */
406 406 pushl %esi
407 407 pushl %edi
408 408
409 409 movl %gs:CPU_THREAD, %edx
410 410 movl T_LOFAULT(%edx), %edi
411 411 pushl %edi /* save the current lofault */
412 412 movl %eax, T_LOFAULT(%edx) /* new lofault */
413 413
414 414 /* COPY_LOOP_BODY needs to use %esi */
415 415 movl ARG_COUNT(%ebp), %ecx
416 416 movl ARG_FROM(%ebp), %edi
417 417 movl ARG_TO(%ebp), %eax
418 418 COPY_LOOP_INIT(%edi, %eax, %ecx)
419 419 1: COPY_LOOP_BODY(%edi, %eax, %ecx)
420 420 jnz 1b
421 421 mfence
422 422
423 423 xorl %eax, %eax
424 424 _kcopy_nta_copyerr:
425 425 popl %ecx
426 426 popl %edi
427 427 movl %ecx, T_LOFAULT(%edx) /* restore the original lofault */
428 428 popl %esi
429 429 leave
430 430 ret
431 431 SET_SIZE(do_copy_fault_nta)
432 432 SET_SIZE(kcopy_nta)
433 433
434 434 #undef ARG_FROM
435 435 #undef ARG_TO
436 436 #undef ARG_COUNT
437 437
438 438 #endif /* __i386 */
439 439 #endif /* __lint */
440 440
441 441 #if defined(__lint)
442 442
443 443 /* ARGSUSED */
444 444 void
445 445 bcopy(const void *from, void *to, size_t count)
446 446 {}
447 447
448 448 #else /* __lint */
449 449
450 450 #if defined(__amd64)
451 451
452 452 ENTRY(bcopy)
453 453 #ifdef DEBUG
454 454 orq %rdx, %rdx /* %rdx = count */
455 455 jz 1f
456 456 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */
457 457 jb 0f
458 458 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */
459 459 jnb 1f
460 460 0: leaq .bcopy_panic_msg(%rip), %rdi
461 461 jmp call_panic /* setup stack and call panic */
462 462 1:
463 463 #endif
464 464 /*
465 465 * bcopy_altentry() is called from kcopy, i.e., do_copy_fault.
466 466 * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy
467 467 * uses these registers in future they must be saved and restored.
468 468 */
469 469 ALTENTRY(bcopy_altentry)
470 470 do_copy:
471 471 #define L(s) .bcopy/**/s
472 472 cmpq $0x50, %rdx /* 80 */
473 473 jae bcopy_ck_size
474 474
475 475 /*
476 476  * Performance data shows many callers copy small buffers. So for
477 477 * best perf for these sizes unrolled code is used. Store data without
478 478 * worrying about alignment.
479 479 */
480 480 leaq L(fwdPxQx)(%rip), %r10
481 481 addq %rdx, %rdi
482 482 addq %rdx, %rsi
483 483 movslq (%r10,%rdx,4), %rcx
484 484 leaq (%rcx,%r10,1), %r10
485 485 jmpq *%r10
486 486
487 487 .p2align 4
488 488 L(fwdPxQx):
489 489 .int L(P0Q0)-L(fwdPxQx) /* 0 */
490 490 .int L(P1Q0)-L(fwdPxQx)
491 491 .int L(P2Q0)-L(fwdPxQx)
492 492 .int L(P3Q0)-L(fwdPxQx)
493 493 .int L(P4Q0)-L(fwdPxQx)
494 494 .int L(P5Q0)-L(fwdPxQx)
495 495 .int L(P6Q0)-L(fwdPxQx)
496 496 .int L(P7Q0)-L(fwdPxQx)
497 497
498 498 .int L(P0Q1)-L(fwdPxQx) /* 8 */
499 499 .int L(P1Q1)-L(fwdPxQx)
500 500 .int L(P2Q1)-L(fwdPxQx)
501 501 .int L(P3Q1)-L(fwdPxQx)
502 502 .int L(P4Q1)-L(fwdPxQx)
503 503 .int L(P5Q1)-L(fwdPxQx)
504 504 .int L(P6Q1)-L(fwdPxQx)
505 505 .int L(P7Q1)-L(fwdPxQx)
506 506
507 507 .int L(P0Q2)-L(fwdPxQx) /* 16 */
508 508 .int L(P1Q2)-L(fwdPxQx)
509 509 .int L(P2Q2)-L(fwdPxQx)
510 510 .int L(P3Q2)-L(fwdPxQx)
511 511 .int L(P4Q2)-L(fwdPxQx)
512 512 .int L(P5Q2)-L(fwdPxQx)
513 513 .int L(P6Q2)-L(fwdPxQx)
514 514 .int L(P7Q2)-L(fwdPxQx)
515 515
516 516 .int L(P0Q3)-L(fwdPxQx) /* 24 */
517 517 .int L(P1Q3)-L(fwdPxQx)
518 518 .int L(P2Q3)-L(fwdPxQx)
519 519 .int L(P3Q3)-L(fwdPxQx)
520 520 .int L(P4Q3)-L(fwdPxQx)
521 521 .int L(P5Q3)-L(fwdPxQx)
522 522 .int L(P6Q3)-L(fwdPxQx)
523 523 .int L(P7Q3)-L(fwdPxQx)
524 524
525 525 .int L(P0Q4)-L(fwdPxQx) /* 32 */
526 526 .int L(P1Q4)-L(fwdPxQx)
527 527 .int L(P2Q4)-L(fwdPxQx)
528 528 .int L(P3Q4)-L(fwdPxQx)
529 529 .int L(P4Q4)-L(fwdPxQx)
530 530 .int L(P5Q4)-L(fwdPxQx)
531 531 .int L(P6Q4)-L(fwdPxQx)
532 532 .int L(P7Q4)-L(fwdPxQx)
533 533
534 534 .int L(P0Q5)-L(fwdPxQx) /* 40 */
535 535 .int L(P1Q5)-L(fwdPxQx)
536 536 .int L(P2Q5)-L(fwdPxQx)
537 537 .int L(P3Q5)-L(fwdPxQx)
538 538 .int L(P4Q5)-L(fwdPxQx)
539 539 .int L(P5Q5)-L(fwdPxQx)
540 540 .int L(P6Q5)-L(fwdPxQx)
541 541 .int L(P7Q5)-L(fwdPxQx)
542 542
543 543 .int L(P0Q6)-L(fwdPxQx) /* 48 */
544 544 .int L(P1Q6)-L(fwdPxQx)
545 545 .int L(P2Q6)-L(fwdPxQx)
546 546 .int L(P3Q6)-L(fwdPxQx)
547 547 .int L(P4Q6)-L(fwdPxQx)
548 548 .int L(P5Q6)-L(fwdPxQx)
549 549 .int L(P6Q6)-L(fwdPxQx)
550 550 .int L(P7Q6)-L(fwdPxQx)
551 551
552 552 .int L(P0Q7)-L(fwdPxQx) /* 56 */
553 553 .int L(P1Q7)-L(fwdPxQx)
554 554 .int L(P2Q7)-L(fwdPxQx)
555 555 .int L(P3Q7)-L(fwdPxQx)
556 556 .int L(P4Q7)-L(fwdPxQx)
557 557 .int L(P5Q7)-L(fwdPxQx)
558 558 .int L(P6Q7)-L(fwdPxQx)
559 559 .int L(P7Q7)-L(fwdPxQx)
560 560
561 561 .int L(P0Q8)-L(fwdPxQx) /* 64 */
562 562 .int L(P1Q8)-L(fwdPxQx)
563 563 .int L(P2Q8)-L(fwdPxQx)
564 564 .int L(P3Q8)-L(fwdPxQx)
565 565 .int L(P4Q8)-L(fwdPxQx)
566 566 .int L(P5Q8)-L(fwdPxQx)
567 567 .int L(P6Q8)-L(fwdPxQx)
568 568 .int L(P7Q8)-L(fwdPxQx)
569 569
570 570 .int L(P0Q9)-L(fwdPxQx) /* 72 */
571 571 .int L(P1Q9)-L(fwdPxQx)
572 572 .int L(P2Q9)-L(fwdPxQx)
573 573 .int L(P3Q9)-L(fwdPxQx)
574 574 .int L(P4Q9)-L(fwdPxQx)
575 575 .int L(P5Q9)-L(fwdPxQx)
576 576 .int L(P6Q9)-L(fwdPxQx)
577 577 .int L(P7Q9)-L(fwdPxQx) /* 79 */
578 578
579 579 .p2align 4
580 580 L(P0Q9):
581 581 mov -0x48(%rdi), %rcx
582 582 mov %rcx, -0x48(%rsi)
583 583 L(P0Q8):
584 584 mov -0x40(%rdi), %r10
585 585 mov %r10, -0x40(%rsi)
586 586 L(P0Q7):
587 587 mov -0x38(%rdi), %r8
588 588 mov %r8, -0x38(%rsi)
589 589 L(P0Q6):
590 590 mov -0x30(%rdi), %rcx
591 591 mov %rcx, -0x30(%rsi)
592 592 L(P0Q5):
593 593 mov -0x28(%rdi), %r10
594 594 mov %r10, -0x28(%rsi)
595 595 L(P0Q4):
596 596 mov -0x20(%rdi), %r8
597 597 mov %r8, -0x20(%rsi)
598 598 L(P0Q3):
599 599 mov -0x18(%rdi), %rcx
600 600 mov %rcx, -0x18(%rsi)
601 601 L(P0Q2):
602 602 mov -0x10(%rdi), %r10
603 603 mov %r10, -0x10(%rsi)
604 604 L(P0Q1):
605 605 mov -0x8(%rdi), %r8
606 606 mov %r8, -0x8(%rsi)
607 607 L(P0Q0):
608 608 ret
609 609
610 610 .p2align 4
611 611 L(P1Q9):
612 612 mov -0x49(%rdi), %r8
613 613 mov %r8, -0x49(%rsi)
614 614 L(P1Q8):
615 615 mov -0x41(%rdi), %rcx
616 616 mov %rcx, -0x41(%rsi)
617 617 L(P1Q7):
618 618 mov -0x39(%rdi), %r10
619 619 mov %r10, -0x39(%rsi)
620 620 L(P1Q6):
621 621 mov -0x31(%rdi), %r8
622 622 mov %r8, -0x31(%rsi)
623 623 L(P1Q5):
624 624 mov -0x29(%rdi), %rcx
625 625 mov %rcx, -0x29(%rsi)
626 626 L(P1Q4):
627 627 mov -0x21(%rdi), %r10
628 628 mov %r10, -0x21(%rsi)
629 629 L(P1Q3):
630 630 mov -0x19(%rdi), %r8
631 631 mov %r8, -0x19(%rsi)
632 632 L(P1Q2):
633 633 mov -0x11(%rdi), %rcx
634 634 mov %rcx, -0x11(%rsi)
635 635 L(P1Q1):
636 636 mov -0x9(%rdi), %r10
637 637 mov %r10, -0x9(%rsi)
638 638 L(P1Q0):
639 639 movzbq -0x1(%rdi), %r8
640 640 mov %r8b, -0x1(%rsi)
641 641 ret
642 642
643 643 .p2align 4
644 644 L(P2Q9):
645 645 mov -0x4a(%rdi), %r8
646 646 mov %r8, -0x4a(%rsi)
647 647 L(P2Q8):
648 648 mov -0x42(%rdi), %rcx
649 649 mov %rcx, -0x42(%rsi)
650 650 L(P2Q7):
651 651 mov -0x3a(%rdi), %r10
652 652 mov %r10, -0x3a(%rsi)
653 653 L(P2Q6):
654 654 mov -0x32(%rdi), %r8
655 655 mov %r8, -0x32(%rsi)
656 656 L(P2Q5):
657 657 mov -0x2a(%rdi), %rcx
658 658 mov %rcx, -0x2a(%rsi)
659 659 L(P2Q4):
660 660 mov -0x22(%rdi), %r10
661 661 mov %r10, -0x22(%rsi)
662 662 L(P2Q3):
663 663 mov -0x1a(%rdi), %r8
664 664 mov %r8, -0x1a(%rsi)
665 665 L(P2Q2):
666 666 mov -0x12(%rdi), %rcx
667 667 mov %rcx, -0x12(%rsi)
668 668 L(P2Q1):
669 669 mov -0xa(%rdi), %r10
670 670 mov %r10, -0xa(%rsi)
671 671 L(P2Q0):
672 672 movzwq -0x2(%rdi), %r8
673 673 mov %r8w, -0x2(%rsi)
674 674 ret
675 675
676 676 .p2align 4
677 677 L(P3Q9):
678 678 mov -0x4b(%rdi), %r8
679 679 mov %r8, -0x4b(%rsi)
680 680 L(P3Q8):
681 681 mov -0x43(%rdi), %rcx
682 682 mov %rcx, -0x43(%rsi)
683 683 L(P3Q7):
684 684 mov -0x3b(%rdi), %r10
685 685 mov %r10, -0x3b(%rsi)
686 686 L(P3Q6):
687 687 mov -0x33(%rdi), %r8
688 688 mov %r8, -0x33(%rsi)
689 689 L(P3Q5):
690 690 mov -0x2b(%rdi), %rcx
691 691 mov %rcx, -0x2b(%rsi)
692 692 L(P3Q4):
693 693 mov -0x23(%rdi), %r10
694 694 mov %r10, -0x23(%rsi)
695 695 L(P3Q3):
696 696 mov -0x1b(%rdi), %r8
697 697 mov %r8, -0x1b(%rsi)
698 698 L(P3Q2):
699 699 mov -0x13(%rdi), %rcx
700 700 mov %rcx, -0x13(%rsi)
701 701 L(P3Q1):
702 702 mov -0xb(%rdi), %r10
703 703 mov %r10, -0xb(%rsi)
704 704 /*
705 705 * These trailing loads/stores have to do all their loads 1st,
706 706 * then do the stores.
707 707 */
708 708 L(P3Q0):
709 709 movzwq -0x3(%rdi), %r8
710 710 movzbq -0x1(%rdi), %r10
711 711 mov %r8w, -0x3(%rsi)
712 712 mov %r10b, -0x1(%rsi)
713 713 ret
714 714
715 715 .p2align 4
716 716 L(P4Q9):
717 717 mov -0x4c(%rdi), %r8
718 718 mov %r8, -0x4c(%rsi)
719 719 L(P4Q8):
720 720 mov -0x44(%rdi), %rcx
721 721 mov %rcx, -0x44(%rsi)
722 722 L(P4Q7):
723 723 mov -0x3c(%rdi), %r10
724 724 mov %r10, -0x3c(%rsi)
725 725 L(P4Q6):
726 726 mov -0x34(%rdi), %r8
727 727 mov %r8, -0x34(%rsi)
728 728 L(P4Q5):
729 729 mov -0x2c(%rdi), %rcx
730 730 mov %rcx, -0x2c(%rsi)
731 731 L(P4Q4):
732 732 mov -0x24(%rdi), %r10
733 733 mov %r10, -0x24(%rsi)
734 734 L(P4Q3):
735 735 mov -0x1c(%rdi), %r8
736 736 mov %r8, -0x1c(%rsi)
737 737 L(P4Q2):
738 738 mov -0x14(%rdi), %rcx
739 739 mov %rcx, -0x14(%rsi)
740 740 L(P4Q1):
741 741 mov -0xc(%rdi), %r10
742 742 mov %r10, -0xc(%rsi)
743 743 L(P4Q0):
744 744 mov -0x4(%rdi), %r8d
745 745 mov %r8d, -0x4(%rsi)
746 746 ret
747 747
748 748 .p2align 4
749 749 L(P5Q9):
750 750 mov -0x4d(%rdi), %r8
751 751 mov %r8, -0x4d(%rsi)
752 752 L(P5Q8):
753 753 mov -0x45(%rdi), %rcx
754 754 mov %rcx, -0x45(%rsi)
755 755 L(P5Q7):
756 756 mov -0x3d(%rdi), %r10
757 757 mov %r10, -0x3d(%rsi)
758 758 L(P5Q6):
759 759 mov -0x35(%rdi), %r8
760 760 mov %r8, -0x35(%rsi)
761 761 L(P5Q5):
762 762 mov -0x2d(%rdi), %rcx
763 763 mov %rcx, -0x2d(%rsi)
764 764 L(P5Q4):
765 765 mov -0x25(%rdi), %r10
766 766 mov %r10, -0x25(%rsi)
767 767 L(P5Q3):
768 768 mov -0x1d(%rdi), %r8
769 769 mov %r8, -0x1d(%rsi)
770 770 L(P5Q2):
771 771 mov -0x15(%rdi), %rcx
772 772 mov %rcx, -0x15(%rsi)
773 773 L(P5Q1):
774 774 mov -0xd(%rdi), %r10
775 775 mov %r10, -0xd(%rsi)
776 776 L(P5Q0):
777 777 mov -0x5(%rdi), %r8d
778 778 movzbq -0x1(%rdi), %r10
779 779 mov %r8d, -0x5(%rsi)
780 780 mov %r10b, -0x1(%rsi)
781 781 ret
782 782
783 783 .p2align 4
784 784 L(P6Q9):
785 785 mov -0x4e(%rdi), %r8
786 786 mov %r8, -0x4e(%rsi)
787 787 L(P6Q8):
788 788 mov -0x46(%rdi), %rcx
789 789 mov %rcx, -0x46(%rsi)
790 790 L(P6Q7):
791 791 mov -0x3e(%rdi), %r10
792 792 mov %r10, -0x3e(%rsi)
793 793 L(P6Q6):
794 794 mov -0x36(%rdi), %r8
795 795 mov %r8, -0x36(%rsi)
796 796 L(P6Q5):
797 797 mov -0x2e(%rdi), %rcx
798 798 mov %rcx, -0x2e(%rsi)
799 799 L(P6Q4):
800 800 mov -0x26(%rdi), %r10
801 801 mov %r10, -0x26(%rsi)
802 802 L(P6Q3):
803 803 mov -0x1e(%rdi), %r8
804 804 mov %r8, -0x1e(%rsi)
805 805 L(P6Q2):
806 806 mov -0x16(%rdi), %rcx
807 807 mov %rcx, -0x16(%rsi)
808 808 L(P6Q1):
809 809 mov -0xe(%rdi), %r10
810 810 mov %r10, -0xe(%rsi)
811 811 L(P6Q0):
812 812 mov -0x6(%rdi), %r8d
813 813 movzwq -0x2(%rdi), %r10
814 814 mov %r8d, -0x6(%rsi)
815 815 mov %r10w, -0x2(%rsi)
816 816 ret
817 817
818 818 .p2align 4
819 819 L(P7Q9):
820 820 mov -0x4f(%rdi), %r8
821 821 mov %r8, -0x4f(%rsi)
822 822 L(P7Q8):
823 823 mov -0x47(%rdi), %rcx
824 824 mov %rcx, -0x47(%rsi)
825 825 L(P7Q7):
826 826 mov -0x3f(%rdi), %r10
827 827 mov %r10, -0x3f(%rsi)
828 828 L(P7Q6):
829 829 mov -0x37(%rdi), %r8
830 830 mov %r8, -0x37(%rsi)
831 831 L(P7Q5):
832 832 mov -0x2f(%rdi), %rcx
833 833 mov %rcx, -0x2f(%rsi)
834 834 L(P7Q4):
835 835 mov -0x27(%rdi), %r10
836 836 mov %r10, -0x27(%rsi)
837 837 L(P7Q3):
838 838 mov -0x1f(%rdi), %r8
839 839 mov %r8, -0x1f(%rsi)
840 840 L(P7Q2):
841 841 mov -0x17(%rdi), %rcx
842 842 mov %rcx, -0x17(%rsi)
843 843 L(P7Q1):
844 844 mov -0xf(%rdi), %r10
845 845 mov %r10, -0xf(%rsi)
846 846 L(P7Q0):
847 847 mov -0x7(%rdi), %r8d
848 848 movzwq -0x3(%rdi), %r10
849 849 movzbq -0x1(%rdi), %rcx
850 850 mov %r8d, -0x7(%rsi)
851 851 mov %r10w, -0x3(%rsi)
852 852 mov %cl, -0x1(%rsi)
853 853 ret
854 854
855 855 /*
856 856 * For large sizes rep smovq is fastest.
857 857 * Transition point determined experimentally as measured on
858 858 * Intel Xeon processors (incl. Nehalem and previous generations) and
859 859 * AMD Opteron. The transition value is patched at boot time to avoid
860 860  * a memory reference hit.
861 861 */
862 862 .globl bcopy_patch_start
863 863 bcopy_patch_start:
864 864 cmpq $BCOPY_NHM_REP, %rdx
865 865 .globl bcopy_patch_end
866 866 bcopy_patch_end:
867 867
868 868 .p2align 4
869 869 .globl bcopy_ck_size
870 870 bcopy_ck_size:
871 871 cmpq $BCOPY_DFLT_REP, %rdx
872 872 jae L(use_rep)
873 873
874 874 /*
875 875  * Align to an 8-byte boundary. Avoids penalties from unaligned stores
876 876 * as well as from stores spanning cachelines.
877 877 */
878 878 test $0x7, %rsi
879 879 jz L(aligned_loop)
880 880 test $0x1, %rsi
881 881 jz 2f
882 882 movzbq (%rdi), %r8
883 883 dec %rdx
884 884 inc %rdi
885 885 mov %r8b, (%rsi)
886 886 inc %rsi
887 887 2:
888 888 test $0x2, %rsi
889 889 jz 4f
890 890 movzwq (%rdi), %r8
891 891 sub $0x2, %rdx
892 892 add $0x2, %rdi
893 893 mov %r8w, (%rsi)
894 894 add $0x2, %rsi
895 895 4:
896 896 test $0x4, %rsi
897 897 jz L(aligned_loop)
898 898 mov (%rdi), %r8d
899 899 sub $0x4, %rdx
900 900 add $0x4, %rdi
901 901 mov %r8d, (%rsi)
902 902 add $0x4, %rsi
903 903
904 904 /*
905 905 * Copy 64-bytes per loop
906 906 */
907 907 .p2align 4
908 908 L(aligned_loop):
909 909 mov (%rdi), %r8
910 910 mov 0x8(%rdi), %r10
911 911 lea -0x40(%rdx), %rdx
912 912 mov %r8, (%rsi)
913 913 mov %r10, 0x8(%rsi)
914 914 mov 0x10(%rdi), %rcx
915 915 mov 0x18(%rdi), %r8
916 916 mov %rcx, 0x10(%rsi)
917 917 mov %r8, 0x18(%rsi)
918 918
919 919 cmp $0x40, %rdx
920 920 mov 0x20(%rdi), %r10
921 921 mov 0x28(%rdi), %rcx
922 922 mov %r10, 0x20(%rsi)
923 923 mov %rcx, 0x28(%rsi)
924 924 mov 0x30(%rdi), %r8
925 925 mov 0x38(%rdi), %r10
926 926 lea 0x40(%rdi), %rdi
927 927 mov %r8, 0x30(%rsi)
928 928 mov %r10, 0x38(%rsi)
929 929 lea 0x40(%rsi), %rsi
930 930 jae L(aligned_loop)
931 931
932 932 /*
933 933 * Copy remaining bytes (0-63)
934 934 */
935 935 L(do_remainder):
936 936 leaq L(fwdPxQx)(%rip), %r10
937 937 addq %rdx, %rdi
938 938 addq %rdx, %rsi
939 939 movslq (%r10,%rdx,4), %rcx
940 940 leaq (%rcx,%r10,1), %r10
941 941 jmpq *%r10
942 942
943 943 /*
944 944 * Use rep smovq. Clear remainder via unrolled code
945 945 */
946 946 .p2align 4
947 947 L(use_rep):
948 948 xchgq %rdi, %rsi /* %rsi = source, %rdi = destination */
949 949 movq %rdx, %rcx /* %rcx = count */
950 950 shrq $3, %rcx /* 8-byte word count */
951 951 rep
952 952 smovq
953 953
954 954 xchgq %rsi, %rdi /* %rdi = src, %rsi = destination */
955 955 andq $7, %rdx /* remainder */
956 956 jnz L(do_remainder)
957 957 ret
958 958 #undef L
959 959
960 960 #ifdef DEBUG
961 961 /*
962 962 * Setup frame on the run-time stack. The end of the input argument
963 963  * area must be aligned on a 16-byte boundary. The stack pointer %rsp
964 964 * always points to the end of the latest allocated stack frame.
965 965 * panic(const char *format, ...) is a varargs function. When a
966 966 * function taking variable arguments is called, %rax must be set
967 967 * to eight times the number of floating point parameters passed
968 968 * to the function in SSE registers.
969 969 */
970 970 call_panic:
971 971 pushq %rbp /* align stack properly */
972 972 movq %rsp, %rbp
973 973 xorl %eax, %eax /* no variable arguments */
974 974 call panic /* %rdi = format string */
975 975 #endif
976 976 SET_SIZE(bcopy_altentry)
977 977 SET_SIZE(bcopy)
978 978
979 979 #elif defined(__i386)
980 980
981 981 #define ARG_FROM 4
982 982 #define ARG_TO 8
983 983 #define ARG_COUNT 12
984 984
985 985 ENTRY(bcopy)
986 986 #ifdef DEBUG
987 987 movl ARG_COUNT(%esp), %eax
988 988 orl %eax, %eax
989 989 jz 1f
990 990 movl postbootkernelbase, %eax
991 991 cmpl %eax, ARG_FROM(%esp)
992 992 jb 0f
993 993 cmpl %eax, ARG_TO(%esp)
994 994 jnb 1f
995 995 0: pushl %ebp
996 996 movl %esp, %ebp
997 997 pushl $.bcopy_panic_msg
998 998 call panic
999 999 1:
1000 1000 #endif
1001 1001 do_copy:
1002 1002 movl %esi, %eax /* save registers */
1003 1003 movl %edi, %edx
1004 1004 movl ARG_COUNT(%esp), %ecx
1005 1005 movl ARG_FROM(%esp), %esi
1006 1006 movl ARG_TO(%esp), %edi
1007 1007
1008 1008 shrl $2, %ecx /* word count */
1009 1009 rep
1010 1010 smovl
1011 1011 movl ARG_COUNT(%esp), %ecx
1012 1012 andl $3, %ecx /* bytes left over */
1013 1013 rep
1014 1014 smovb
1015 1015 movl %eax, %esi /* restore registers */
1016 1016 movl %edx, %edi
1017 1017 ret
1018 1018 SET_SIZE(bcopy)
1019 1019
1020 1020 #undef ARG_COUNT
1021 1021 #undef ARG_FROM
1022 1022 #undef ARG_TO
1023 1023
1024 1024 #endif /* __i386 */
1025 1025 #endif /* __lint */
1026 1026
1027 1027
1028 1028 /*
1029 1029 * Zero a block of storage, returning an error code if we
1030 1030 * take a kernel pagefault which cannot be resolved.
1031 1031 * Returns errno value on pagefault error, 0 if all ok
1032 1032 */
1033 1033
1034 1034 #if defined(__lint)
1035 1035
1036 1036 /* ARGSUSED */
1037 1037 int
1038 1038 kzero(void *addr, size_t count)
1039 1039 { return (0); }
1040 1040
1041 1041 #else /* __lint */
1042 1042
1043 1043 #if defined(__amd64)
1044 1044
1045 1045 ENTRY(kzero)
1046 1046 #ifdef DEBUG
1047 1047 cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */
1048 1048 jnb 0f
1049 1049 leaq .kzero_panic_msg(%rip), %rdi
1050 1050 jmp call_panic /* setup stack and call panic */
1051 1051 0:
1052 1052 #endif
1053 1053 /*
1054 1054 * pass lofault value as 3rd argument for fault return
1055 1055 */
1056 1056 leaq _kzeroerr(%rip), %rdx
1057 1057
1058 1058 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
1059 1059 movq T_LOFAULT(%r9), %r11 /* save the current lofault */
1060 1060 movq %rdx, T_LOFAULT(%r9) /* new lofault */
1061 1061 call bzero_altentry
1062 1062 xorl %eax, %eax
1063 1063 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
1064 1064 ret
1065 1065 /*
1066 1066 * A fault during bzero is indicated through an errno value
1067 1067 * in %rax when we iretq to here.
1068 1068 */
1069 1069 _kzeroerr:
1070 1070 addq $8, %rsp /* pop bzero_altentry call ret addr */
1071 1071 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
1072 1072 ret
1073 1073 SET_SIZE(kzero)
1074 1074
1075 1075 #elif defined(__i386)
1076 1076
1077 1077 #define ARG_ADDR 8
1078 1078 #define ARG_COUNT 12
1079 1079
1080 1080 ENTRY(kzero)
1081 1081 #ifdef DEBUG
1082 1082 pushl %ebp
1083 1083 movl %esp, %ebp
1084 1084 movl postbootkernelbase, %eax
1085 1085 cmpl %eax, ARG_ADDR(%ebp)
1086 1086 jnb 0f
1087 1087 pushl $.kzero_panic_msg
1088 1088 call panic
1089 1089 0: popl %ebp
1090 1090 #endif
1091 1091 lea _kzeroerr, %eax /* kzeroerr is lofault value */
1092 1092
1093 1093 pushl %ebp /* save stack base */
1094 1094 movl %esp, %ebp /* set new stack base */
1095 1095 pushl %edi /* save %edi */
1096 1096
1097 1097 mov %gs:CPU_THREAD, %edx
1098 1098 movl T_LOFAULT(%edx), %edi
1099 1099 pushl %edi /* save the current lofault */
1100 1100 movl %eax, T_LOFAULT(%edx) /* new lofault */
1101 1101
1102 1102 movl ARG_COUNT(%ebp), %ecx /* get size in bytes */
1103 1103 movl ARG_ADDR(%ebp), %edi /* %edi <- address of bytes to clear */
1104 1104 shrl $2, %ecx /* Count of double words to zero */
1105 1105 xorl %eax, %eax /* sstol val */
1106 1106 rep
1107 1107 sstol /* %ecx contains words to clear (%eax=0) */
1108 1108
1109 1109 movl ARG_COUNT(%ebp), %ecx /* get size in bytes */
1110 1110 andl $3, %ecx /* do mod 4 */
1111 1111 rep
1112 1112 sstob /* %ecx contains residual bytes to clear */
1113 1113
1114 1114 /*
1115 1115 * A fault during kzero is indicated through an errno value
1116 1116 * in %eax when we iret to here.
1117 1117 */
1118 1118 _kzeroerr:
1119 1119 popl %edi
1120 1120 movl %edi, T_LOFAULT(%edx) /* restore the original lofault */
1121 1121 popl %edi
1122 1122 popl %ebp
1123 1123 ret
1124 1124 SET_SIZE(kzero)
1125 1125
1126 1126 #undef ARG_ADDR
1127 1127 #undef ARG_COUNT
1128 1128
1129 1129 #endif /* __i386 */
1130 1130 #endif /* __lint */
1131 1131
1132 1132 /*
1133 1133 * Zero a block of storage.
1134 1134 */
1135 1135
1136 1136 #if defined(__lint)
1137 1137
1138 1138 /* ARGSUSED */
1139 1139 void
1140 1140 bzero(void *addr, size_t count)
1141 1141 {}
1142 1142
1143 1143 #else /* __lint */
1144 1144
1145 1145 #if defined(__amd64)
1146 1146
1147 1147 ENTRY(bzero)
1148 1148 #ifdef DEBUG
1149 1149 cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */
1150 1150 jnb 0f
1151 1151 leaq .bzero_panic_msg(%rip), %rdi
1152 1152 jmp call_panic /* setup stack and call panic */
1153 1153 0:
1154 1154 #endif
1155 1155 ALTENTRY(bzero_altentry)
1156 1156 do_zero:
1157 1157 #define L(s) .bzero/**/s
1158 1158 xorl %eax, %eax
1159 1159
1160 1160 cmpq $0x50, %rsi /* 80 */
1161 1161 jae L(ck_align)
1162 1162
1163 1163 /*
1164 1164  * Performance data shows many callers are zeroing small buffers. So
1165 1165 * for best perf for these sizes unrolled code is used. Store zeros
1166 1166 * without worrying about alignment.
1167 1167 */
1168 1168 leaq L(setPxQx)(%rip), %r10
1169 1169 addq %rsi, %rdi
1170 1170 movslq (%r10,%rsi,4), %rcx
1171 1171 leaq (%rcx,%r10,1), %r10
1172 1172 jmpq *%r10
1173 1173
1174 1174 .p2align 4
1175 1175 L(setPxQx):
1176 1176 .int L(P0Q0)-L(setPxQx) /* 0 */
1177 1177 .int L(P1Q0)-L(setPxQx)
1178 1178 .int L(P2Q0)-L(setPxQx)
1179 1179 .int L(P3Q0)-L(setPxQx)
1180 1180 .int L(P4Q0)-L(setPxQx)
1181 1181 .int L(P5Q0)-L(setPxQx)
1182 1182 .int L(P6Q0)-L(setPxQx)
1183 1183 .int L(P7Q0)-L(setPxQx)
1184 1184
1185 1185 .int L(P0Q1)-L(setPxQx) /* 8 */
1186 1186 .int L(P1Q1)-L(setPxQx)
1187 1187 .int L(P2Q1)-L(setPxQx)
1188 1188 .int L(P3Q1)-L(setPxQx)
1189 1189 .int L(P4Q1)-L(setPxQx)
1190 1190 .int L(P5Q1)-L(setPxQx)
1191 1191 .int L(P6Q1)-L(setPxQx)
1192 1192 .int L(P7Q1)-L(setPxQx)
1193 1193
1194 1194 .int L(P0Q2)-L(setPxQx) /* 16 */
1195 1195 .int L(P1Q2)-L(setPxQx)
1196 1196 .int L(P2Q2)-L(setPxQx)
1197 1197 .int L(P3Q2)-L(setPxQx)
1198 1198 .int L(P4Q2)-L(setPxQx)
1199 1199 .int L(P5Q2)-L(setPxQx)
1200 1200 .int L(P6Q2)-L(setPxQx)
1201 1201 .int L(P7Q2)-L(setPxQx)
1202 1202
1203 1203 .int L(P0Q3)-L(setPxQx) /* 24 */
1204 1204 .int L(P1Q3)-L(setPxQx)
1205 1205 .int L(P2Q3)-L(setPxQx)
1206 1206 .int L(P3Q3)-L(setPxQx)
1207 1207 .int L(P4Q3)-L(setPxQx)
1208 1208 .int L(P5Q3)-L(setPxQx)
1209 1209 .int L(P6Q3)-L(setPxQx)
1210 1210 .int L(P7Q3)-L(setPxQx)
1211 1211
1212 1212 .int L(P0Q4)-L(setPxQx) /* 32 */
1213 1213 .int L(P1Q4)-L(setPxQx)
1214 1214 .int L(P2Q4)-L(setPxQx)
1215 1215 .int L(P3Q4)-L(setPxQx)
1216 1216 .int L(P4Q4)-L(setPxQx)
1217 1217 .int L(P5Q4)-L(setPxQx)
1218 1218 .int L(P6Q4)-L(setPxQx)
1219 1219 .int L(P7Q4)-L(setPxQx)
1220 1220
1221 1221 .int L(P0Q5)-L(setPxQx) /* 40 */
1222 1222 .int L(P1Q5)-L(setPxQx)
1223 1223 .int L(P2Q5)-L(setPxQx)
1224 1224 .int L(P3Q5)-L(setPxQx)
1225 1225 .int L(P4Q5)-L(setPxQx)
1226 1226 .int L(P5Q5)-L(setPxQx)
1227 1227 .int L(P6Q5)-L(setPxQx)
1228 1228 .int L(P7Q5)-L(setPxQx)
1229 1229
1230 1230 .int L(P0Q6)-L(setPxQx) /* 48 */
1231 1231 .int L(P1Q6)-L(setPxQx)
1232 1232 .int L(P2Q6)-L(setPxQx)
1233 1233 .int L(P3Q6)-L(setPxQx)
1234 1234 .int L(P4Q6)-L(setPxQx)
1235 1235 .int L(P5Q6)-L(setPxQx)
1236 1236 .int L(P6Q6)-L(setPxQx)
1237 1237 .int L(P7Q6)-L(setPxQx)
1238 1238
1239 1239 .int L(P0Q7)-L(setPxQx) /* 56 */
1240 1240 .int L(P1Q7)-L(setPxQx)
1241 1241 .int L(P2Q7)-L(setPxQx)
1242 1242 .int L(P3Q7)-L(setPxQx)
1243 1243 .int L(P4Q7)-L(setPxQx)
1244 1244 .int L(P5Q7)-L(setPxQx)
1245 1245 .int L(P6Q7)-L(setPxQx)
1246 1246 .int L(P7Q7)-L(setPxQx)
1247 1247
1248 1248 .int L(P0Q8)-L(setPxQx) /* 64 */
1249 1249 .int L(P1Q8)-L(setPxQx)
1250 1250 .int L(P2Q8)-L(setPxQx)
1251 1251 .int L(P3Q8)-L(setPxQx)
1252 1252 .int L(P4Q8)-L(setPxQx)
1253 1253 .int L(P5Q8)-L(setPxQx)
1254 1254 .int L(P6Q8)-L(setPxQx)
1255 1255 .int L(P7Q8)-L(setPxQx)
1256 1256
1257 1257 .int L(P0Q9)-L(setPxQx) /* 72 */
1258 1258 .int L(P1Q9)-L(setPxQx)
1259 1259 .int L(P2Q9)-L(setPxQx)
1260 1260 .int L(P3Q9)-L(setPxQx)
1261 1261 .int L(P4Q9)-L(setPxQx)
1262 1262 .int L(P5Q9)-L(setPxQx)
1263 1263 .int L(P6Q9)-L(setPxQx)
1264 1264 .int L(P7Q9)-L(setPxQx) /* 79 */
1265 1265
1266 1266 .p2align 4
1267 1267 L(P0Q9): mov %rax, -0x48(%rdi)
1268 1268 L(P0Q8): mov %rax, -0x40(%rdi)
1269 1269 L(P0Q7): mov %rax, -0x38(%rdi)
1270 1270 L(P0Q6): mov %rax, -0x30(%rdi)
1271 1271 L(P0Q5): mov %rax, -0x28(%rdi)
1272 1272 L(P0Q4): mov %rax, -0x20(%rdi)
1273 1273 L(P0Q3): mov %rax, -0x18(%rdi)
1274 1274 L(P0Q2): mov %rax, -0x10(%rdi)
1275 1275 L(P0Q1): mov %rax, -0x8(%rdi)
1276 1276 L(P0Q0):
1277 1277 ret
1278 1278
1279 1279 .p2align 4
1280 1280 L(P1Q9): mov %rax, -0x49(%rdi)
1281 1281 L(P1Q8): mov %rax, -0x41(%rdi)
1282 1282 L(P1Q7): mov %rax, -0x39(%rdi)
1283 1283 L(P1Q6): mov %rax, -0x31(%rdi)
1284 1284 L(P1Q5): mov %rax, -0x29(%rdi)
1285 1285 L(P1Q4): mov %rax, -0x21(%rdi)
1286 1286 L(P1Q3): mov %rax, -0x19(%rdi)
1287 1287 L(P1Q2): mov %rax, -0x11(%rdi)
1288 1288 L(P1Q1): mov %rax, -0x9(%rdi)
1289 1289 L(P1Q0): mov %al, -0x1(%rdi)
1290 1290 ret
1291 1291
1292 1292 .p2align 4
1293 1293 L(P2Q9): mov %rax, -0x4a(%rdi)
1294 1294 L(P2Q8): mov %rax, -0x42(%rdi)
1295 1295 L(P2Q7): mov %rax, -0x3a(%rdi)
1296 1296 L(P2Q6): mov %rax, -0x32(%rdi)
1297 1297 L(P2Q5): mov %rax, -0x2a(%rdi)
1298 1298 L(P2Q4): mov %rax, -0x22(%rdi)
1299 1299 L(P2Q3): mov %rax, -0x1a(%rdi)
1300 1300 L(P2Q2): mov %rax, -0x12(%rdi)
1301 1301 L(P2Q1): mov %rax, -0xa(%rdi)
1302 1302 L(P2Q0): mov %ax, -0x2(%rdi)
1303 1303 ret
1304 1304
1305 1305 .p2align 4
1306 1306 L(P3Q9): mov %rax, -0x4b(%rdi)
1307 1307 L(P3Q8): mov %rax, -0x43(%rdi)
1308 1308 L(P3Q7): mov %rax, -0x3b(%rdi)
1309 1309 L(P3Q6): mov %rax, -0x33(%rdi)
1310 1310 L(P3Q5): mov %rax, -0x2b(%rdi)
1311 1311 L(P3Q4): mov %rax, -0x23(%rdi)
1312 1312 L(P3Q3): mov %rax, -0x1b(%rdi)
1313 1313 L(P3Q2): mov %rax, -0x13(%rdi)
1314 1314 L(P3Q1): mov %rax, -0xb(%rdi)
1315 1315 L(P3Q0): mov %ax, -0x3(%rdi)
1316 1316 mov %al, -0x1(%rdi)
1317 1317 ret
1318 1318
1319 1319 .p2align 4
1320 1320 L(P4Q9): mov %rax, -0x4c(%rdi)
1321 1321 L(P4Q8): mov %rax, -0x44(%rdi)
1322 1322 L(P4Q7): mov %rax, -0x3c(%rdi)
1323 1323 L(P4Q6): mov %rax, -0x34(%rdi)
1324 1324 L(P4Q5): mov %rax, -0x2c(%rdi)
1325 1325 L(P4Q4): mov %rax, -0x24(%rdi)
1326 1326 L(P4Q3): mov %rax, -0x1c(%rdi)
1327 1327 L(P4Q2): mov %rax, -0x14(%rdi)
1328 1328 L(P4Q1): mov %rax, -0xc(%rdi)
1329 1329 L(P4Q0): mov %eax, -0x4(%rdi)
1330 1330 ret
1331 1331
1332 1332 .p2align 4
1333 1333 L(P5Q9): mov %rax, -0x4d(%rdi)
1334 1334 L(P5Q8): mov %rax, -0x45(%rdi)
1335 1335 L(P5Q7): mov %rax, -0x3d(%rdi)
1336 1336 L(P5Q6): mov %rax, -0x35(%rdi)
1337 1337 L(P5Q5): mov %rax, -0x2d(%rdi)
1338 1338 L(P5Q4): mov %rax, -0x25(%rdi)
1339 1339 L(P5Q3): mov %rax, -0x1d(%rdi)
1340 1340 L(P5Q2): mov %rax, -0x15(%rdi)
1341 1341 L(P5Q1): mov %rax, -0xd(%rdi)
1342 1342 L(P5Q0): mov %eax, -0x5(%rdi)
1343 1343 mov %al, -0x1(%rdi)
1344 1344 ret
1345 1345
1346 1346 .p2align 4
1347 1347 L(P6Q9): mov %rax, -0x4e(%rdi)
1348 1348 L(P6Q8): mov %rax, -0x46(%rdi)
1349 1349 L(P6Q7): mov %rax, -0x3e(%rdi)
1350 1350 L(P6Q6): mov %rax, -0x36(%rdi)
1351 1351 L(P6Q5): mov %rax, -0x2e(%rdi)
1352 1352 L(P6Q4): mov %rax, -0x26(%rdi)
1353 1353 L(P6Q3): mov %rax, -0x1e(%rdi)
1354 1354 L(P6Q2): mov %rax, -0x16(%rdi)
1355 1355 L(P6Q1): mov %rax, -0xe(%rdi)
1356 1356 L(P6Q0): mov %eax, -0x6(%rdi)
1357 1357 mov %ax, -0x2(%rdi)
1358 1358 ret
1359 1359
1360 1360 .p2align 4
1361 1361 L(P7Q9): mov %rax, -0x4f(%rdi)
1362 1362 L(P7Q8): mov %rax, -0x47(%rdi)
1363 1363 L(P7Q7): mov %rax, -0x3f(%rdi)
1364 1364 L(P7Q6): mov %rax, -0x37(%rdi)
1365 1365 L(P7Q5): mov %rax, -0x2f(%rdi)
1366 1366 L(P7Q4): mov %rax, -0x27(%rdi)
1367 1367 L(P7Q3): mov %rax, -0x1f(%rdi)
1368 1368 L(P7Q2): mov %rax, -0x17(%rdi)
1369 1369 L(P7Q1): mov %rax, -0xf(%rdi)
1370 1370 L(P7Q0): mov %eax, -0x7(%rdi)
1371 1371 mov %ax, -0x3(%rdi)
1372 1372 mov %al, -0x1(%rdi)
1373 1373 ret
1374 1374
1375 1375 /*
1376 1376 * Align to a 16-byte boundary. Avoids penalties from unaligned stores
1377 1377 * as well as from stores spanning cachelines. Note 16-byte alignment
1378 1378  * is better in the case where rep sstosq is used.
1379 1379 */
1380 1380 .p2align 4
1381 1381 L(ck_align):
1382 1382 test $0xf, %rdi
1383 1383 jz L(aligned_now)
1384 1384 test $1, %rdi
1385 1385 jz 2f
1386 1386 mov %al, (%rdi)
1387 1387 dec %rsi
1388 1388 lea 1(%rdi),%rdi
1389 1389 2:
1390 1390 test $2, %rdi
1391 1391 jz 4f
1392 1392 mov %ax, (%rdi)
1393 1393 sub $2, %rsi
1394 1394 lea 2(%rdi),%rdi
1395 1395 4:
1396 1396 test $4, %rdi
1397 1397 jz 8f
1398 1398 mov %eax, (%rdi)
1399 1399 sub $4, %rsi
1400 1400 lea 4(%rdi),%rdi
1401 1401 8:
1402 1402 test $8, %rdi
1403 1403 jz L(aligned_now)
1404 1404 mov %rax, (%rdi)
1405 1405 sub $8, %rsi
1406 1406 lea 8(%rdi),%rdi
1407 1407
1408 1408 /*
1409 1409 * For large sizes rep sstoq is fastest.
1410 1410 * Transition point determined experimentally as measured on
1411 1411 * Intel Xeon processors (incl. Nehalem) and AMD Opteron.
1412 1412 */
1413 1413 L(aligned_now):
1414 1414 cmp $BZERO_USE_REP, %rsi
1415 1415 ja L(use_rep)
1416 1416
1417 1417 /*
1418 1418 * zero 64-bytes per loop
1419 1419 */
1420 1420 .p2align 4
1421 1421 L(bzero_loop):
1422 1422 leaq -0x40(%rsi), %rsi
1423 1423 cmpq $0x40, %rsi
1424 1424 movq %rax, (%rdi)
1425 1425 movq %rax, 0x8(%rdi)
1426 1426 movq %rax, 0x10(%rdi)
1427 1427 movq %rax, 0x18(%rdi)
1428 1428 movq %rax, 0x20(%rdi)
1429 1429 movq %rax, 0x28(%rdi)
1430 1430 movq %rax, 0x30(%rdi)
1431 1431 movq %rax, 0x38(%rdi)
1432 1432 leaq 0x40(%rdi), %rdi
1433 1433 jae L(bzero_loop)
1434 1434
1435 1435 /*
1436 1436  * Clear any remaining bytes.
1437 1437 */
1438 1438 9:
1439 1439 leaq L(setPxQx)(%rip), %r10
1440 1440 addq %rsi, %rdi
1441 1441 movslq (%r10,%rsi,4), %rcx
1442 1442 leaq (%rcx,%r10,1), %r10
1443 1443 jmpq *%r10
1444 1444
1445 1445 /*
1446 1446 * Use rep sstoq. Clear any remainder via unrolled code
1447 1447 */
1448 1448 .p2align 4
1449 1449 L(use_rep):
1450 1450 movq %rsi, %rcx /* get size in bytes */
1451 1451 shrq $3, %rcx /* count of 8-byte words to zero */
1452 1452 rep
1453 1453 sstoq /* %rcx = words to clear (%rax=0) */
1454 1454 andq $7, %rsi /* remaining bytes */
1455 1455 jnz 9b
1456 1456 ret
1457 1457 #undef L
1458 1458 SET_SIZE(bzero_altentry)
1459 1459 SET_SIZE(bzero)
1460 1460
1461 1461 #elif defined(__i386)
1462 1462
1463 1463 #define ARG_ADDR 4
1464 1464 #define ARG_COUNT 8
1465 1465
1466 1466 ENTRY(bzero)
1467 1467 #ifdef DEBUG
1468 1468 movl postbootkernelbase, %eax
1469 1469 cmpl %eax, ARG_ADDR(%esp)
1470 1470 jnb 0f
1471 1471 pushl %ebp
1472 1472 movl %esp, %ebp
1473 1473 pushl $.bzero_panic_msg
1474 1474 call panic
1475 1475 0:
1476 1476 #endif
1477 1477 do_zero:
1478 1478 movl %edi, %edx
1479 1479 movl ARG_COUNT(%esp), %ecx
1480 1480 movl ARG_ADDR(%esp), %edi
1481 1481 shrl $2, %ecx
1482 1482 xorl %eax, %eax
1483 1483 rep
1484 1484 sstol
1485 1485 movl ARG_COUNT(%esp), %ecx
1486 1486 andl $3, %ecx
1487 1487 rep
1488 1488 sstob
1489 1489 movl %edx, %edi
1490 1490 ret
1491 1491 SET_SIZE(bzero)
1492 1492
1493 1493 #undef ARG_ADDR
1494 1494 #undef ARG_COUNT
1495 1495
1496 1496 #endif /* __i386 */
1497 1497 #endif /* __lint */
1498 1498
1499 1499 /*
1500 1500 * Transfer data to and from user space -
1501 1501 * Note that these routines can cause faults
1502 1502 * It is assumed that the kernel has nothing at
1503 1503 * less than KERNELBASE in the virtual address space.
1504 1504 *
1505 1505 * Note that copyin(9F) and copyout(9F) are part of the
1506 1506 * DDI/DKI which specifies that they return '-1' on "errors."
1507 1507 *
1508 1508 * Sigh.
1509 1509 *
1510 1510 * So there's two extremely similar routines - xcopyin_nta() and
1511 1511 * xcopyout_nta() which return the errno that we've faithfully computed.
1512 1512 * This allows other callers (e.g. uiomove(9F)) to work correctly.
1513 1513 * Given that these are used pretty heavily, we expand the calling
1514 1514 * sequences inline for all flavours (rather than making wrappers).
1515 1515 */
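To make the two conventions concrete, here are two hedged example callers
(hypothetical, not part of copy.s, assuming the kernel prototypes for copyin()
and xcopyin_nta()): the DDI pair reports any failure as -1, while the _nta
flavours return the computed errno; a copy_cached argument of 0 permits the
non-temporal path when the alignment and minimum-size checks pass.

    #include <sys/types.h>
    #include <sys/errno.h>
    #include <sys/systm.h>

    static int
    get_user_arg(const void *uaddr, void *karg, size_t len)
    {
            if (copyin(uaddr, karg, len) != 0)
                    return (EFAULT);        /* copyin(9F) only says -1 */
            return (0);
    }

    static int
    get_user_arg_nta(const void *uaddr, void *karg, size_t len)
    {
            /* returns 0 or the errno computed by the fault path */
            return (xcopyin_nta(uaddr, karg, len, 0));
    }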
1516 1516
1517 1517 /*
1518 1518 * Copy user data to kernel space.
1519 1519 */
1520 1520
1521 1521 #if defined(__lint)
1522 1522
1523 1523 /* ARGSUSED */
1524 1524 int
1525 1525 copyin(const void *uaddr, void *kaddr, size_t count)
1526 1526 { return (0); }
1527 1527
1528 1528 #else /* lint */
1529 1529
1530 1530 #if defined(__amd64)
1531 1531
1532 1532 ENTRY(copyin)
1533 1533 pushq %rbp
1534 1534 movq %rsp, %rbp
1535 1535 subq $24, %rsp
1536 1536
1537 1537 /*
1538 1538 * save args in case we trap and need to rerun as a copyop
1539 1539 */
1540 1540 movq %rdi, (%rsp)
1541 1541 movq %rsi, 0x8(%rsp)
1542 1542 movq %rdx, 0x10(%rsp)
1543 1543
1544 1544 movq kernelbase(%rip), %rax
1545 1545 #ifdef DEBUG
1546 1546 cmpq %rax, %rsi /* %rsi = kaddr */
1547 1547 jnb 1f
1548 1548 leaq .copyin_panic_msg(%rip), %rdi
1549 1549 xorl %eax, %eax
1550 1550 call panic
1551 1551 1:
1552 1552 #endif
1553 1553 /*
1554 1554 * pass lofault value as 4th argument to do_copy_fault
1555 1555 */
1556 1556 leaq _copyin_err(%rip), %rcx
1557 1557
1558 1558 movq %gs:CPU_THREAD, %r9
1559 1559 cmpq %rax, %rdi /* test uaddr < kernelbase */
1560 1560 jae 3f /* take copyop if uaddr > kernelbase */
1561 1561 SMAP_DISABLE_INSTR(0)
1562 1562 jmp do_copy_fault /* Takes care of leave for us */
1563 1563
1564 1564 _copyin_err:
1565 1565 SMAP_ENABLE_INSTR(2)
1566 1566 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1567 1567 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1568 1568 3:
1569 1569 movq T_COPYOPS(%r9), %rax
1570 1570 cmpq $0, %rax
1571 1571 jz 2f
1572 1572 /*
1573 1573 * reload args for the copyop
1574 1574 */
1575 1575 movq (%rsp), %rdi
1576 1576 movq 0x8(%rsp), %rsi
1577 1577 movq 0x10(%rsp), %rdx
1578 1578 leave
1579 1579 jmp *CP_COPYIN(%rax)
1580 1580
1581 1581 2: movl $-1, %eax
1582 1582 leave
1583 1583 ret
1584 1584 SET_SIZE(copyin)
1585 1585
1586 1586 #elif defined(__i386)
1587 1587
1588 1588 #define ARG_UADDR 4
1589 1589 #define ARG_KADDR 8
1590 1590
1591 1591 ENTRY(copyin)
1592 1592 movl kernelbase, %ecx
1593 1593 #ifdef DEBUG
1594 1594 cmpl %ecx, ARG_KADDR(%esp)
1595 1595 jnb 1f
1596 1596 pushl %ebp
1597 1597 movl %esp, %ebp
1598 1598 pushl $.copyin_panic_msg
1599 1599 call panic
1600 1600 1:
1601 1601 #endif
1602 1602 lea _copyin_err, %eax
1603 1603
1604 1604 movl %gs:CPU_THREAD, %edx
1605 1605 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
1606 1606 jb do_copy_fault
1607 1607 jmp 3f
1608 1608
1609 1609 _copyin_err:
1610 1610 popl %ecx
1611 1611 popl %edi
1612 1612 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */
1613 1613 popl %esi
1614 1614 popl %ebp
1615 1615 3:
1616 1616 movl T_COPYOPS(%edx), %eax
1617 1617 cmpl $0, %eax
1618 1618 jz 2f
1619 1619 jmp *CP_COPYIN(%eax)
1620 1620
1621 1621 2: movl $-1, %eax
1622 1622 ret
1623 1623 SET_SIZE(copyin)
1624 1624
1625 1625 #undef ARG_UADDR
1626 1626 #undef ARG_KADDR
1627 1627
1628 1628 #endif /* __i386 */
1629 1629 #endif /* __lint */
1630 1630
1631 1631 #if defined(__lint)
1632 1632
1633 1633 /* ARGSUSED */
1634 1634 int
1635 1635 xcopyin_nta(const void *uaddr, void *kaddr, size_t count, int copy_cached)
1636 1636 { return (0); }
1637 1637
1638 1638 #else /* __lint */
1639 1639
1640 1640 #if defined(__amd64)
1641 1641
1642 1642 ENTRY(xcopyin_nta)
1643 1643 pushq %rbp
1644 1644 movq %rsp, %rbp
1645 1645 subq $24, %rsp
1646 1646
1647 1647 /*
1648 1648 * save args in case we trap and need to rerun as a copyop
1649 1649 * %rcx is consumed in this routine so we don't need to save
1650 1650 * it.
1651 1651 */
1652 1652 movq %rdi, (%rsp)
1653 1653 movq %rsi, 0x8(%rsp)
1654 1654 movq %rdx, 0x10(%rsp)
1655 1655
1656 1656 movq kernelbase(%rip), %rax
1657 1657 #ifdef DEBUG
1658 1658 cmpq %rax, %rsi /* %rsi = kaddr */
1659 1659 jnb 1f
1660 1660 leaq .xcopyin_panic_msg(%rip), %rdi
1661 1661 xorl %eax, %eax
1662 1662 call panic
1663 1663 1:
1664 1664 #endif
1665 1665 movq %gs:CPU_THREAD, %r9
1666 1666 cmpq %rax, %rdi /* test uaddr < kernelbase */
1667 1667 jae 4f
1668 1668 cmpq $0, %rcx /* No non-temporal access? */
1669 1669 /*
1670 1670 * pass lofault value as 4th argument to do_copy_fault
1671 1671 */
1672 1672 leaq _xcopyin_err(%rip), %rcx /* doesn't set rflags */
1673 1673 jnz 6f /* use regular access */
1674 1674 /*
1675 1675 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1676 1676 */
1677 1677 cmpq $XCOPY_MIN_SIZE, %rdx
1678 1678 jae 5f
1679 1679 6:
1680 1680 SMAP_DISABLE_INSTR(1)
1681 1681 jmp do_copy_fault
1682 1682
1683 1683 /*
1684 1684 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1685 1685 * count is COUNT_ALIGN_SIZE aligned.
1686 1686 */
1687 1687 5:
1688 1688 movq %rdi, %r10
1689 1689 orq %rsi, %r10
1690 1690 andq $NTA_ALIGN_MASK, %r10
1691 1691 orq %rdx, %r10
1692 1692 andq $COUNT_ALIGN_MASK, %r10
1693 1693 jnz 6b
1694 1694 leaq _xcopyin_nta_err(%rip), %rcx /* doesn't set rflags */
1695 1695 SMAP_DISABLE_INSTR(2)
1696 1696 jmp do_copy_fault_nta /* use non-temporal access */
1697 1697
1698 1698 4:
1699 1699 movl $EFAULT, %eax
1700 1700 jmp 3f
1701 1701
1702 1702 /*
1703 1703 * A fault during do_copy_fault or do_copy_fault_nta is
1704 1704 * indicated through an errno value in %rax and we iret from the
1705 1705 * trap handler to here.
1706 1706 */
1707 1707 _xcopyin_err:
1708 1708 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1709 1709 _xcopyin_nta_err:
1710 1710 SMAP_ENABLE_INSTR(3)
1711 1711 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1712 1712 3:
1713 1713 movq T_COPYOPS(%r9), %r8
1714 1714 cmpq $0, %r8
1715 1715 jz 2f
1716 1716
1717 1717 /*
1718 1718 * reload args for the copyop
1719 1719 */
1720 1720 movq (%rsp), %rdi
1721 1721 movq 0x8(%rsp), %rsi
1722 1722 movq 0x10(%rsp), %rdx
1723 1723 leave
1724 1724 jmp *CP_XCOPYIN(%r8)
1725 1725
1726 1726 2: leave
1727 1727 ret
1728 1728 SET_SIZE(xcopyin_nta)
1729 1729
1730 1730 #elif defined(__i386)
1731 1731
1732 1732 #define ARG_UADDR 4
1733 1733 #define ARG_KADDR 8
1734 1734 #define ARG_COUNT 12
1735 1735 #define ARG_CACHED 16
1736 1736
1737 1737 .globl use_sse_copy
1738 1738
1739 1739 ENTRY(xcopyin_nta)
1740 1740 movl kernelbase, %ecx
1741 1741 lea _xcopyin_err, %eax
1742 1742 movl %gs:CPU_THREAD, %edx
1743 1743 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
1744 1744 jae 4f
1745 1745
1746 1746 cmpl $0, use_sse_copy /* no sse support */
1747 1747 jz do_copy_fault
1748 1748
1749 1749 cmpl $0, ARG_CACHED(%esp) /* copy_cached hint set? */
1750 1750 jnz do_copy_fault
1751 1751
1752 1752 /*
1753 1753 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1754 1754 */
1755 1755 cmpl $XCOPY_MIN_SIZE, ARG_COUNT(%esp)
1756 1756 jb do_copy_fault
1757 1757
1758 1758 /*
1759 1759 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1760 1760 * count is COUNT_ALIGN_SIZE aligned.
1761 1761 */
1762 1762 movl ARG_UADDR(%esp), %ecx
1763 1763 orl ARG_KADDR(%esp), %ecx
1764 1764 andl $NTA_ALIGN_MASK, %ecx
1765 1765 orl ARG_COUNT(%esp), %ecx
1766 1766 andl $COUNT_ALIGN_MASK, %ecx
1767 1767 jnz do_copy_fault
1768 1768
1769 1769 jmp do_copy_fault_nta /* use regular access */
1770 1770
1771 1771 4:
1772 1772 movl $EFAULT, %eax
1773 1773 jmp 3f
1774 1774
1775 1775 /*
1776 1776 * A fault during do_copy_fault or do_copy_fault_nta is
1777 1777 * indicated through an errno value in %eax and we iret from the
1778 1778 * trap handler to here.
1779 1779 */
1780 1780 _xcopyin_err:
1781 1781 popl %ecx
1782 1782 popl %edi
1783 1783 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */
1784 1784 popl %esi
1785 1785 popl %ebp
1786 1786 3:
1787 1787 cmpl $0, T_COPYOPS(%edx)
1788 1788 jz 2f
1789 1789 movl T_COPYOPS(%edx), %eax
1790 1790 jmp *CP_XCOPYIN(%eax)
1791 1791
1792 1792 2: rep; ret /* use 2 byte return instruction when branch target */
1793 1793 /* AMD Software Optimization Guide - Section 6.2 */
1794 1794 SET_SIZE(xcopyin_nta)
1795 1795
1796 1796 #undef ARG_UADDR
1797 1797 #undef ARG_KADDR
1798 1798 #undef ARG_COUNT
1799 1799 #undef ARG_CACHED
1800 1800
1801 1801 #endif /* __i386 */
1802 1802 #endif /* __lint */
1803 1803
1804 1804 /*
1805 1805 * Copy kernel data to user space.
1806 1806 */
1807 1807
1808 1808 #if defined(__lint)
1809 1809
1810 1810 /* ARGSUSED */
1811 1811 int
1812 1812 copyout(const void *kaddr, void *uaddr, size_t count)
1813 1813 { return (0); }
1814 1814
1815 1815 #else /* __lint */
1816 1816
1817 1817 #if defined(__amd64)
1818 1818
1819 1819 ENTRY(copyout)
1820 1820 pushq %rbp
1821 1821 movq %rsp, %rbp
1822 1822 subq $24, %rsp
1823 1823
1824 1824 /*
1825 1825 * save args in case we trap and need to rerun as a copyop
1826 1826 */
1827 1827 movq %rdi, (%rsp)
1828 1828 movq %rsi, 0x8(%rsp)
1829 1829 movq %rdx, 0x10(%rsp)
1830 1830
1831 1831 movq kernelbase(%rip), %rax
1832 1832 #ifdef DEBUG
1833 1833 cmpq %rax, %rdi /* %rdi = kaddr */
1834 1834 jnb 1f
1835 1835 leaq .copyout_panic_msg(%rip), %rdi
1836 1836 xorl %eax, %eax
1837 1837 call panic
1838 1838 1:
1839 1839 #endif
1840 1840 /*
1841 1841 * pass lofault value as 4th argument to do_copy_fault
1842 1842 */
1843 1843 leaq _copyout_err(%rip), %rcx
1844 1844
1845 1845 movq %gs:CPU_THREAD, %r9
1846 1846 cmpq %rax, %rsi /* test uaddr < kernelbase */
1847 1847 jae 3f /* take copyop if uaddr > kernelbase */
1848 1848 SMAP_DISABLE_INSTR(3)
1849 1849 jmp do_copy_fault /* Calls leave for us */
1850 1850
1851 1851 _copyout_err:
1852 1852 SMAP_ENABLE_INSTR(4)
1853 1853 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1854 1854 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1855 1855 3:
1856 1856 movq T_COPYOPS(%r9), %rax
1857 1857 cmpq $0, %rax
1858 1858 jz 2f
1859 1859
1860 1860 /*
1861 1861 * reload args for the copyop
1862 1862 */
1863 1863 movq (%rsp), %rdi
1864 1864 movq 0x8(%rsp), %rsi
1865 1865 movq 0x10(%rsp), %rdx
1866 1866 leave
1867 1867 jmp *CP_COPYOUT(%rax)
1868 1868
1869 1869 2: movl $-1, %eax
1870 1870 leave
1871 1871 ret
1872 1872 SET_SIZE(copyout)
1873 1873
1874 1874 #elif defined(__i386)
1875 1875
1876 1876 #define ARG_KADDR 4
1877 1877 #define ARG_UADDR 8
1878 1878
1879 1879 ENTRY(copyout)
1880 1880 movl kernelbase, %ecx
1881 1881 #ifdef DEBUG
1882 1882 cmpl %ecx, ARG_KADDR(%esp)
1883 1883 jnb 1f
1884 1884 pushl %ebp
1885 1885 movl %esp, %ebp
1886 1886 pushl $.copyout_panic_msg
1887 1887 call panic
1888 1888 1:
1889 1889 #endif
1890 1890 lea _copyout_err, %eax
1891 1891 movl %gs:CPU_THREAD, %edx
1892 1892 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
1893 1893 jb do_copy_fault
1894 1894 jmp 3f
1895 1895
1896 1896 _copyout_err:
1897 1897 popl %ecx
1898 1898 popl %edi
1899 1899 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */
1900 1900 popl %esi
1901 1901 popl %ebp
1902 1902 3:
1903 1903 movl T_COPYOPS(%edx), %eax
1904 1904 cmpl $0, %eax
1905 1905 jz 2f
1906 1906 jmp *CP_COPYOUT(%eax)
1907 1907
1908 1908 2: movl $-1, %eax
1909 1909 ret
1910 1910 SET_SIZE(copyout)
1911 1911
1912 1912 #undef ARG_UADDR
1913 1913 #undef ARG_KADDR
1914 1914
1915 1915 #endif /* __i386 */
1916 1916 #endif /* __lint */
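
As a point of reference for readers of this path, the sketch below shows how a
consumer typically drives copyout(): it returns 0 on success and -1 if the user
address faults (or is not below kernelbase), which callers usually map to
EFAULT. The helper and its names are hypothetical, not part of this file.

	#include <sys/types.h>
	#include <sys/errno.h>
	#include <sys/systm.h>		/* copyout() */

	/* Hypothetical helper: hand a 64-bit counter back to a user buffer. */
	static int
	export_counter(uint64_t counter, void *ubuf)
	{
		/* copyout() returns 0 on success, -1 on a fault */
		if (copyout(&counter, ubuf, sizeof (counter)) != 0)
			return (EFAULT);
		return (0);
	}
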
1917 1917
1918 1918 #if defined(__lint)
1919 1919
1920 1920 /* ARGSUSED */
1921 1921 int
1922 1922 xcopyout_nta(const void *kaddr, void *uaddr, size_t count, int copy_cached)
1923 1923 { return (0); }
1924 1924
1925 1925 #else /* __lint */
1926 1926
1927 1927 #if defined(__amd64)
1928 1928
1929 1929 ENTRY(xcopyout_nta)
1930 1930 pushq %rbp
1931 1931 movq %rsp, %rbp
1932 1932 subq $24, %rsp
1933 1933
1934 1934 /*
1935 1935 * save args in case we trap and need to rerun as a copyop
1936 1936 */
1937 1937 movq %rdi, (%rsp)
1938 1938 movq %rsi, 0x8(%rsp)
1939 1939 movq %rdx, 0x10(%rsp)
1940 1940
1941 1941 movq kernelbase(%rip), %rax
1942 1942 #ifdef DEBUG
1943 1943 cmpq %rax, %rdi /* %rdi = kaddr */
1944 1944 jnb 1f
1945 1945 leaq .xcopyout_panic_msg(%rip), %rdi
1946 1946 xorl %eax, %eax
1947 1947 call panic
1948 1948 1:
1949 1949 #endif
1950 1950 movq %gs:CPU_THREAD, %r9
1951 1951 cmpq %rax, %rsi /* test uaddr < kernelbase */
1952 1952 jae 4f
1953 1953
1954 1954 cmpq $0, %rcx /* No non-temporal access? */
1955 1955 /*
1956 1956 * pass lofault value as 4th argument to do_copy_fault
1957 1957 */
1958 1958 leaq _xcopyout_err(%rip), %rcx
1959 1959 jnz 6f
1960 1960 /*
1961 1961 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1962 1962 */
1963 1963 cmpq $XCOPY_MIN_SIZE, %rdx
1964 1964 jae 5f
1965 1965 6:
1966 1966 SMAP_DISABLE_INSTR(4)
1967 1967 jmp do_copy_fault
1968 1968
1969 1969 /*
1970 1970 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1971 1971 * count is COUNT_ALIGN_SIZE aligned.
1972 1972 */
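	/*
	 * A sketch of the check below in C, using the masks defined near the
	 * top of this file (NTA_ALIGN_MASK == 0x3, COUNT_ALIGN_MASK == 0xf):
	 *
	 *	if (((((uintptr_t)kaddr | (uintptr_t)uaddr) & NTA_ALIGN_MASK) |
	 *	    count) & COUNT_ALIGN_MASK)
	 *		fall back to the ordinary do_copy_fault path;
	 *
	 * Folding both tests into one register works because NTA_ALIGN_MASK
	 * is a subset of COUNT_ALIGN_MASK.
	 */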
1973 1973 5:
1974 1974 movq %rdi, %r10
1975 1975 orq %rsi, %r10
1976 1976 andq $NTA_ALIGN_MASK, %r10
1977 1977 orq %rdx, %r10
1978 1978 andq $COUNT_ALIGN_MASK, %r10
1979 1979 jnz 6b
1980 1980 leaq _xcopyout_nta_err(%rip), %rcx
1981 1981 SMAP_DISABLE_INSTR(5)
1982 1982 call do_copy_fault_nta
1983 1983 SMAP_ENABLE_INSTR(5)
1984 1984 ret
1985 1985
1986 1986 4:
1987 1987 movl $EFAULT, %eax
1988 1988 jmp 3f
1989 1989
1990 1990 /*
1991 1991 * A fault during do_copy_fault or do_copy_fault_nta is
1992 1992 * indicated through an errno value in %rax and we iret from the
1993 1993 * trap handler to here.
1994 1994 */
1995 1995 _xcopyout_err:
1996 1996 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1997 1997 _xcopyout_nta_err:
1998 1998 SMAP_ENABLE_INSTR(6)
1999 1999 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
2000 2000 3:
2001 2001 movq T_COPYOPS(%r9), %r8
2002 2002 cmpq $0, %r8
2003 2003 jz 2f
2004 2004
2005 2005 /*
2006 2006 * reload args for the copyop
2007 2007 */
2008 2008 movq (%rsp), %rdi
2009 2009 movq 0x8(%rsp), %rsi
2010 2010 movq 0x10(%rsp), %rdx
2011 2011 leave
2012 2012 jmp *CP_XCOPYOUT(%r8)
2013 2013
2014 2014 2: leave
2015 2015 ret
2016 2016 SET_SIZE(xcopyout_nta)
2017 2017
2018 2018 #elif defined(__i386)
2019 2019
2020 2020 #define ARG_KADDR 4
2021 2021 #define ARG_UADDR 8
2022 2022 #define ARG_COUNT 12
2023 2023 #define ARG_CACHED 16
2024 2024
2025 2025 ENTRY(xcopyout_nta)
2026 2026 movl kernelbase, %ecx
2027 2027 lea _xcopyout_err, %eax
2028 2028 movl %gs:CPU_THREAD, %edx
2029 2029 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
2030 2030 jae 4f
2031 2031
2032 2032 cmpl $0, use_sse_copy /* no sse support */
2033 2033 jz do_copy_fault
2034 2034
2035 2035 cmpl $0, ARG_CACHED(%esp) /* copy_cached hint set? */
2036 2036 jnz do_copy_fault
2037 2037
2038 2038 /*
2039 2039 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
2040 2040 */
2041 2041 cmpl $XCOPY_MIN_SIZE, %edx
2042 2042 jb do_copy_fault
2043 2043
2044 2044 /*
2045 2045 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
2046 2046 * count is COUNT_ALIGN_SIZE aligned.
2047 2047 */
2048 2048 movl ARG_UADDR(%esp), %ecx
2049 2049 orl ARG_KADDR(%esp), %ecx
2050 2050 andl $NTA_ALIGN_MASK, %ecx
2051 2051 orl ARG_COUNT(%esp), %ecx
2052 2052 andl $COUNT_ALIGN_MASK, %ecx
2053 2053 jnz do_copy_fault
2054 2054 jmp do_copy_fault_nta
2055 2055
2056 2056 4:
2057 2057 movl $EFAULT, %eax
2058 2058 jmp 3f
2059 2059
2060 2060 /*
2061 2061 * A fault during do_copy_fault or do_copy_fault_nta is
2062 2062 * indicated through an errno value in %eax and we iret from the
2063 2063 * trap handler to here.
2064 2064 */
2065 2065 _xcopyout_err:
2066 2066 / restore the original lofault
2067 2067 popl %ecx
2068 2068 popl %edi
2069 2069 movl %ecx, T_LOFAULT(%edx) / original lofault
2070 2070 popl %esi
2071 2071 popl %ebp
2072 2072 3:
2073 2073 cmpl $0, T_COPYOPS(%edx)
2074 2074 jz 2f
2075 2075 movl T_COPYOPS(%edx), %eax
2076 2076 jmp *CP_XCOPYOUT(%eax)
2077 2077
2078 2078 2: rep; ret /* use 2 byte return instruction when branch target */
2079 2079 /* AMD Software Optimization Guide - Section 6.2 */
2080 2080 SET_SIZE(xcopyout_nta)
2081 2081
2082 2082 #undef ARG_UADDR
2083 2083 #undef ARG_KADDR
2084 2084 #undef ARG_COUNT
2085 2085 #undef ARG_CACHED
2086 2086
2087 2087 #endif /* __i386 */
2088 2088 #endif /* __lint */
2089 2089
2090 2090 /*
2091 2091 * Copy a null terminated string from one point to another in
2092 2092 * the kernel address space.
2093 2093 */
2094 2094
2095 2095 #if defined(__lint)
2096 2096
2097 2097 /* ARGSUSED */
2098 2098 int
2099 2099 copystr(const char *from, char *to, size_t maxlength, size_t *lencopied)
2100 2100 { return (0); }
2101 2101
2102 2102 #else /* __lint */
2103 2103
2104 2104 #if defined(__amd64)
2105 2105
2106 2106 ENTRY(copystr)
2107 2107 pushq %rbp
2108 2108 movq %rsp, %rbp
2109 2109 #ifdef DEBUG
2110 2110 movq kernelbase(%rip), %rax
2111 2111 cmpq %rax, %rdi /* %rdi = from */
2112 2112 jb 0f
2113 2113 cmpq %rax, %rsi /* %rsi = to */
2114 2114 jnb 1f
2115 2115 0: leaq .copystr_panic_msg(%rip), %rdi
2116 2116 xorl %eax, %eax
2117 2117 call panic
2118 2118 1:
2119 2119 #endif
2120 2120 movq %gs:CPU_THREAD, %r9
2121 2121 movq T_LOFAULT(%r9), %r8 /* pass current lofault value as */
2122 2122 /* 5th argument to do_copystr */
2123 2123 xorl %r10d,%r10d /* pass smap restore need in %r10d */
2124 2124 /* as a non-ABI 6th arg */
2125 2125 do_copystr:
2126 2126 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
2127 2127 movq T_LOFAULT(%r9), %r11 /* save the current lofault */
2128 2128 movq %r8, T_LOFAULT(%r9) /* new lofault */
2129 2129
2130 2130 movq %rdx, %r8 /* save maxlength */
2131 2131
2132 2132 cmpq $0, %rdx /* %rdx = maxlength */
2133 2133 je copystr_enametoolong /* maxlength == 0 */
2134 2134
2135 2135 copystr_loop:
2136 2136 decq %r8
2137 2137 movb (%rdi), %al
2138 2138 incq %rdi
2139 2139 movb %al, (%rsi)
2140 2140 incq %rsi
2141 2141 cmpb $0, %al
2142 2142 je copystr_null /* null char */
2143 2143 cmpq $0, %r8
2144 2144 jne copystr_loop
2145 2145
2146 2146 copystr_enametoolong:
2147 2147 movl $ENAMETOOLONG, %eax
2148 2148 jmp copystr_out
2149 2149
2150 2150 copystr_null:
2151 2151 xorl %eax, %eax /* no error */
2152 2152
2153 2153 copystr_out:
2154 2154 cmpq $0, %rcx /* want length? */
2155 2155 je copystr_smap /* no */
2156 2156 subq %r8, %rdx /* compute length and store it */
2157 2157 movq %rdx, (%rcx)
2158 2158
2159 2159 copystr_smap:
2160 2160 cmpl $0, %r10d
2161 2161 jz copystr_done
2162 2162 SMAP_ENABLE_INSTR(7)
2163 2163
2164 2164 copystr_done:
2165 2165 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
2166 2166 leave
2167 2167 ret
2168 2168 SET_SIZE(copystr)
2169 2169
2170 2170 #elif defined(__i386)
2171 2171
2172 2172 #define ARG_FROM 8
2173 2173 #define ARG_TO 12
2174 2174 #define ARG_MAXLEN 16
2175 2175 #define ARG_LENCOPIED 20
2176 2176
2177 2177 ENTRY(copystr)
2178 2178 #ifdef DEBUG
2179 2179 pushl %ebp
2180 2180 movl %esp, %ebp
2181 2181 movl kernelbase, %eax
2182 2182 cmpl %eax, ARG_FROM(%esp)
2183 2183 jb 0f
2184 2184 cmpl %eax, ARG_TO(%esp)
2185 2185 jnb 1f
2186 2186 0: pushl $.copystr_panic_msg
2187 2187 call panic
2188 2188 1: popl %ebp
2189 2189 #endif
2190 2190 /* get the current lofault address */
2191 2191 movl %gs:CPU_THREAD, %eax
2192 2192 movl T_LOFAULT(%eax), %eax
2193 2193 do_copystr:
2194 2194 pushl %ebp /* setup stack frame */
2195 2195 movl %esp, %ebp
2196 2196 pushl %ebx /* save registers */
2197 2197 pushl %edi
2198 2198
2199 2199 movl %gs:CPU_THREAD, %ebx
2200 2200 movl T_LOFAULT(%ebx), %edi
2201 2201 pushl %edi /* save the current lofault */
2202 2202 movl %eax, T_LOFAULT(%ebx) /* new lofault */
2203 2203
2204 2204 movl ARG_MAXLEN(%ebp), %ecx
2205 2205 cmpl $0, %ecx
2206 2206 je copystr_enametoolong /* maxlength == 0 */
2207 2207
2208 2208 movl ARG_FROM(%ebp), %ebx /* source address */
2209 2209 movl ARG_TO(%ebp), %edx /* destination address */
2210 2210
2211 2211 copystr_loop:
2212 2212 decl %ecx
2213 2213 movb (%ebx), %al
2214 2214 incl %ebx
2215 2215 movb %al, (%edx)
2216 2216 incl %edx
2217 2217 cmpb $0, %al
2218 2218 je copystr_null /* null char */
2219 2219 cmpl $0, %ecx
2220 2220 jne copystr_loop
2221 2221
2222 2222 copystr_enametoolong:
2223 2223 movl $ENAMETOOLONG, %eax
2224 2224 jmp copystr_out
2225 2225
2226 2226 copystr_null:
2227 2227 xorl %eax, %eax /* no error */
2228 2228
2229 2229 copystr_out:
2230 2230 cmpl $0, ARG_LENCOPIED(%ebp) /* want length? */
2231 2231 je copystr_done /* no */
2232 2232 movl ARG_MAXLEN(%ebp), %edx
2233 2233 subl %ecx, %edx /* compute length and store it */
2234 2234 movl ARG_LENCOPIED(%ebp), %ecx
2235 2235 movl %edx, (%ecx)
2236 2236
2237 2237 copystr_done:
2238 2238 popl %edi
2239 2239 movl %gs:CPU_THREAD, %ebx
2240 2240 movl %edi, T_LOFAULT(%ebx) /* restore the original lofault */
2241 2241
2242 2242 popl %edi
2243 2243 popl %ebx
2244 2244 popl %ebp
2245 2245 ret
2246 2246 SET_SIZE(copystr)
2247 2247
2248 2248 #undef ARG_FROM
2249 2249 #undef ARG_TO
2250 2250 #undef ARG_MAXLEN
2251 2251 #undef ARG_LENCOPIED
2252 2252
2253 2253 #endif /* __i386 */
2254 2254 #endif /* __lint */
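
To illustrate the contract implemented above: copystr() copies at most
maxlength bytes (terminating NUL included) between two kernel addresses,
returns 0 on success or ENAMETOOLONG if the string did not fit, and, when
lencopied is non-NULL, stores the number of bytes copied, NUL included. The
helper below is a hypothetical sketch of a caller.

	#include <sys/types.h>
	#include <sys/errno.h>
	#include <sys/systm.h>		/* copystr() */

	/* Hypothetical helper: duplicate a kernel-resident name into a fixed buffer. */
	static int
	fetch_name(const char *src, char name[32], size_t *lenp)
	{
		int err = copystr(src, name, 32, lenp);

		/* 0, or ENAMETOOLONG if src plus its NUL did not fit in 32 bytes */
		return (err);
	}
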
2255 2255
2256 2256 /*
2257 2257 * Copy a null terminated string from the user address space into
2258 2258 * the kernel address space.
2259 2259 */
2260 2260
2261 2261 #if defined(__lint)
2262 2262
2263 2263 /* ARGSUSED */
2264 2264 int
2265 2265 copyinstr(const char *uaddr, char *kaddr, size_t maxlength,
2266 2266 size_t *lencopied)
2267 2267 { return (0); }
2268 2268
2269 2269 #else /* __lint */
2270 2270
2271 2271 #if defined(__amd64)
2272 2272
2273 2273 ENTRY(copyinstr)
2274 2274 pushq %rbp
2275 2275 movq %rsp, %rbp
2276 2276 subq $32, %rsp
2277 2277
2278 2278 /*
2279 2279 * save args in case we trap and need to rerun as a copyop
2280 2280 */
2281 2281 movq %rdi, (%rsp)
2282 2282 movq %rsi, 0x8(%rsp)
2283 2283 movq %rdx, 0x10(%rsp)
2284 2284 movq %rcx, 0x18(%rsp)
2285 2285
2286 2286 movq kernelbase(%rip), %rax
2287 2287 #ifdef DEBUG
2288 2288 cmpq %rax, %rsi /* %rsi = kaddr */
2289 2289 jnb 1f
2290 2290 leaq .copyinstr_panic_msg(%rip), %rdi
2291 2291 xorl %eax, %eax
2292 2292 call panic
2293 2293 1:
2294 2294 #endif
2295 2295 /*
2296 2296 * pass lofault value as 5th argument to do_copystr
2297 2297 	 * do_copystr expects a flag in %r10d saying whether SMAP must be re-enabled
2298 2298 */
2299 2299 leaq _copyinstr_error(%rip), %r8
2300 2300 movl $1, %r10d
2301 2301
2302 2302 cmpq %rax, %rdi /* test uaddr < kernelbase */
2303 2303 jae 4f
2304 2304 SMAP_DISABLE_INSTR(6)
2305 2305 jmp do_copystr
2306 2306 4:
2307 2307 movq %gs:CPU_THREAD, %r9
2308 2308 jmp 3f
2309 2309
2310 2310 _copyinstr_error:
2311 2311 SMAP_ENABLE_INSTR(8)
2312 2312 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
2313 2313 3:
2314 2314 movq T_COPYOPS(%r9), %rax
2315 2315 cmpq $0, %rax
2316 2316 jz 2f
2317 2317
2318 2318 /*
2319 2319 * reload args for the copyop
2320 2320 */
2321 2321 movq (%rsp), %rdi
2322 2322 movq 0x8(%rsp), %rsi
2323 2323 movq 0x10(%rsp), %rdx
2324 2324 movq 0x18(%rsp), %rcx
2325 2325 leave
2326 2326 jmp *CP_COPYINSTR(%rax)
2327 2327
2328 2328 2: movl $EFAULT, %eax /* return EFAULT */
2329 2329 leave
2330 2330 ret
2331 2331 SET_SIZE(copyinstr)
2332 2332
2333 2333 #elif defined(__i386)
2334 2334
2335 2335 #define ARG_UADDR 4
2336 2336 #define ARG_KADDR 8
2337 2337
2338 2338 ENTRY(copyinstr)
2339 2339 movl kernelbase, %ecx
2340 2340 #ifdef DEBUG
2341 2341 cmpl %ecx, ARG_KADDR(%esp)
2342 2342 jnb 1f
2343 2343 pushl %ebp
2344 2344 movl %esp, %ebp
2345 2345 pushl $.copyinstr_panic_msg
2346 2346 call panic
2347 2347 1:
2348 2348 #endif
2349 2349 lea _copyinstr_error, %eax
2350 2350 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
2351 2351 jb do_copystr
2352 2352 movl %gs:CPU_THREAD, %edx
2353 2353 jmp 3f
2354 2354
2355 2355 _copyinstr_error:
2356 2356 popl %edi
2357 2357 movl %gs:CPU_THREAD, %edx
2358 2358 movl %edi, T_LOFAULT(%edx) /* original lofault */
2359 2359
2360 2360 popl %edi
2361 2361 popl %ebx
2362 2362 popl %ebp
2363 2363 3:
2364 2364 movl T_COPYOPS(%edx), %eax
2365 2365 cmpl $0, %eax
2366 2366 jz 2f
2367 2367 jmp *CP_COPYINSTR(%eax)
2368 2368
2369 2369 2: movl $EFAULT, %eax /* return EFAULT */
2370 2370 ret
2371 2371 SET_SIZE(copyinstr)
2372 2372
2373 2373 #undef ARG_UADDR
2374 2374 #undef ARG_KADDR
2375 2375
2376 2376 #endif /* __i386 */
2377 2377 #endif /* __lint */
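
For completeness, a sketch of the usual copyinstr() caller: pulling a
user-supplied pathname into a kernel buffer. The return value is 0 on success,
EFAULT on a faulting or out-of-range user address, or ENAMETOOLONG if the
string exceeds maxlength; copyoutstr() below is the mirror image. The helper
and its names are hypothetical.

	#include <sys/types.h>
	#include <sys/param.h>		/* MAXPATHLEN */
	#include <sys/errno.h>
	#include <sys/systm.h>		/* copyinstr() */
	#include <sys/kmem.h>

	/* Hypothetical helper: bring a user path into kernel memory. */
	static int
	import_path(const char *upath, char **pathp)
	{
		char *buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		size_t len;
		int err;

		err = copyinstr(upath, buf, MAXPATHLEN, &len);
		if (err != 0) {			/* EFAULT or ENAMETOOLONG */
			kmem_free(buf, MAXPATHLEN);
			return (err);
		}
		*pathp = buf;			/* caller frees MAXPATHLEN bytes */
		return (0);
	}
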
2378 2378
2379 2379 /*
2380 2380 * Copy a null terminated string from the kernel
2381 2381 * address space to the user address space.
2382 2382 */
2383 2383
2384 2384 #if defined(__lint)
2385 2385
2386 2386 /* ARGSUSED */
2387 2387 int
2388 2388 copyoutstr(const char *kaddr, char *uaddr, size_t maxlength,
2389 2389 size_t *lencopied)
2390 2390 { return (0); }
2391 2391
2392 2392 #else /* __lint */
2393 2393
2394 2394 #if defined(__amd64)
2395 2395
2396 2396 ENTRY(copyoutstr)
2397 2397 pushq %rbp
2398 2398 movq %rsp, %rbp
2399 2399 subq $32, %rsp
2400 2400
2401 2401 /*
2402 2402 * save args in case we trap and need to rerun as a copyop
2403 2403 */
2404 2404 movq %rdi, (%rsp)
2405 2405 movq %rsi, 0x8(%rsp)
2406 2406 movq %rdx, 0x10(%rsp)
2407 2407 movq %rcx, 0x18(%rsp)
2408 2408
2409 2409 movq kernelbase(%rip), %rax
2410 2410 #ifdef DEBUG
2411 2411 cmpq %rax, %rdi /* %rdi = kaddr */
2412 2412 jnb 1f
2413 2413 leaq .copyoutstr_panic_msg(%rip), %rdi
2414 2414 jmp call_panic /* setup stack and call panic */
2415 2415 1:
2416 2416 #endif
2417 2417 /*
2418 2418 * pass lofault value as 5th argument to do_copystr
2419 2419 * pass one as 6th argument to do_copystr in %r10d
2420 2420 */
2421 2421 leaq _copyoutstr_error(%rip), %r8
2422 2422 movl $1, %r10d
2423 2423
2424 2424 cmpq %rax, %rsi /* test uaddr < kernelbase */
2425 2425 jae 4f
2426 2426 SMAP_DISABLE_INSTR(7)
2427 2427 jmp do_copystr
2428 2428 4:
2429 2429 movq %gs:CPU_THREAD, %r9
2430 2430 jmp 3f
2431 2431
2432 2432 _copyoutstr_error:
2433 2433 SMAP_ENABLE_INSTR(9)
2434 2434 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
2435 2435 3:
2436 2436 movq T_COPYOPS(%r9), %rax
2437 2437 cmpq $0, %rax
2438 2438 jz 2f
2439 2439
2440 2440 /*
2441 2441 * reload args for the copyop
2442 2442 */
2443 2443 movq (%rsp), %rdi
2444 2444 movq 0x8(%rsp), %rsi
2445 2445 movq 0x10(%rsp), %rdx
2446 2446 movq 0x18(%rsp), %rcx
2447 2447 leave
2448 2448 jmp *CP_COPYOUTSTR(%rax)
2449 2449
2450 2450 2: movl $EFAULT, %eax /* return EFAULT */
2451 2451 leave
2452 2452 ret
2453 2453 SET_SIZE(copyoutstr)
2454 2454
2455 2455 #elif defined(__i386)
2456 2456
2457 2457 #define ARG_KADDR 4
2458 2458 #define ARG_UADDR 8
2459 2459
2460 2460 ENTRY(copyoutstr)
2461 2461 movl kernelbase, %ecx
2462 2462 #ifdef DEBUG
2463 2463 cmpl %ecx, ARG_KADDR(%esp)
2464 2464 jnb 1f
2465 2465 pushl %ebp
2466 2466 movl %esp, %ebp
2467 2467 pushl $.copyoutstr_panic_msg
2468 2468 call panic
2469 2469 1:
2470 2470 #endif
2471 2471 lea _copyoutstr_error, %eax
2472 2472 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
2473 2473 jb do_copystr
2474 2474 movl %gs:CPU_THREAD, %edx
2475 2475 jmp 3f
2476 2476
2477 2477 _copyoutstr_error:
2478 2478 popl %edi
2479 2479 movl %gs:CPU_THREAD, %edx
2480 2480 movl %edi, T_LOFAULT(%edx) /* restore the original lofault */
2481 2481
2482 2482 popl %edi
2483 2483 popl %ebx
2484 2484 popl %ebp
2485 2485 3:
2486 2486 movl T_COPYOPS(%edx), %eax
2487 2487 cmpl $0, %eax
2488 2488 jz 2f
2489 2489 jmp *CP_COPYOUTSTR(%eax)
2490 2490
2491 2491 2: movl $EFAULT, %eax /* return EFAULT */
2492 2492 ret
2493 2493 SET_SIZE(copyoutstr)
2494 2494
2495 2495 #undef ARG_KADDR
2496 2496 #undef ARG_UADDR
2497 2497
2498 2498 #endif /* __i386 */
2499 2499 #endif /* __lint */
2500 2500
2501 2501 /*
2502 2502 * Since all of the fuword() variants are so similar, we have a macro to spit
2503 2503 * them out. This allows us to create DTrace-unobservable functions easily.
2504 2504 */
2505 2505
2506 2506 #if defined(__lint)
2507 2507
2508 2508 #if defined(__amd64)
2509 2509
2510 2510 /* ARGSUSED */
2511 2511 int
2512 2512 fuword64(const void *addr, uint64_t *dst)
2513 2513 { return (0); }
2514 2514
2515 2515 #endif
2516 2516
2517 2517 /* ARGSUSED */
2518 2518 int
2519 2519 fuword32(const void *addr, uint32_t *dst)
2520 2520 { return (0); }
2521 2521
2522 2522 /* ARGSUSED */
2523 2523 int
2524 2524 fuword16(const void *addr, uint16_t *dst)
2525 2525 { return (0); }
2526 2526
2527 2527 /* ARGSUSED */
2528 2528 int
2529 2529 fuword8(const void *addr, uint8_t *dst)
2530 2530 { return (0); }
2531 2531
2532 2532 #else /* __lint */
2533 2533
2534 2534 #if defined(__amd64)
2535 2535
2536 2536 /*
2537 2537 * Note that we don't save and reload the arguments here
2538 2538 * because their values are not altered in the copy path.
2539 2539 * Additionally, when successful, the smap_enable jmp will
2540 2540 * actually return us to our original caller.
2541 2541 */
2542 2542
2543 2543 #define FUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \
2544 2544 ENTRY(NAME) \
2545 2545 movq %gs:CPU_THREAD, %r9; \
2546 2546 cmpq kernelbase(%rip), %rdi; \
2547 2547 jae 1f; \
2548 2548 leaq _flt_/**/NAME, %rdx; \
2549 2549 movq %rdx, T_LOFAULT(%r9); \
2550 2550 SMAP_DISABLE_INSTR(DISNUM) \
2551 2551 INSTR (%rdi), REG; \
2552 2552 movq $0, T_LOFAULT(%r9); \
2553 2553 INSTR REG, (%rsi); \
2554 2554 xorl %eax, %eax; \
2555 2555 SMAP_ENABLE_INSTR(EN1) \
2556 2556 ret; \
2557 2557 _flt_/**/NAME: \
2558 2558 SMAP_ENABLE_INSTR(EN2) \
2559 2559 movq $0, T_LOFAULT(%r9); \
2560 2560 1: \
2561 2561 movq T_COPYOPS(%r9), %rax; \
2562 2562 cmpq $0, %rax; \
2563 2563 jz 2f; \
2564 2564 jmp *COPYOP(%rax); \
2565 2565 2: \
2566 2566 movl $-1, %eax; \
2567 2567 ret; \
2568 2568 SET_SIZE(NAME)
2569 2569
2570 2570 FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11)
2571 2571 FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13)
2572 2572 FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15)
2573 2573 FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17)
2574 2574
2575 2575 #elif defined(__i386)
2576 2576
2577 2577 #define FUWORD(NAME, INSTR, REG, COPYOP) \
2578 2578 ENTRY(NAME) \
2579 2579 movl %gs:CPU_THREAD, %ecx; \
2580 2580 movl kernelbase, %eax; \
2581 2581 cmpl %eax, 4(%esp); \
2582 2582 jae 1f; \
2583 2583 lea _flt_/**/NAME, %edx; \
2584 2584 movl %edx, T_LOFAULT(%ecx); \
2585 2585 movl 4(%esp), %eax; \
2586 2586 movl 8(%esp), %edx; \
2587 2587 INSTR (%eax), REG; \
2588 2588 movl $0, T_LOFAULT(%ecx); \
2589 2589 INSTR REG, (%edx); \
2590 2590 xorl %eax, %eax; \
2591 2591 ret; \
2592 2592 _flt_/**/NAME: \
2593 2593 movl $0, T_LOFAULT(%ecx); \
2594 2594 1: \
2595 2595 movl T_COPYOPS(%ecx), %eax; \
2596 2596 cmpl $0, %eax; \
2597 2597 jz 2f; \
2598 2598 jmp *COPYOP(%eax); \
2599 2599 2: \
2600 2600 movl $-1, %eax; \
2601 2601 ret; \
2602 2602 SET_SIZE(NAME)
2603 2603
2604 2604 FUWORD(fuword32, movl, %eax, CP_FUWORD32)
2605 2605 FUWORD(fuword16, movw, %ax, CP_FUWORD16)
2606 2606 FUWORD(fuword8, movb, %al, CP_FUWORD8)
2607 2607
2608 2608 #endif /* __i386 */
2609 2609
2610 2610 #undef FUWORD
2611 2611
2612 2612 #endif /* __lint */
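
The macro above stamps out fuword64/32/16/8 so that only the move instruction,
register width, copyops slot and SMAP patch indices differ; the suword*()
stores that follow mirror it. From the caller's side the contract is: 0 on
success, -1 on failure (a fault, or an address at or above kernelbase), unless
an installed copyops vector handles the access. A hypothetical sketch, with no
atomicity implied:

	#include <sys/types.h>
	#include <sys/errno.h>
	#include <sys/systm.h>		/* fuword32(), suword32() */

	/* Hypothetical helper: read, increment and store back a 32-bit user word. */
	static int
	bump_user_word(void *uaddr)
	{
		uint32_t v;

		if (fuword32(uaddr, &v) == -1)		/* fetch user word */
			return (EFAULT);
		if (suword32(uaddr, v + 1) == -1)	/* store user word */
			return (EFAULT);
		return (0);
	}
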
2613 2613
2614 2614 /*
2615 2615 * Set user word.
2616 2616 */
2617 2617
2618 2618 #if defined(__lint)
2619 2619
2620 2620 #if defined(__amd64)
2621 2621
2622 2622 /* ARGSUSED */
2623 2623 int
2624 2624 suword64(void *addr, uint64_t value)
2625 2625 { return (0); }
2626 2626
2627 2627 #endif
2628 2628
2629 2629 /* ARGSUSED */
2630 2630 int
2631 2631 suword32(void *addr, uint32_t value)
2632 2632 { return (0); }
2633 2633
2634 2634 /* ARGSUSED */
2635 2635 int
2636 2636 suword16(void *addr, uint16_t value)
2637 2637 { return (0); }
2638 2638
2639 2639 /* ARGSUSED */
2640 2640 int
2641 2641 suword8(void *addr, uint8_t value)
2642 2642 { return (0); }
2643 2643
2644 2644 #else /* lint */
2645 2645
2646 2646 #if defined(__amd64)
2647 2647
2648 2648 /*
2649 2649 * Note that we don't save and reload the arguments here
2650 2650 * because their values are not altered in the copy path.
2651 2651 */
2652 2652
2653 2653 #define SUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \
2654 2654 ENTRY(NAME) \
2655 2655 movq %gs:CPU_THREAD, %r9; \
2656 2656 cmpq kernelbase(%rip), %rdi; \
2657 2657 jae 1f; \
2658 2658 leaq _flt_/**/NAME, %rdx; \
2659 2659 SMAP_DISABLE_INSTR(DISNUM) \
2660 2660 movq %rdx, T_LOFAULT(%r9); \
2661 2661 INSTR REG, (%rdi); \
2662 2662 movq $0, T_LOFAULT(%r9); \
2663 2663 xorl %eax, %eax; \
2664 2664 SMAP_ENABLE_INSTR(EN1) \
2665 2665 ret; \
2666 2666 _flt_/**/NAME: \
2667 2667 SMAP_ENABLE_INSTR(EN2) \
2668 2668 movq $0, T_LOFAULT(%r9); \
2669 2669 1: \
2670 2670 movq T_COPYOPS(%r9), %rax; \
2671 2671 cmpq $0, %rax; \
2672 2672 jz 3f; \
2673 2673 jmp *COPYOP(%rax); \
2674 2674 3: \
2675 2675 movl $-1, %eax; \
2676 2676 ret; \
2677 2677 SET_SIZE(NAME)
2678 2678
2679 2679 SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19)
2680 2680 SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21)
2681 2681 SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23)
2682 2682 SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25)
2683 2683
2684 2684 #elif defined(__i386)
2685 2685
2686 2686 #define SUWORD(NAME, INSTR, REG, COPYOP) \
2687 2687 ENTRY(NAME) \
2688 2688 movl %gs:CPU_THREAD, %ecx; \
2689 2689 movl kernelbase, %eax; \
2690 2690 cmpl %eax, 4(%esp); \
2691 2691 jae 1f; \
2692 2692 lea _flt_/**/NAME, %edx; \
2693 2693 movl %edx, T_LOFAULT(%ecx); \
2694 2694 movl 4(%esp), %eax; \
2695 2695 movl 8(%esp), %edx; \
2696 2696 INSTR REG, (%eax); \
2697 2697 movl $0, T_LOFAULT(%ecx); \
2698 2698 xorl %eax, %eax; \
2699 2699 ret; \
2700 2700 _flt_/**/NAME: \
2701 2701 movl $0, T_LOFAULT(%ecx); \
2702 2702 1: \
2703 2703 movl T_COPYOPS(%ecx), %eax; \
2704 2704 cmpl $0, %eax; \
2705 2705 jz 3f; \
2706 2706 movl COPYOP(%eax), %ecx; \
2707 2707 jmp *%ecx; \
2708 2708 3: \
2709 2709 movl $-1, %eax; \
2710 2710 ret; \
2711 2711 SET_SIZE(NAME)
2712 2712
2713 2713 SUWORD(suword32, movl, %edx, CP_SUWORD32)
2714 2714 SUWORD(suword16, movw, %dx, CP_SUWORD16)
2715 2715 SUWORD(suword8, movb, %dl, CP_SUWORD8)
2716 2716
2717 2717 #endif /* __i386 */
2718 2718
2719 2719 #undef SUWORD
2720 2720
2721 2721 #endif /* __lint */
2722 2722
2723 2723 #if defined(__lint)
2724 2724
2725 2725 #if defined(__amd64)
2726 2726
2727 2727 /*ARGSUSED*/
2728 2728 void
2729 2729 fuword64_noerr(const void *addr, uint64_t *dst)
2730 2730 {}
2731 2731
2732 2732 #endif
2733 2733
2734 2734 /*ARGSUSED*/
2735 2735 void
2736 2736 fuword32_noerr(const void *addr, uint32_t *dst)
2737 2737 {}
2738 2738
2739 2739 /*ARGSUSED*/
2740 2740 void
2741 2741 fuword8_noerr(const void *addr, uint8_t *dst)
2742 2742 {}
2743 2743
2744 2744 /*ARGSUSED*/
2745 2745 void
2746 2746 fuword16_noerr(const void *addr, uint16_t *dst)
2747 2747 {}
2748 2748
2749 2749 #else /* __lint */
2750 2750
2751 2751 #if defined(__amd64)
2752 2752
2753 2753 #define FUWORD_NOERR(NAME, INSTR, REG) \
2754 2754 ENTRY(NAME) \
2755 2755 cmpq kernelbase(%rip), %rdi; \
2756 2756 cmovnbq kernelbase(%rip), %rdi; \
2757 2757 INSTR (%rdi), REG; \
2758 2758 INSTR REG, (%rsi); \
2759 2759 ret; \
2760 2760 SET_SIZE(NAME)
2761 2761
2762 2762 FUWORD_NOERR(fuword64_noerr, movq, %rax)
2763 2763 FUWORD_NOERR(fuword32_noerr, movl, %eax)
2764 2764 FUWORD_NOERR(fuword16_noerr, movw, %ax)
2765 2765 FUWORD_NOERR(fuword8_noerr, movb, %al)
2766 2766
2767 2767 #elif defined(__i386)
2768 2768
2769 2769 #define FUWORD_NOERR(NAME, INSTR, REG) \
2770 2770 ENTRY(NAME) \
2771 2771 movl 4(%esp), %eax; \
2772 2772 cmpl kernelbase, %eax; \
2773 2773 jb 1f; \
2774 2774 movl kernelbase, %eax; \
2775 2775 1: movl 8(%esp), %edx; \
2776 2776 INSTR (%eax), REG; \
2777 2777 INSTR REG, (%edx); \
2778 2778 ret; \
2779 2779 SET_SIZE(NAME)
2780 2780
2781 2781 FUWORD_NOERR(fuword32_noerr, movl, %ecx)
2782 2782 FUWORD_NOERR(fuword16_noerr, movw, %cx)
2783 2783 FUWORD_NOERR(fuword8_noerr, movb, %cl)
2784 2784
2785 2785 #endif /* __i386 */
2786 2786
2787 2787 #undef FUWORD_NOERR
2788 2788
2789 2789 #endif /* __lint */
2790 2790
2791 2791 #if defined(__lint)
2792 2792
2793 2793 #if defined(__amd64)
2794 2794
2795 2795 /*ARGSUSED*/
2796 2796 void
2797 2797 suword64_noerr(void *addr, uint64_t value)
2798 2798 {}
2799 2799
2800 2800 #endif
2801 2801
2802 2802 /*ARGSUSED*/
2803 2803 void
2804 2804 suword32_noerr(void *addr, uint32_t value)
2805 2805 {}
2806 2806
2807 2807 /*ARGSUSED*/
2808 2808 void
2809 2809 suword16_noerr(void *addr, uint16_t value)
2810 2810 {}
2811 2811
2812 2812 /*ARGSUSED*/
2813 2813 void
2814 2814 suword8_noerr(void *addr, uint8_t value)
2815 2815 {}
2816 2816
2817 2817 #else /* lint */
2818 2818
2819 2819 #if defined(__amd64)
2820 2820
2821 2821 #define SUWORD_NOERR(NAME, INSTR, REG) \
2822 2822 ENTRY(NAME) \
2823 2823 cmpq kernelbase(%rip), %rdi; \
2824 2824 cmovnbq kernelbase(%rip), %rdi; \
2825 2825 INSTR REG, (%rdi); \
2826 2826 ret; \
2827 2827 SET_SIZE(NAME)
2828 2828
2829 2829 SUWORD_NOERR(suword64_noerr, movq, %rsi)
2830 2830 SUWORD_NOERR(suword32_noerr, movl, %esi)
2831 2831 SUWORD_NOERR(suword16_noerr, movw, %si)
2832 2832 SUWORD_NOERR(suword8_noerr, movb, %sil)
2833 2833
2834 2834 #elif defined(__i386)
2835 2835
2836 2836 #define SUWORD_NOERR(NAME, INSTR, REG) \
2837 2837 ENTRY(NAME) \
2838 2838 movl 4(%esp), %eax; \
2839 2839 cmpl kernelbase, %eax; \
2840 2840 jb 1f; \
2841 2841 movl kernelbase, %eax; \
2842 2842 1: \
2843 2843 movl 8(%esp), %edx; \
2844 2844 INSTR REG, (%eax); \
2845 2845 ret; \
2846 2846 SET_SIZE(NAME)
2847 2847
2848 2848 SUWORD_NOERR(suword32_noerr, movl, %edx)
2849 2849 SUWORD_NOERR(suword16_noerr, movw, %dx)
2850 2850 SUWORD_NOERR(suword8_noerr, movb, %dl)
2851 2851
2852 2852 #endif /* __i386 */
2853 2853
2854 2854 #undef SUWORD_NOERR
2855 2855
2856 2856 #endif /* lint */
2857 2857
2858 2858
2859 2859 #if defined(__lint)
2860 2860
2861 2861 /*ARGSUSED*/
2862 2862 int
2863 2863 subyte(void *addr, uchar_t value)
2864 2864 { return (0); }
2865 2865
2866 2866 /*ARGSUSED*/
2867 2867 void
2868 2868 subyte_noerr(void *addr, uchar_t value)
2869 2869 {}
2870 2870
2871 2871 /*ARGSUSED*/
2872 2872 int
2873 2873 fulword(const void *addr, ulong_t *valuep)
2874 2874 { return (0); }
2875 2875
2876 2876 /*ARGSUSED*/
2877 2877 void
2878 2878 fulword_noerr(const void *addr, ulong_t *valuep)
2879 2879 {}
2880 2880
2881 2881 /*ARGSUSED*/
2882 2882 int
2883 2883 sulword(void *addr, ulong_t valuep)
2884 2884 { return (0); }
2885 2885
2886 2886 /*ARGSUSED*/
2887 2887 void
2888 2888 sulword_noerr(void *addr, ulong_t valuep)
2889 2889 {}
2890 2890
2891 2891 #else
2892 2892
2893 2893 .weak subyte
2894 2894 subyte=suword8
2895 2895 .weak subyte_noerr
2896 2896 subyte_noerr=suword8_noerr
2897 2897
2898 2898 #if defined(__amd64)
2899 2899
2900 2900 .weak fulword
2901 2901 fulword=fuword64
2902 2902 .weak fulword_noerr
2903 2903 fulword_noerr=fuword64_noerr
2904 2904 .weak sulword
2905 2905 sulword=suword64
2906 2906 .weak sulword_noerr
2907 2907 sulword_noerr=suword64_noerr
2908 2908
2909 2909 #elif defined(__i386)
2910 2910
2911 2911 .weak fulword
2912 2912 fulword=fuword32
2913 2913 .weak fulword_noerr
2914 2914 fulword_noerr=fuword32_noerr
2915 2915 .weak sulword
2916 2916 sulword=suword32
2917 2917 .weak sulword_noerr
2918 2918 sulword_noerr=suword32_noerr
2919 2919
2920 2920 #endif /* __i386 */
2921 2921
2922 2922 #endif /* __lint */
2923 2923
2924 2924 #if defined(__lint)
2925 2925
2926 2926 /*
2927 2927 * Copy a block of storage - must not overlap (from + len <= to).
2928 2928 * No fault handler installed (to be called under on_fault())
2929 2929 */
2930 2930
2931 2931 /* ARGSUSED */
2932 2932 void
2933 2933 copyout_noerr(const void *kfrom, void *uto, size_t count)
2934 2934 {}
2935 2935
2936 2936 /* ARGSUSED */
2937 2937 void
2938 2938 copyin_noerr(const void *ufrom, void *kto, size_t count)
2939 2939 {}
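
Because these _noerr variants install no lofault handler of their own, a caller
brackets them with on_fault()/no_fault(), as the comment above notes. A minimal
sketch of that pattern, assuming the usual <sys/systm.h> declarations and with
hypothetical names:

	#include <sys/types.h>
	#include <sys/errno.h>
	#include <sys/systm.h>		/* on_fault(), no_fault(), copyin_noerr() */

	/* Hypothetical helper: copy in from user space under an on_fault() bracket. */
	static int
	guarded_copyin(const void *ufrom, void *kto, size_t count)
	{
		label_t ljb;

		if (on_fault(&ljb)) {
			/* control arrives here if copyin_noerr() faults */
			no_fault();
			return (EFAULT);
		}
		copyin_noerr(ufrom, kto, count);
		no_fault();
		return (0);
	}
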
2940 2940
2941 2941 /*
2942 2942 * Zero a block of storage in user space
2943 2943 */
2944 2944
2945 2945 /* ARGSUSED */
2946 2946 void
2947 2947 uzero(void *addr, size_t count)
2948 2948 {}
2949 2949
2950 2950 /*
2951 2951 * copy a block of storage in user space
2952 2952 */
2953 2953
2954 2954 /* ARGSUSED */
2955 2955 void
2956 2956 ucopy(const void *ufrom, void *uto, size_t ulength)
2957 2957 {}
2958 2958
2959 2959 /*
2960 2960 * copy a string in user space
2961 2961 */
2962 2962
2963 2963 /* ARGSUSED */
2964 2964 void
2965 2965 ucopystr(const char *ufrom, char *uto, size_t umaxlength, size_t *lencopied)
2966 2966 {}
2967 2967
2968 2968 #else /* __lint */
2969 2969
2970 2970 #if defined(__amd64)
2971 2971
2972 2972 ENTRY(copyin_noerr)
2973 2973 movq kernelbase(%rip), %rax
2974 2974 #ifdef DEBUG
2975 2975 cmpq %rax, %rsi /* %rsi = kto */
2976 2976 jae 1f
2977 2977 leaq .cpyin_ne_pmsg(%rip), %rdi
2978 2978 jmp call_panic /* setup stack and call panic */
2979 2979 1:
2980 2980 #endif
2981 2981 cmpq %rax, %rdi /* ufrom < kernelbase */
2982 2982 jb do_copy
2983 2983 movq %rax, %rdi /* force fault at kernelbase */
2984 2984 jmp do_copy
2985 2985 SET_SIZE(copyin_noerr)
2986 2986
2987 2987 ENTRY(copyout_noerr)
2988 2988 movq kernelbase(%rip), %rax
2989 2989 #ifdef DEBUG
2990 2990 cmpq %rax, %rdi /* %rdi = kfrom */
2991 2991 jae 1f
2992 2992 leaq .cpyout_ne_pmsg(%rip), %rdi
2993 2993 jmp call_panic /* setup stack and call panic */
2994 2994 1:
2995 2995 #endif
2996 2996 cmpq %rax, %rsi /* uto < kernelbase */
2997 2997 jb do_copy
2998 2998 movq %rax, %rsi /* force fault at kernelbase */
2999 2999 jmp do_copy
3000 3000 SET_SIZE(copyout_noerr)
3001 3001
3002 3002 ENTRY(uzero)
3003 3003 movq kernelbase(%rip), %rax
3004 3004 cmpq %rax, %rdi
3005 3005 jb do_zero
3006 3006 movq %rax, %rdi /* force fault at kernelbase */
3007 3007 jmp do_zero
3008 3008 SET_SIZE(uzero)
3009 3009
3010 3010 ENTRY(ucopy)
3011 3011 movq kernelbase(%rip), %rax
3012 3012 cmpq %rax, %rdi
3013 3013 cmovaeq %rax, %rdi /* force fault at kernelbase */
3014 3014 cmpq %rax, %rsi
3015 3015 cmovaeq %rax, %rsi /* force fault at kernelbase */
3016 3016 jmp do_copy
3017 3017 SET_SIZE(ucopy)
3018 3018
3019 3019 /*
3020 3020 	 * Note, the frame pointer is required here because do_copystr expects
3021 3021 * to be able to pop it off!
3022 3022 */
3023 3023 ENTRY(ucopystr)
3024 3024 pushq %rbp
3025 3025 movq %rsp, %rbp
3026 3026 movq kernelbase(%rip), %rax
3027 3027 cmpq %rax, %rdi
3028 3028 cmovaeq %rax, %rdi /* force fault at kernelbase */
3029 3029 cmpq %rax, %rsi
3030 3030 cmovaeq %rax, %rsi /* force fault at kernelbase */
3031 3031 /* do_copystr expects lofault address in %r8 */
3032 3032 	/* do_copystr expects a flag in %r10 saying whether SMAP must be re-enabled */
3033 3033 xorl %r10d, %r10d
3034 3034 movq %gs:CPU_THREAD, %r8
3035 3035 movq T_LOFAULT(%r8), %r8
3036 3036 jmp do_copystr
3037 3037 SET_SIZE(ucopystr)
3038 3038
3039 3039 #elif defined(__i386)
3040 3040
3041 3041 ENTRY(copyin_noerr)
3042 3042 movl kernelbase, %eax
3043 3043 #ifdef DEBUG
3044 3044 cmpl %eax, 8(%esp)
3045 3045 jae 1f
3046 3046 pushl $.cpyin_ne_pmsg
3047 3047 call panic
3048 3048 1:
3049 3049 #endif
3050 3050 cmpl %eax, 4(%esp)
3051 3051 jb do_copy
3052 3052 movl %eax, 4(%esp) /* force fault at kernelbase */
3053 3053 jmp do_copy
3054 3054 SET_SIZE(copyin_noerr)
3055 3055
3056 3056 ENTRY(copyout_noerr)
3057 3057 movl kernelbase, %eax
3058 3058 #ifdef DEBUG
3059 3059 cmpl %eax, 4(%esp)
3060 3060 jae 1f
3061 3061 pushl $.cpyout_ne_pmsg
3062 3062 call panic
3063 3063 1:
3064 3064 #endif
3065 3065 cmpl %eax, 8(%esp)
3066 3066 jb do_copy
3067 3067 movl %eax, 8(%esp) /* force fault at kernelbase */
3068 3068 jmp do_copy
3069 3069 SET_SIZE(copyout_noerr)
3070 3070
3071 3071 ENTRY(uzero)
3072 3072 movl kernelbase, %eax
3073 3073 cmpl %eax, 4(%esp)
3074 3074 jb do_zero
3075 3075 movl %eax, 4(%esp) /* force fault at kernelbase */
3076 3076 jmp do_zero
3077 3077 SET_SIZE(uzero)
3078 3078
3079 3079 ENTRY(ucopy)
3080 3080 movl kernelbase, %eax
3081 3081 cmpl %eax, 4(%esp)
3082 3082 jb 1f
3083 3083 movl %eax, 4(%esp) /* force fault at kernelbase */
3084 3084 1:
3085 3085 cmpl %eax, 8(%esp)
3086 3086 jb do_copy
3087 3087 movl %eax, 8(%esp) /* force fault at kernelbase */
3088 3088 jmp do_copy
3089 3089 SET_SIZE(ucopy)
3090 3090
3091 3091 ENTRY(ucopystr)
3092 3092 movl kernelbase, %eax
3093 3093 cmpl %eax, 4(%esp)
3094 3094 jb 1f
3095 3095 movl %eax, 4(%esp) /* force fault at kernelbase */
3096 3096 1:
3097 3097 cmpl %eax, 8(%esp)
3098 3098 jb 2f
3099 3099 movl %eax, 8(%esp) /* force fault at kernelbase */
3100 3100 2:
3101 3101 /* do_copystr expects the lofault address in %eax */
3102 3102 movl %gs:CPU_THREAD, %eax
3103 3103 movl T_LOFAULT(%eax), %eax
3104 3104 jmp do_copystr
3105 3105 SET_SIZE(ucopystr)
3106 3106
3107 3107 #endif /* __i386 */
3108 3108
3109 3109 #ifdef DEBUG
3110 3110 .data
3111 3111 .kcopy_panic_msg:
3112 3112 .string "kcopy: arguments below kernelbase"
3113 3113 .bcopy_panic_msg:
3114 3114 .string "bcopy: arguments below kernelbase"
3115 3115 .kzero_panic_msg:
3116 3116 .string "kzero: arguments below kernelbase"
3117 3117 .bzero_panic_msg:
3118 3118 .string "bzero: arguments below kernelbase"
3119 3119 .copyin_panic_msg:
3120 3120 .string "copyin: kaddr argument below kernelbase"
3121 3121 .xcopyin_panic_msg:
3122 3122 .string "xcopyin: kaddr argument below kernelbase"
3123 3123 .copyout_panic_msg:
3124 3124 .string "copyout: kaddr argument below kernelbase"
3125 3125 .xcopyout_panic_msg:
3126 3126 .string "xcopyout: kaddr argument below kernelbase"
3127 3127 .copystr_panic_msg:
3128 3128 .string "copystr: arguments in user space"
3129 3129 .copyinstr_panic_msg:
3130 3130 .string "copyinstr: kaddr argument not in kernel address space"
3131 3131 .copyoutstr_panic_msg:
3132 3132 .string "copyoutstr: kaddr argument not in kernel address space"
3133 3133 .cpyin_ne_pmsg:
3134 3134 .string "copyin_noerr: argument not in kernel address space"
3135 3135 .cpyout_ne_pmsg:
3136 3136 .string "copyout_noerr: argument not in kernel address space"
3137 3137 #endif
3138 3138
3139 3139 #endif /* __lint */
3140 3140
3141 -/*
3142 - * These functions are used for SMAP, supervisor mode access protection. They
3143 - * are hotpatched to become real instructions when the system starts up which is
3144 - * done in mlsetup() as a part of enabling the other CR4 related features.
3145 - *
3146 - * Generally speaking, smap_disable() is a stac instruction and smap_enable is a
3147 - * clac instruction. It's safe to call these any number of times, and in fact,
3148 - * out of paranoia, the kernel will likely call it at several points.
3149 - */
3150 -
3151 -#if defined(__lint)
3152 -
3153 -void
3154 -smap_enable(void)
3155 -{}
3156 -
3157 -void
3158 -smap_disable(void)
3159 -{}
3160 -
3161 -#else
3162 -
3163 -#if defined (__amd64) || defined(__i386)
3164 - ENTRY(smap_disable)
3165 - nop
3166 - nop
3167 - nop
3168 - ret
3169 - SET_SIZE(smap_disable)
3170 -
3171 - ENTRY(smap_enable)
3172 - nop
3173 - nop
3174 - nop
3175 - ret
3176 - SET_SIZE(smap_enable)
3177 -
3178 -#endif /* __amd64 || __i386 */
3179 -
3180 -#endif /* __lint */
3181 -
3182 3141 #ifndef __lint
3183 3142
3184 3143 .data
3185 3144 .align 4
3186 3145 .globl _smap_enable_patch_count
3187 3146 .type _smap_enable_patch_count,@object
3188 3147 .size _smap_enable_patch_count, 4
3189 3148 _smap_enable_patch_count:
3190 3149 .long SMAP_ENABLE_COUNT
3191 3150
3192 3151 .globl _smap_disable_patch_count
3193 3152 .type _smap_disable_patch_count,@object
3194 3153 .size _smap_disable_patch_count, 4
3195 3154 _smap_disable_patch_count:
3196 3155 .long SMAP_DISABLE_COUNT
3197 3156
3198 3157 #endif /* __lint */