OS-7125 Need mitigation of L1TF (CVE-2018-3646)
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
--- old/usr/src/uts/intel/ia32/ml/copy.s
+++ new/usr/src/uts/intel/ia32/ml/copy.s
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /*
27 27 * Copyright (c) 2009, Intel Corporation
28 28 * All rights reserved.
29 29 */
30 30
31 31 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
32 32 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
33 33 /* All Rights Reserved */
34 34
35 35 /* Copyright (c) 1987, 1988 Microsoft Corporation */
36 36 /* All Rights Reserved */
37 37
38 38 /*
39 - * Copyright 2016 Joyent, Inc.
39 + * Copyright (c) 2018 Joyent, Inc.
40 40 */
41 41
42 42 #include <sys/errno.h>
43 43 #include <sys/asm_linkage.h>
44 44
45 45 #if defined(__lint)
46 46 #include <sys/types.h>
47 47 #include <sys/systm.h>
48 48 #else /* __lint */
49 49 #include "assym.h"
50 50 #endif /* __lint */
51 51
52 52 #define KCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */
53 53 #define XCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */
54 54 /*
55 55 * Non-temporal access (NTA) alignment requirement
56 56 */
57 57 #define NTA_ALIGN_SIZE 4 /* Must be at least 4-byte aligned */
58 58 #define NTA_ALIGN_MASK _CONST(NTA_ALIGN_SIZE-1)
59 59 #define COUNT_ALIGN_SIZE 16 /* Must be at least 16-byte aligned */
60 60 #define COUNT_ALIGN_MASK _CONST(COUNT_ALIGN_SIZE-1)
61 61
62 62 /*
63 63 * With the introduction of Broadwell, Intel has introduced supervisor mode
64 64 * access protection -- SMAP. SMAP forces the kernel to set certain bits to
65 65 * enable access of user pages (AC in rflags, defined as PS_ACHK in
66 66 * <sys/psw.h>). One of the challenges is that the implementations of many of the
67 67 * userland copy routines directly use the kernel ones. For example, copyin and
68 68 * copyout simply go and jump to the do_copy_fault label and traditionally let
69 69 * those deal with the return for them. In fact, changing that is a can of frame
70 70 * pointers.
71 71 *
72 72 * Rules and Constraints:
73 73 *
74 74 * 1. For anything that's not in copy.s, we have it do explicit calls to the
75 75 * smap related code. It usually is in a position where it is able to. This is
76 76 * restricted to the following three places: DTrace, resume() in swtch.s and
77 77 * on_fault/no_fault. If you want to add it somewhere else, we should be
78 78 * thinking twice.
79 79 *
80 80 * 2. We try to toggle this for the smallest window possible. This means that if
81 81 * we take a fault, need to try to use a copyop in copyin() or copyout(), or any
82 82 * other function, we will always leave with SMAP enabled (the kernel cannot
83 83 * access user pages).
84 84 *
85 85 * 3. None of the *_noerr() or ucopy/uzero routines should toggle SMAP. They are
86 86 * explicitly only allowed to be called while in an on_fault()/no_fault() handler,
87 87 * which already takes care of ensuring that SMAP is enabled and disabled. Note
88 88 * this means that when under an on_fault()/no_fault() handler, one must not
89 89 * call the non-*_noerr() routines.
90 90 *
91 91 * 4. The first thing we should do after coming out of an lofault handler is to
92 92 * make sure that we call smap_enable again to ensure that we are safely
93 93 * protected, as more often than not, we will have disabled smap to get there.
94 94 *
95 95 * 5. The SMAP functions, smap_enable and smap_disable may not touch any
96 96 * registers beyond those done by the call and ret. These routines may be called
97 97 * from arbitrary contexts in copy.s where we have slightly more special ABIs in
98 98 * place.
99 99 *
100 100 * 6. For any inline user of SMAP, the appropriate SMAP_ENABLE_INSTR and
101 101 * SMAP_DISABLE_INSTR macro should be used (except for smap_enable() and
102 102 * smap_disable()). If the number of these is changed, you must update the
103 103 * constants SMAP_ENABLE_COUNT and SMAP_DISABLE_COUNT below.
104 104 *
105 105 * 7. Note, at this time SMAP is not implemented for the 32-bit kernel. There is
106 106 * no known technical reason preventing it from being enabled.
107 107 *
108 108 * 8. Generally this .s file is processed by a K&R style cpp. This means that it
109 109 * really has a lot of feelings about whitespace. In particular, if you have a
110 110 * macro FOO with the arguments FOO(1, 3), the second argument is in fact ' 3'.
111 111 *
112 112 * 9. The smap_enable and smap_disable functions should not generally be called.
113 113 * They exist so that DTrace and on_trap() may use them; that's it.
114 114 *
115 115 * 10. In general, the kernel has its own value for rflags that gets used. This
116 116 * is maintained in a few different places which vary based on how the thread
117 117 * comes into existence and whether it's a user thread. In general, when the
118 118 * kernel takes a trap, it will always set the flags to a known state,
119 119 * mainly as part of ENABLE_INTR_FLAGS and F_OFF and F_ON. These ensure that
120 120 * PS_ACHK is cleared for us. In addition, when using the sysenter instruction,
121 121 * we mask PS_ACHK off via the AMD_SFMASK MSR. See init_cpu_syscall() for
122 122 * where that gets masked off.
123 123 */
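As a concrete illustration of rule 6 above: each SMAP_ENABLE_INSTR/SMAP_DISABLE_INSTR site reserves three nop bytes under a global _smap_*_patch_N label so that, on SMAP-capable CPUs, boot code can overwrite them in place with the three-byte CLAC (re-enable protection) or STAC (permit user access) encodings. The sketch below is a minimal user-space illustration of that idea, not the kernel's patching code; patch_text() is a hypothetical stand-in for the kernel's boot-time text patcher.

    #include <stdint.h>
    #include <string.h>

    static const uint8_t clac_insn[3] = { 0x0f, 0x01, 0xca }; /* CLAC: block kernel access to user pages */
    static const uint8_t stac_insn[3] = { 0x0f, 0x01, 0xcb }; /* STAC: permit kernel access to user pages */

    /* Hypothetical stand-in for the kernel's boot-time text patching. */
    static void
    patch_text(uint8_t *site, const uint8_t insn[3])
    {
    	memcpy(site, insn, 3);	/* overwrite the nop; nop; nop placeholder */
    }

    int
    main(void)
    {
    	/* What a _smap_enable_patch_N / _smap_disable_patch_N site holds before patching. */
    	uint8_t enable_site[3]  = { 0x90, 0x90, 0x90 };
    	uint8_t disable_site[3] = { 0x90, 0x90, 0x90 };

    	patch_text(enable_site, clac_insn);	/* smap_enable maps to clac */
    	patch_text(disable_site, stac_insn);	/* smap_disable maps to stac */
    	return (0);
    }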
124 124
125 125 /*
126 126 * The optimal 64-bit bcopy and kcopy for modern x86 processors uses
127 127 * "rep smovq" for large sizes. Performance data shows that many calls to
128 128 * bcopy/kcopy/bzero/kzero operate on small buffers. For best performance at
129 129 * these small sizes, unrolled code is used. For medium sizes, loops that write
130 130 * 64 bytes per iteration are used. Transition points were determined experimentally.
131 131 */
132 132 #define BZERO_USE_REP (1024)
133 133 #define BCOPY_DFLT_REP (128)
134 134 #define BCOPY_NHM_REP (768)
135 135
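A rough C-level sketch of the policy these constants encode, for illustration only (the real routine is the assembly below): copies under 80 bytes are handled entirely by unrolled moves, medium copies align the destination and move 64 bytes per loop iteration, and copies at or above the rep threshold (BCOPY_DFLT_REP, patched to BCOPY_NHM_REP at boot on Nehalem-class CPUs) use 8-byte rep moves with the tail going back through the unrolled path.

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    #define SMALL_MAX	80	/* below this, only the unrolled jump table runs */
    #define REP_THRESHOLD	128	/* BCOPY_DFLT_REP; boot patching may raise it to 768 */

    /* Hypothetical sketch of the size-based dispatch, not the kernel bcopy. */
    void
    bcopy_sketch(const void *from, void *to, size_t count)
    {
    	const uint8_t *src = from;
    	uint8_t *dst = to;

    	if (count < SMALL_MAX)
    		goto tail;			/* small: unrolled moves only */

    	if (count >= REP_THRESHOLD) {
    		size_t words = count / 8;	/* large: 8-byte words via rep smovq */
    		memcpy(dst, src, words * 8);
    		src += words * 8;
    		dst += words * 8;
    		count &= 7;
    	} else {
    		while (((uintptr_t)dst & 7) != 0) {	/* align dst to 8 bytes */
    			*dst++ = *src++;
    			count--;
    		}
    		while (count >= 64) {			/* 64 bytes per iteration */
    			memcpy(dst, src, 64);
    			src += 64;
    			dst += 64;
    			count -= 64;
    		}
    	}
    tail:
    	while (count-- > 0)	/* 0-79 byte remainder; a jump table in the assembly */
    		*dst++ = *src++;
    }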
136 136 /*
137 137 * Copy a block of storage, returning an error code if `from' or
138 138 * `to' takes a kernel pagefault which cannot be resolved.
139 139 * Returns errno value on pagefault error, 0 if all ok
140 140 */
141 141
142 142 /*
143 143 * I'm sorry about these macros, but copy.s is unsurprisingly sensitive to
144 144 * additional call instructions.
145 145 */
146 146 #if defined(__amd64)
147 147 #define SMAP_DISABLE_COUNT 16
148 148 #define SMAP_ENABLE_COUNT 26
149 149 #elif defined(__i386)
150 150 #define SMAP_DISABLE_COUNT 0
151 151 #define SMAP_ENABLE_COUNT 0
152 152 #endif
153 153
154 154 #define SMAP_DISABLE_INSTR(ITER) \
155 155 .globl _smap_disable_patch_/**/ITER; \
156 156 _smap_disable_patch_/**/ITER/**/:; \
157 157 nop; nop; nop;
158 158
159 159 #define SMAP_ENABLE_INSTR(ITER) \
160 160 .globl _smap_enable_patch_/**/ITER; \
161 161 _smap_enable_patch_/**/ITER/**/:; \
162 162 nop; nop; nop;
163 163
164 164 #if defined(__lint)
165 165
166 166 /* ARGSUSED */
167 167 int
168 168 kcopy(const void *from, void *to, size_t count)
169 169 { return (0); }
170 170
171 171 #else /* __lint */
172 172
173 173 .globl kernelbase
174 174 .globl postbootkernelbase
175 175
176 176 #if defined(__amd64)
177 177
178 178 ENTRY(kcopy)
179 179 pushq %rbp
180 180 movq %rsp, %rbp
181 181 #ifdef DEBUG
182 182 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */
183 183 jb 0f
184 184 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */
185 185 jnb 1f
186 186 0: leaq .kcopy_panic_msg(%rip), %rdi
187 187 xorl %eax, %eax
188 188 call panic
189 189 1:
190 190 #endif
191 191 /*
192 192 * pass lofault value as 4th argument to do_copy_fault
193 193 */
194 194 leaq _kcopy_copyerr(%rip), %rcx
195 195 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
196 196
197 197 do_copy_fault:
198 198 movq T_LOFAULT(%r9), %r11 /* save the current lofault */
199 199 movq %rcx, T_LOFAULT(%r9) /* new lofault */
200 200 call bcopy_altentry
201 201 xorl %eax, %eax /* return 0 (success) */
202 202 SMAP_ENABLE_INSTR(0)
203 203
204 204 /*
205 205 * A fault during do_copy_fault is indicated through an errno value
206 206 * in %rax and we iretq from the trap handler to here.
207 207 */
208 208 _kcopy_copyerr:
209 209 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
210 210 leave
211 211 ret
212 212 SET_SIZE(kcopy)
213 213
214 214 #elif defined(__i386)
215 215
216 216 #define ARG_FROM 8
217 217 #define ARG_TO 12
218 218 #define ARG_COUNT 16
219 219
220 220 ENTRY(kcopy)
221 221 #ifdef DEBUG
222 222 pushl %ebp
223 223 movl %esp, %ebp
224 224 movl postbootkernelbase, %eax
225 225 cmpl %eax, ARG_FROM(%ebp)
226 226 jb 0f
227 227 cmpl %eax, ARG_TO(%ebp)
228 228 jnb 1f
229 229 0: pushl $.kcopy_panic_msg
230 230 call panic
231 231 1: popl %ebp
232 232 #endif
233 233 lea _kcopy_copyerr, %eax /* lofault value */
234 234 movl %gs:CPU_THREAD, %edx
235 235
236 236 do_copy_fault:
237 237 pushl %ebp
238 238 movl %esp, %ebp /* setup stack frame */
239 239 pushl %esi
240 240 pushl %edi /* save registers */
241 241
242 242 movl T_LOFAULT(%edx), %edi
243 243 pushl %edi /* save the current lofault */
244 244 movl %eax, T_LOFAULT(%edx) /* new lofault */
245 245
246 246 movl ARG_COUNT(%ebp), %ecx
247 247 movl ARG_FROM(%ebp), %esi
248 248 movl ARG_TO(%ebp), %edi
249 249 shrl $2, %ecx /* word count */
250 250 rep
251 251 smovl
252 252 movl ARG_COUNT(%ebp), %ecx
253 253 andl $3, %ecx /* bytes left over */
254 254 rep
255 255 smovb
256 256 xorl %eax, %eax
257 257
258 258 /*
259 259 * A fault during do_copy_fault is indicated through an errno value
260 260 * in %eax and we iret from the trap handler to here.
261 261 */
262 262 _kcopy_copyerr:
263 263 popl %ecx
264 264 popl %edi
265 265 movl %ecx, T_LOFAULT(%edx) /* restore the original lofault */
266 266 popl %esi
267 267 popl %ebp
268 268 ret
269 269 SET_SIZE(kcopy)
270 270
271 271 #undef ARG_FROM
272 272 #undef ARG_TO
273 273 #undef ARG_COUNT
274 274
275 275 #endif /* __i386 */
276 276 #endif /* __lint */
277 277
278 278 #if defined(__lint)
279 279
280 280 /*
281 281 * Copy a block of storage. Similar to kcopy but uses non-temporal
282 282 * instructions.
283 283 */
284 284
285 285 /* ARGSUSED */
286 286 int
287 287 kcopy_nta(const void *from, void *to, size_t count, int copy_cached)
288 288 { return (0); }
289 289
290 290 #else /* __lint */
291 291
292 292 #if defined(__amd64)
293 293
294 294 #define COPY_LOOP_INIT(src, dst, cnt) \
295 295 addq cnt, src; \
296 296 addq cnt, dst; \
297 297 shrq $3, cnt; \
298 298 neg cnt
299 299
300 300 /* Copy 16 bytes per loop. Uses %rax and %r8 */
301 301 #define COPY_LOOP_BODY(src, dst, cnt) \
302 302 prefetchnta 0x100(src, cnt, 8); \
303 303 movq (src, cnt, 8), %rax; \
304 304 movq 0x8(src, cnt, 8), %r8; \
305 305 movnti %rax, (dst, cnt, 8); \
306 306 movnti %r8, 0x8(dst, cnt, 8); \
307 307 addq $2, cnt
308 308
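For reference, a hedged C equivalent of what COPY_LOOP_INIT/COPY_LOOP_BODY do, using the SSE2 streaming-store intrinsics: prefetch well ahead with the non-temporal hint, store through movnti so the destination does not displace useful cache lines, and fence when done (the assembly uses mfence). Like the alignment checks used with this path, the sketch assumes count is a multiple of 16 and both pointers are suitably aligned; it is an illustration, not the kernel routine.

    #include <emmintrin.h>	/* _mm_stream_si64 (movnti), _mm_prefetch, _mm_mfence */
    #include <stddef.h>

    void
    copy_nta_sketch(const void *from, void *to, size_t count)
    {
    	const long long *src = from;
    	long long *dst = to;
    	size_t words = count / 8;	/* 8-byte words; two are moved per pass */

    	for (size_t i = 0; i < words; i += 2) {
    		_mm_prefetch((const char *)&src[i] + 0x100, _MM_HINT_NTA);
    		_mm_stream_si64(&dst[i], src[i]);		/* movnti */
    		_mm_stream_si64(&dst[i + 1], src[i + 1]);	/* movnti */
    	}
    	_mm_mfence();	/* order the streaming stores before returning */
    }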
309 309 ENTRY(kcopy_nta)
310 310 pushq %rbp
311 311 movq %rsp, %rbp
312 312 #ifdef DEBUG
313 313 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */
314 314 jb 0f
315 315 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */
316 316 jnb 1f
317 317 0: leaq .kcopy_panic_msg(%rip), %rdi
318 318 xorl %eax, %eax
319 319 call panic
320 320 1:
321 321 #endif
322 322
323 323 movq %gs:CPU_THREAD, %r9
324 324 cmpq $0, %rcx /* No non-temporal access? */
325 325 /*
326 326 * pass lofault value as 4th argument to do_copy_fault
327 327 */
328 328 leaq _kcopy_nta_copyerr(%rip), %rcx /* doesn't set rflags */
329 329 jnz do_copy_fault /* use regular access */
330 330 /*
331 331 * Make sure cnt is >= KCOPY_MIN_SIZE
332 332 */
333 333 cmpq $KCOPY_MIN_SIZE, %rdx
334 334 jb do_copy_fault
335 335
336 336 /*
337 337 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
338 338 * count is COUNT_ALIGN_SIZE aligned.
339 339 */
340 340 movq %rdi, %r10
341 341 orq %rsi, %r10
342 342 andq $NTA_ALIGN_MASK, %r10
343 343 orq %rdx, %r10
344 344 andq $COUNT_ALIGN_MASK, %r10
345 345 jnz do_copy_fault
346 346
347 347 ALTENTRY(do_copy_fault_nta)
348 348 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
349 349 movq T_LOFAULT(%r9), %r11 /* save the current lofault */
350 350 movq %rcx, T_LOFAULT(%r9) /* new lofault */
351 351
352 352 /*
353 353 * COPY_LOOP_BODY uses %rax and %r8
354 354 */
355 355 COPY_LOOP_INIT(%rdi, %rsi, %rdx)
356 356 2: COPY_LOOP_BODY(%rdi, %rsi, %rdx)
357 357 jnz 2b
358 358
359 359 mfence
360 360 xorl %eax, %eax /* return 0 (success) */
361 361 SMAP_ENABLE_INSTR(1)
362 362
363 363 _kcopy_nta_copyerr:
364 364 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
365 365 leave
366 366 ret
367 367 SET_SIZE(do_copy_fault_nta)
368 368 SET_SIZE(kcopy_nta)
369 369
370 370 #elif defined(__i386)
371 371
372 372 #define ARG_FROM 8
373 373 #define ARG_TO 12
374 374 #define ARG_COUNT 16
375 375
376 376 #define COPY_LOOP_INIT(src, dst, cnt) \
377 377 addl cnt, src; \
378 378 addl cnt, dst; \
379 379 shrl $3, cnt; \
380 380 neg cnt
381 381
382 382 #define COPY_LOOP_BODY(src, dst, cnt) \
383 383 prefetchnta 0x100(src, cnt, 8); \
384 384 movl (src, cnt, 8), %esi; \
385 385 movnti %esi, (dst, cnt, 8); \
386 386 movl 0x4(src, cnt, 8), %esi; \
387 387 movnti %esi, 0x4(dst, cnt, 8); \
388 388 movl 0x8(src, cnt, 8), %esi; \
389 389 movnti %esi, 0x8(dst, cnt, 8); \
390 390 movl 0xc(src, cnt, 8), %esi; \
391 391 movnti %esi, 0xc(dst, cnt, 8); \
392 392 addl $2, cnt
393 393
394 394 /*
395 395 * kcopy_nta is not implemented for 32-bit as no performance
396 396 * improvement was shown. We simply jump directly to kcopy
397 397 * and discard the 4 arguments.
398 398 */
399 399 ENTRY(kcopy_nta)
400 400 jmp kcopy
401 401
402 402 lea _kcopy_nta_copyerr, %eax /* lofault value */
403 403 ALTENTRY(do_copy_fault_nta)
404 404 pushl %ebp
405 405 movl %esp, %ebp /* setup stack frame */
406 406 pushl %esi
407 407 pushl %edi
408 408
409 409 movl %gs:CPU_THREAD, %edx
410 410 movl T_LOFAULT(%edx), %edi
411 411 pushl %edi /* save the current lofault */
412 412 movl %eax, T_LOFAULT(%edx) /* new lofault */
413 413
414 414 /* COPY_LOOP_BODY needs to use %esi */
415 415 movl ARG_COUNT(%ebp), %ecx
416 416 movl ARG_FROM(%ebp), %edi
417 417 movl ARG_TO(%ebp), %eax
418 418 COPY_LOOP_INIT(%edi, %eax, %ecx)
419 419 1: COPY_LOOP_BODY(%edi, %eax, %ecx)
420 420 jnz 1b
421 421 mfence
422 422
423 423 xorl %eax, %eax
424 424 _kcopy_nta_copyerr:
425 425 popl %ecx
426 426 popl %edi
427 427 movl %ecx, T_LOFAULT(%edx) /* restore the original lofault */
428 428 popl %esi
429 429 leave
430 430 ret
431 431 SET_SIZE(do_copy_fault_nta)
432 432 SET_SIZE(kcopy_nta)
433 433
434 434 #undef ARG_FROM
435 435 #undef ARG_TO
436 436 #undef ARG_COUNT
437 437
438 438 #endif /* __i386 */
439 439 #endif /* __lint */
440 440
441 441 #if defined(__lint)
442 442
443 443 /* ARGSUSED */
444 444 void
445 445 bcopy(const void *from, void *to, size_t count)
446 446 {}
447 447
448 448 #else /* __lint */
449 449
450 450 #if defined(__amd64)
451 451
452 452 ENTRY(bcopy)
453 453 #ifdef DEBUG
454 454 orq %rdx, %rdx /* %rdx = count */
455 455 jz 1f
456 456 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */
457 457 jb 0f
458 458 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */
459 459 jnb 1f
460 460 0: leaq .bcopy_panic_msg(%rip), %rdi
461 461 jmp call_panic /* setup stack and call panic */
462 462 1:
463 463 #endif
464 464 /*
465 465 * bcopy_altentry() is called from kcopy, i.e., do_copy_fault.
466 466 * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy
467 467 * uses these registers in the future, they must be saved and restored.
468 468 */
469 469 ALTENTRY(bcopy_altentry)
470 470 do_copy:
471 471 #define L(s) .bcopy/**/s
472 472 cmpq $0x50, %rdx /* 80 */
473 473 jae bcopy_ck_size
474 474
475 475 /*
476 476 * Performance data shows many callers copy small buffers. So for
477 477 * best performance at these sizes, unrolled code is used. Store data without
478 478 * worrying about alignment.
479 479 */
480 480 leaq L(fwdPxQx)(%rip), %r10
481 481 addq %rdx, %rdi
482 482 addq %rdx, %rsi
483 483 movslq (%r10,%rdx,4), %rcx
484 484 leaq (%rcx,%r10,1), %r10
485 485 jmpq *%r10
486 486
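The table lookup above turns a byte count in the range 0-79 into a direct jump: entry count lands on label L(PxQy) where Q = count / 8 and P = count % 8, and that label copies Q quadwords followed by a P-byte tail, all addressed with negative offsets from the pointers that were just advanced by count. The hypothetical sketch below shows the same arithmetic in C, purely as an illustration of the dispatch.

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    void
    small_copy_sketch(const uint8_t *from, uint8_t *to, size_t count)	/* count < 80 */
    {
    	size_t q = count / 8;	/* quadwords: the Qy part of L(PxQy) */
    	size_t p = count % 8;	/* tail bytes: the Px part */

    	from += count;		/* addq %rdx, %rdi */
    	to += count;		/* addq %rdx, %rsi */

    	/* Q quadwords, largest negative offset first: L(PxQ9) .. L(PxQ1) */
    	for (size_t i = q; i > 0; i--) {
    		uint64_t w;
    		memcpy(&w, from - (p + i * 8), sizeof (w));
    		memcpy(to - (p + i * 8), &w, sizeof (w));
    	}
    	/* P tail bytes: the L(PxQ0) epilogues */
    	for (size_t i = p; i > 0; i--)
    		to[-(ptrdiff_t)i] = from[-(ptrdiff_t)i];
    }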
487 487 .p2align 4
488 488 L(fwdPxQx):
489 489 .int L(P0Q0)-L(fwdPxQx) /* 0 */
490 490 .int L(P1Q0)-L(fwdPxQx)
491 491 .int L(P2Q0)-L(fwdPxQx)
492 492 .int L(P3Q0)-L(fwdPxQx)
493 493 .int L(P4Q0)-L(fwdPxQx)
494 494 .int L(P5Q0)-L(fwdPxQx)
495 495 .int L(P6Q0)-L(fwdPxQx)
496 496 .int L(P7Q0)-L(fwdPxQx)
497 497
498 498 .int L(P0Q1)-L(fwdPxQx) /* 8 */
499 499 .int L(P1Q1)-L(fwdPxQx)
500 500 .int L(P2Q1)-L(fwdPxQx)
501 501 .int L(P3Q1)-L(fwdPxQx)
502 502 .int L(P4Q1)-L(fwdPxQx)
503 503 .int L(P5Q1)-L(fwdPxQx)
504 504 .int L(P6Q1)-L(fwdPxQx)
505 505 .int L(P7Q1)-L(fwdPxQx)
506 506
507 507 .int L(P0Q2)-L(fwdPxQx) /* 16 */
508 508 .int L(P1Q2)-L(fwdPxQx)
509 509 .int L(P2Q2)-L(fwdPxQx)
510 510 .int L(P3Q2)-L(fwdPxQx)
511 511 .int L(P4Q2)-L(fwdPxQx)
512 512 .int L(P5Q2)-L(fwdPxQx)
513 513 .int L(P6Q2)-L(fwdPxQx)
514 514 .int L(P7Q2)-L(fwdPxQx)
515 515
516 516 .int L(P0Q3)-L(fwdPxQx) /* 24 */
517 517 .int L(P1Q3)-L(fwdPxQx)
518 518 .int L(P2Q3)-L(fwdPxQx)
519 519 .int L(P3Q3)-L(fwdPxQx)
520 520 .int L(P4Q3)-L(fwdPxQx)
521 521 .int L(P5Q3)-L(fwdPxQx)
522 522 .int L(P6Q3)-L(fwdPxQx)
523 523 .int L(P7Q3)-L(fwdPxQx)
524 524
525 525 .int L(P0Q4)-L(fwdPxQx) /* 32 */
526 526 .int L(P1Q4)-L(fwdPxQx)
527 527 .int L(P2Q4)-L(fwdPxQx)
528 528 .int L(P3Q4)-L(fwdPxQx)
529 529 .int L(P4Q4)-L(fwdPxQx)
530 530 .int L(P5Q4)-L(fwdPxQx)
531 531 .int L(P6Q4)-L(fwdPxQx)
532 532 .int L(P7Q4)-L(fwdPxQx)
533 533
534 534 .int L(P0Q5)-L(fwdPxQx) /* 40 */
535 535 .int L(P1Q5)-L(fwdPxQx)
536 536 .int L(P2Q5)-L(fwdPxQx)
537 537 .int L(P3Q5)-L(fwdPxQx)
538 538 .int L(P4Q5)-L(fwdPxQx)
539 539 .int L(P5Q5)-L(fwdPxQx)
540 540 .int L(P6Q5)-L(fwdPxQx)
541 541 .int L(P7Q5)-L(fwdPxQx)
542 542
543 543 .int L(P0Q6)-L(fwdPxQx) /* 48 */
544 544 .int L(P1Q6)-L(fwdPxQx)
545 545 .int L(P2Q6)-L(fwdPxQx)
546 546 .int L(P3Q6)-L(fwdPxQx)
547 547 .int L(P4Q6)-L(fwdPxQx)
548 548 .int L(P5Q6)-L(fwdPxQx)
549 549 .int L(P6Q6)-L(fwdPxQx)
550 550 .int L(P7Q6)-L(fwdPxQx)
551 551
552 552 .int L(P0Q7)-L(fwdPxQx) /* 56 */
553 553 .int L(P1Q7)-L(fwdPxQx)
554 554 .int L(P2Q7)-L(fwdPxQx)
555 555 .int L(P3Q7)-L(fwdPxQx)
556 556 .int L(P4Q7)-L(fwdPxQx)
557 557 .int L(P5Q7)-L(fwdPxQx)
558 558 .int L(P6Q7)-L(fwdPxQx)
559 559 .int L(P7Q7)-L(fwdPxQx)
560 560
561 561 .int L(P0Q8)-L(fwdPxQx) /* 64 */
562 562 .int L(P1Q8)-L(fwdPxQx)
563 563 .int L(P2Q8)-L(fwdPxQx)
564 564 .int L(P3Q8)-L(fwdPxQx)
565 565 .int L(P4Q8)-L(fwdPxQx)
566 566 .int L(P5Q8)-L(fwdPxQx)
567 567 .int L(P6Q8)-L(fwdPxQx)
568 568 .int L(P7Q8)-L(fwdPxQx)
569 569
570 570 .int L(P0Q9)-L(fwdPxQx) /* 72 */
571 571 .int L(P1Q9)-L(fwdPxQx)
572 572 .int L(P2Q9)-L(fwdPxQx)
573 573 .int L(P3Q9)-L(fwdPxQx)
574 574 .int L(P4Q9)-L(fwdPxQx)
575 575 .int L(P5Q9)-L(fwdPxQx)
576 576 .int L(P6Q9)-L(fwdPxQx)
577 577 .int L(P7Q9)-L(fwdPxQx) /* 79 */
578 578
579 579 .p2align 4
580 580 L(P0Q9):
581 581 mov -0x48(%rdi), %rcx
582 582 mov %rcx, -0x48(%rsi)
583 583 L(P0Q8):
584 584 mov -0x40(%rdi), %r10
585 585 mov %r10, -0x40(%rsi)
586 586 L(P0Q7):
587 587 mov -0x38(%rdi), %r8
588 588 mov %r8, -0x38(%rsi)
589 589 L(P0Q6):
590 590 mov -0x30(%rdi), %rcx
591 591 mov %rcx, -0x30(%rsi)
592 592 L(P0Q5):
593 593 mov -0x28(%rdi), %r10
594 594 mov %r10, -0x28(%rsi)
595 595 L(P0Q4):
596 596 mov -0x20(%rdi), %r8
597 597 mov %r8, -0x20(%rsi)
598 598 L(P0Q3):
599 599 mov -0x18(%rdi), %rcx
600 600 mov %rcx, -0x18(%rsi)
601 601 L(P0Q2):
602 602 mov -0x10(%rdi), %r10
603 603 mov %r10, -0x10(%rsi)
604 604 L(P0Q1):
605 605 mov -0x8(%rdi), %r8
606 606 mov %r8, -0x8(%rsi)
607 607 L(P0Q0):
608 608 ret
609 609
610 610 .p2align 4
611 611 L(P1Q9):
612 612 mov -0x49(%rdi), %r8
613 613 mov %r8, -0x49(%rsi)
614 614 L(P1Q8):
615 615 mov -0x41(%rdi), %rcx
616 616 mov %rcx, -0x41(%rsi)
617 617 L(P1Q7):
618 618 mov -0x39(%rdi), %r10
619 619 mov %r10, -0x39(%rsi)
620 620 L(P1Q6):
621 621 mov -0x31(%rdi), %r8
622 622 mov %r8, -0x31(%rsi)
623 623 L(P1Q5):
624 624 mov -0x29(%rdi), %rcx
625 625 mov %rcx, -0x29(%rsi)
626 626 L(P1Q4):
627 627 mov -0x21(%rdi), %r10
628 628 mov %r10, -0x21(%rsi)
629 629 L(P1Q3):
630 630 mov -0x19(%rdi), %r8
631 631 mov %r8, -0x19(%rsi)
632 632 L(P1Q2):
633 633 mov -0x11(%rdi), %rcx
634 634 mov %rcx, -0x11(%rsi)
635 635 L(P1Q1):
636 636 mov -0x9(%rdi), %r10
637 637 mov %r10, -0x9(%rsi)
638 638 L(P1Q0):
639 639 movzbq -0x1(%rdi), %r8
640 640 mov %r8b, -0x1(%rsi)
641 641 ret
642 642
643 643 .p2align 4
644 644 L(P2Q9):
645 645 mov -0x4a(%rdi), %r8
646 646 mov %r8, -0x4a(%rsi)
647 647 L(P2Q8):
648 648 mov -0x42(%rdi), %rcx
649 649 mov %rcx, -0x42(%rsi)
650 650 L(P2Q7):
651 651 mov -0x3a(%rdi), %r10
652 652 mov %r10, -0x3a(%rsi)
653 653 L(P2Q6):
654 654 mov -0x32(%rdi), %r8
655 655 mov %r8, -0x32(%rsi)
656 656 L(P2Q5):
657 657 mov -0x2a(%rdi), %rcx
658 658 mov %rcx, -0x2a(%rsi)
659 659 L(P2Q4):
660 660 mov -0x22(%rdi), %r10
661 661 mov %r10, -0x22(%rsi)
662 662 L(P2Q3):
663 663 mov -0x1a(%rdi), %r8
664 664 mov %r8, -0x1a(%rsi)
665 665 L(P2Q2):
666 666 mov -0x12(%rdi), %rcx
667 667 mov %rcx, -0x12(%rsi)
668 668 L(P2Q1):
669 669 mov -0xa(%rdi), %r10
670 670 mov %r10, -0xa(%rsi)
671 671 L(P2Q0):
672 672 movzwq -0x2(%rdi), %r8
673 673 mov %r8w, -0x2(%rsi)
674 674 ret
675 675
676 676 .p2align 4
677 677 L(P3Q9):
678 678 mov -0x4b(%rdi), %r8
679 679 mov %r8, -0x4b(%rsi)
680 680 L(P3Q8):
681 681 mov -0x43(%rdi), %rcx
682 682 mov %rcx, -0x43(%rsi)
683 683 L(P3Q7):
684 684 mov -0x3b(%rdi), %r10
685 685 mov %r10, -0x3b(%rsi)
686 686 L(P3Q6):
687 687 mov -0x33(%rdi), %r8
688 688 mov %r8, -0x33(%rsi)
689 689 L(P3Q5):
690 690 mov -0x2b(%rdi), %rcx
691 691 mov %rcx, -0x2b(%rsi)
692 692 L(P3Q4):
693 693 mov -0x23(%rdi), %r10
694 694 mov %r10, -0x23(%rsi)
695 695 L(P3Q3):
696 696 mov -0x1b(%rdi), %r8
697 697 mov %r8, -0x1b(%rsi)
698 698 L(P3Q2):
699 699 mov -0x13(%rdi), %rcx
700 700 mov %rcx, -0x13(%rsi)
701 701 L(P3Q1):
702 702 mov -0xb(%rdi), %r10
703 703 mov %r10, -0xb(%rsi)
704 704 /*
705 705 * These trailing loads/stores have to do all their loads 1st,
706 706 * then do the stores.
707 707 */
708 708 L(P3Q0):
709 709 movzwq -0x3(%rdi), %r8
710 710 movzbq -0x1(%rdi), %r10
711 711 mov %r8w, -0x3(%rsi)
712 712 mov %r10b, -0x1(%rsi)
713 713 ret
714 714
715 715 .p2align 4
716 716 L(P4Q9):
717 717 mov -0x4c(%rdi), %r8
718 718 mov %r8, -0x4c(%rsi)
719 719 L(P4Q8):
720 720 mov -0x44(%rdi), %rcx
721 721 mov %rcx, -0x44(%rsi)
722 722 L(P4Q7):
723 723 mov -0x3c(%rdi), %r10
724 724 mov %r10, -0x3c(%rsi)
725 725 L(P4Q6):
726 726 mov -0x34(%rdi), %r8
727 727 mov %r8, -0x34(%rsi)
728 728 L(P4Q5):
729 729 mov -0x2c(%rdi), %rcx
730 730 mov %rcx, -0x2c(%rsi)
731 731 L(P4Q4):
732 732 mov -0x24(%rdi), %r10
733 733 mov %r10, -0x24(%rsi)
734 734 L(P4Q3):
735 735 mov -0x1c(%rdi), %r8
736 736 mov %r8, -0x1c(%rsi)
737 737 L(P4Q2):
738 738 mov -0x14(%rdi), %rcx
739 739 mov %rcx, -0x14(%rsi)
740 740 L(P4Q1):
741 741 mov -0xc(%rdi), %r10
742 742 mov %r10, -0xc(%rsi)
743 743 L(P4Q0):
744 744 mov -0x4(%rdi), %r8d
745 745 mov %r8d, -0x4(%rsi)
746 746 ret
747 747
748 748 .p2align 4
749 749 L(P5Q9):
750 750 mov -0x4d(%rdi), %r8
751 751 mov %r8, -0x4d(%rsi)
752 752 L(P5Q8):
753 753 mov -0x45(%rdi), %rcx
754 754 mov %rcx, -0x45(%rsi)
755 755 L(P5Q7):
756 756 mov -0x3d(%rdi), %r10
757 757 mov %r10, -0x3d(%rsi)
758 758 L(P5Q6):
759 759 mov -0x35(%rdi), %r8
760 760 mov %r8, -0x35(%rsi)
761 761 L(P5Q5):
762 762 mov -0x2d(%rdi), %rcx
763 763 mov %rcx, -0x2d(%rsi)
764 764 L(P5Q4):
765 765 mov -0x25(%rdi), %r10
766 766 mov %r10, -0x25(%rsi)
767 767 L(P5Q3):
768 768 mov -0x1d(%rdi), %r8
769 769 mov %r8, -0x1d(%rsi)
770 770 L(P5Q2):
771 771 mov -0x15(%rdi), %rcx
772 772 mov %rcx, -0x15(%rsi)
773 773 L(P5Q1):
774 774 mov -0xd(%rdi), %r10
775 775 mov %r10, -0xd(%rsi)
776 776 L(P5Q0):
777 777 mov -0x5(%rdi), %r8d
778 778 movzbq -0x1(%rdi), %r10
779 779 mov %r8d, -0x5(%rsi)
780 780 mov %r10b, -0x1(%rsi)
781 781 ret
782 782
783 783 .p2align 4
784 784 L(P6Q9):
785 785 mov -0x4e(%rdi), %r8
786 786 mov %r8, -0x4e(%rsi)
787 787 L(P6Q8):
788 788 mov -0x46(%rdi), %rcx
789 789 mov %rcx, -0x46(%rsi)
790 790 L(P6Q7):
791 791 mov -0x3e(%rdi), %r10
792 792 mov %r10, -0x3e(%rsi)
793 793 L(P6Q6):
794 794 mov -0x36(%rdi), %r8
795 795 mov %r8, -0x36(%rsi)
796 796 L(P6Q5):
797 797 mov -0x2e(%rdi), %rcx
798 798 mov %rcx, -0x2e(%rsi)
799 799 L(P6Q4):
800 800 mov -0x26(%rdi), %r10
801 801 mov %r10, -0x26(%rsi)
802 802 L(P6Q3):
803 803 mov -0x1e(%rdi), %r8
804 804 mov %r8, -0x1e(%rsi)
805 805 L(P6Q2):
806 806 mov -0x16(%rdi), %rcx
807 807 mov %rcx, -0x16(%rsi)
808 808 L(P6Q1):
809 809 mov -0xe(%rdi), %r10
810 810 mov %r10, -0xe(%rsi)
811 811 L(P6Q0):
812 812 mov -0x6(%rdi), %r8d
813 813 movzwq -0x2(%rdi), %r10
814 814 mov %r8d, -0x6(%rsi)
815 815 mov %r10w, -0x2(%rsi)
816 816 ret
817 817
818 818 .p2align 4
819 819 L(P7Q9):
820 820 mov -0x4f(%rdi), %r8
821 821 mov %r8, -0x4f(%rsi)
822 822 L(P7Q8):
823 823 mov -0x47(%rdi), %rcx
824 824 mov %rcx, -0x47(%rsi)
825 825 L(P7Q7):
826 826 mov -0x3f(%rdi), %r10
827 827 mov %r10, -0x3f(%rsi)
828 828 L(P7Q6):
829 829 mov -0x37(%rdi), %r8
830 830 mov %r8, -0x37(%rsi)
831 831 L(P7Q5):
832 832 mov -0x2f(%rdi), %rcx
833 833 mov %rcx, -0x2f(%rsi)
834 834 L(P7Q4):
835 835 mov -0x27(%rdi), %r10
836 836 mov %r10, -0x27(%rsi)
837 837 L(P7Q3):
838 838 mov -0x1f(%rdi), %r8
839 839 mov %r8, -0x1f(%rsi)
840 840 L(P7Q2):
841 841 mov -0x17(%rdi), %rcx
842 842 mov %rcx, -0x17(%rsi)
843 843 L(P7Q1):
844 844 mov -0xf(%rdi), %r10
845 845 mov %r10, -0xf(%rsi)
846 846 L(P7Q0):
847 847 mov -0x7(%rdi), %r8d
848 848 movzwq -0x3(%rdi), %r10
849 849 movzbq -0x1(%rdi), %rcx
850 850 mov %r8d, -0x7(%rsi)
851 851 mov %r10w, -0x3(%rsi)
852 852 mov %cl, -0x1(%rsi)
853 853 ret
854 854
855 855 /*
856 856 * For large sizes rep smovq is fastest.
857 857 * Transition point determined experimentally as measured on
858 858 * Intel Xeon processors (incl. Nehalem and previous generations) and
859 859 * AMD Opteron. The transition value is patched at boot time to avoid
860 860 * a memory reference hit.
861 861 */
862 862 .globl bcopy_patch_start
863 863 bcopy_patch_start:
864 864 cmpq $BCOPY_NHM_REP, %rdx
865 865 .globl bcopy_patch_end
866 866 bcopy_patch_end:
867 867
868 868 .p2align 4
869 - .globl bcopy_ck_size
870 -bcopy_ck_size:
869 + ALTENTRY(bcopy_ck_size)
870 +
871 871 cmpq $BCOPY_DFLT_REP, %rdx
872 872 jae L(use_rep)
873 873
874 874 /*
875 875 * Align to an 8-byte boundary. Avoids penalties from unaligned stores
876 876 * as well as from stores spanning cachelines.
877 877 */
878 878 test $0x7, %rsi
879 879 jz L(aligned_loop)
880 880 test $0x1, %rsi
881 881 jz 2f
882 882 movzbq (%rdi), %r8
883 883 dec %rdx
884 884 inc %rdi
885 885 mov %r8b, (%rsi)
886 886 inc %rsi
887 887 2:
888 888 test $0x2, %rsi
889 889 jz 4f
890 890 movzwq (%rdi), %r8
891 891 sub $0x2, %rdx
892 892 add $0x2, %rdi
893 893 mov %r8w, (%rsi)
894 894 add $0x2, %rsi
895 895 4:
896 896 test $0x4, %rsi
897 897 jz L(aligned_loop)
898 898 mov (%rdi), %r8d
899 899 sub $0x4, %rdx
900 900 add $0x4, %rdi
901 901 mov %r8d, (%rsi)
902 902 add $0x4, %rsi
903 903
904 904 /*
905 905 * Copy 64-bytes per loop
906 906 */
907 907 .p2align 4
908 908 L(aligned_loop):
909 909 mov (%rdi), %r8
910 910 mov 0x8(%rdi), %r10
911 911 lea -0x40(%rdx), %rdx
912 912 mov %r8, (%rsi)
913 913 mov %r10, 0x8(%rsi)
914 914 mov 0x10(%rdi), %rcx
915 915 mov 0x18(%rdi), %r8
916 916 mov %rcx, 0x10(%rsi)
917 917 mov %r8, 0x18(%rsi)
918 918
919 919 cmp $0x40, %rdx
920 920 mov 0x20(%rdi), %r10
921 921 mov 0x28(%rdi), %rcx
922 922 mov %r10, 0x20(%rsi)
923 923 mov %rcx, 0x28(%rsi)
924 924 mov 0x30(%rdi), %r8
925 925 mov 0x38(%rdi), %r10
926 926 lea 0x40(%rdi), %rdi
927 927 mov %r8, 0x30(%rsi)
928 928 mov %r10, 0x38(%rsi)
929 929 lea 0x40(%rsi), %rsi
930 930 jae L(aligned_loop)
931 931
932 932 /*
933 933 * Copy remaining bytes (0-63)
934 934 */
935 935 L(do_remainder):
936 936 leaq L(fwdPxQx)(%rip), %r10
937 937 addq %rdx, %rdi
938 938 addq %rdx, %rsi
939 939 movslq (%r10,%rdx,4), %rcx
940 940 leaq (%rcx,%r10,1), %r10
941 941 jmpq *%r10
942 942
943 943 /*
944 944 * Use rep smovq. Copy the remainder via unrolled code
945 945 */
946 946 .p2align 4
947 947 L(use_rep):
948 948 xchgq %rdi, %rsi /* %rsi = source, %rdi = destination */
949 949 movq %rdx, %rcx /* %rcx = count */
950 950 shrq $3, %rcx /* 8-byte word count */
951 951 rep
952 952 smovq
953 953
954 954 xchgq %rsi, %rdi /* %rdi = src, %rsi = destination */
955 955 andq $7, %rdx /* remainder */
956 956 jnz L(do_remainder)
957 957 ret
958 958 #undef L
959 + SET_SIZE(bcopy_ck_size)
959 960
960 961 #ifdef DEBUG
961 962 /*
962 963 * Setup frame on the run-time stack. The end of the input argument
963 964 * area must be aligned on a 16 byte boundary. The stack pointer %rsp,
964 965 * always points to the end of the latest allocated stack frame.
965 966 * panic(const char *format, ...) is a varargs function. When a
966 967 * function taking variable arguments is called, %rax must be set
967 968 * to eight times the number of floating point parameters passed
968 969 * to the function in SSE registers.
969 970 */
970 971 call_panic:
971 972 pushq %rbp /* align stack properly */
972 973 movq %rsp, %rbp
973 974 xorl %eax, %eax /* no variable arguments */
974 975 call panic /* %rdi = format string */
975 976 #endif
976 977 SET_SIZE(bcopy_altentry)
977 978 SET_SIZE(bcopy)
978 979
979 980 #elif defined(__i386)
980 981
981 982 #define ARG_FROM 4
982 983 #define ARG_TO 8
983 984 #define ARG_COUNT 12
984 985
985 986 ENTRY(bcopy)
986 987 #ifdef DEBUG
987 988 movl ARG_COUNT(%esp), %eax
988 989 orl %eax, %eax
989 990 jz 1f
990 991 movl postbootkernelbase, %eax
991 992 cmpl %eax, ARG_FROM(%esp)
992 993 jb 0f
993 994 cmpl %eax, ARG_TO(%esp)
994 995 jnb 1f
995 996 0: pushl %ebp
996 997 movl %esp, %ebp
997 998 pushl $.bcopy_panic_msg
998 999 call panic
999 1000 1:
1000 1001 #endif
1001 1002 do_copy:
1002 1003 movl %esi, %eax /* save registers */
1003 1004 movl %edi, %edx
1004 1005 movl ARG_COUNT(%esp), %ecx
1005 1006 movl ARG_FROM(%esp), %esi
1006 1007 movl ARG_TO(%esp), %edi
1007 1008
1008 1009 shrl $2, %ecx /* word count */
1009 1010 rep
1010 1011 smovl
1011 1012 movl ARG_COUNT(%esp), %ecx
1012 1013 andl $3, %ecx /* bytes left over */
1013 1014 rep
1014 1015 smovb
1015 1016 movl %eax, %esi /* restore registers */
1016 1017 movl %edx, %edi
1017 1018 ret
1018 1019 SET_SIZE(bcopy)
1019 1020
1020 1021 #undef ARG_COUNT
1021 1022 #undef ARG_FROM
1022 1023 #undef ARG_TO
1023 1024
1024 1025 #endif /* __i386 */
1025 1026 #endif /* __lint */
1026 1027
1027 1028
1028 1029 /*
1029 1030 * Zero a block of storage, returning an error code if we
1030 1031 * take a kernel pagefault which cannot be resolved.
1031 1032 * Returns errno value on pagefault error, 0 if all ok
1032 1033 */
1033 1034
1034 1035 #if defined(__lint)
1035 1036
1036 1037 /* ARGSUSED */
1037 1038 int
1038 1039 kzero(void *addr, size_t count)
1039 1040 { return (0); }
1040 1041
1041 1042 #else /* __lint */
1042 1043
1043 1044 #if defined(__amd64)
1044 1045
1045 1046 ENTRY(kzero)
1046 1047 #ifdef DEBUG
1047 1048 cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */
1048 1049 jnb 0f
1049 1050 leaq .kzero_panic_msg(%rip), %rdi
1050 1051 jmp call_panic /* setup stack and call panic */
1051 1052 0:
1052 1053 #endif
1053 1054 /*
1054 1055 * pass lofault value as 3rd argument for fault return
1055 1056 */
1056 1057 leaq _kzeroerr(%rip), %rdx
1057 1058
1058 1059 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
1059 1060 movq T_LOFAULT(%r9), %r11 /* save the current lofault */
1060 1061 movq %rdx, T_LOFAULT(%r9) /* new lofault */
1061 1062 call bzero_altentry
1062 1063 xorl %eax, %eax
1063 1064 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
1064 1065 ret
1065 1066 /*
1066 1067 * A fault during bzero is indicated through an errno value
1067 1068 * in %rax when we iretq to here.
1068 1069 */
1069 1070 _kzeroerr:
1070 1071 addq $8, %rsp /* pop bzero_altentry call ret addr */
1071 1072 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
1072 1073 ret
1073 1074 SET_SIZE(kzero)
1074 1075
1075 1076 #elif defined(__i386)
1076 1077
1077 1078 #define ARG_ADDR 8
1078 1079 #define ARG_COUNT 12
1079 1080
1080 1081 ENTRY(kzero)
1081 1082 #ifdef DEBUG
1082 1083 pushl %ebp
1083 1084 movl %esp, %ebp
1084 1085 movl postbootkernelbase, %eax
1085 1086 cmpl %eax, ARG_ADDR(%ebp)
1086 1087 jnb 0f
1087 1088 pushl $.kzero_panic_msg
1088 1089 call panic
1089 1090 0: popl %ebp
1090 1091 #endif
1091 1092 lea _kzeroerr, %eax /* kzeroerr is lofault value */
1092 1093
1093 1094 pushl %ebp /* save stack base */
1094 1095 movl %esp, %ebp /* set new stack base */
1095 1096 pushl %edi /* save %edi */
1096 1097
1097 1098 mov %gs:CPU_THREAD, %edx
1098 1099 movl T_LOFAULT(%edx), %edi
1099 1100 pushl %edi /* save the current lofault */
1100 1101 movl %eax, T_LOFAULT(%edx) /* new lofault */
1101 1102
1102 1103 movl ARG_COUNT(%ebp), %ecx /* get size in bytes */
1103 1104 movl ARG_ADDR(%ebp), %edi /* %edi <- address of bytes to clear */
1104 1105 shrl $2, %ecx /* Count of double words to zero */
1105 1106 xorl %eax, %eax /* sstol val */
1106 1107 rep
1107 1108 sstol /* %ecx contains words to clear (%eax=0) */
1108 1109
1109 1110 movl ARG_COUNT(%ebp), %ecx /* get size in bytes */
1110 1111 andl $3, %ecx /* do mod 4 */
1111 1112 rep
1112 1113 sstob /* %ecx contains residual bytes to clear */
1113 1114
1114 1115 /*
1115 1116 * A fault during kzero is indicated through an errno value
1116 1117 * in %eax when we iret to here.
1117 1118 */
1118 1119 _kzeroerr:
1119 1120 popl %edi
1120 1121 movl %edi, T_LOFAULT(%edx) /* restore the original lofault */
1121 1122 popl %edi
1122 1123 popl %ebp
1123 1124 ret
1124 1125 SET_SIZE(kzero)
1125 1126
1126 1127 #undef ARG_ADDR
1127 1128 #undef ARG_COUNT
1128 1129
1129 1130 #endif /* __i386 */
1130 1131 #endif /* __lint */
1131 1132
1132 1133 /*
1133 1134 * Zero a block of storage.
1134 1135 */
1135 1136
1136 1137 #if defined(__lint)
1137 1138
1138 1139 /* ARGSUSED */
1139 1140 void
1140 1141 bzero(void *addr, size_t count)
1141 1142 {}
1142 1143
1143 1144 #else /* __lint */
1144 1145
1145 1146 #if defined(__amd64)
1146 1147
1147 1148 ENTRY(bzero)
1148 1149 #ifdef DEBUG
1149 1150 cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */
1150 1151 jnb 0f
1151 1152 leaq .bzero_panic_msg(%rip), %rdi
1152 1153 jmp call_panic /* setup stack and call panic */
1153 1154 0:
1154 1155 #endif
1155 1156 ALTENTRY(bzero_altentry)
1156 1157 do_zero:
1157 1158 #define L(s) .bzero/**/s
1158 1159 xorl %eax, %eax
1159 1160
1160 1161 cmpq $0x50, %rsi /* 80 */
1161 1162 jae L(ck_align)
1162 1163
1163 1164 /*
1164 1165 * Performance data shows many callers are zeroing small buffers. So
1165 1166 * for best performance at these sizes, unrolled code is used. Store zeros
1166 1167 * without worrying about alignment.
1167 1168 */
1168 1169 leaq L(setPxQx)(%rip), %r10
1169 1170 addq %rsi, %rdi
1170 1171 movslq (%r10,%rsi,4), %rcx
1171 1172 leaq (%rcx,%r10,1), %r10
1172 1173 jmpq *%r10
1173 1174
1174 1175 .p2align 4
1175 1176 L(setPxQx):
1176 1177 .int L(P0Q0)-L(setPxQx) /* 0 */
1177 1178 .int L(P1Q0)-L(setPxQx)
1178 1179 .int L(P2Q0)-L(setPxQx)
1179 1180 .int L(P3Q0)-L(setPxQx)
1180 1181 .int L(P4Q0)-L(setPxQx)
1181 1182 .int L(P5Q0)-L(setPxQx)
1182 1183 .int L(P6Q0)-L(setPxQx)
1183 1184 .int L(P7Q0)-L(setPxQx)
1184 1185
1185 1186 .int L(P0Q1)-L(setPxQx) /* 8 */
1186 1187 .int L(P1Q1)-L(setPxQx)
1187 1188 .int L(P2Q1)-L(setPxQx)
1188 1189 .int L(P3Q1)-L(setPxQx)
1189 1190 .int L(P4Q1)-L(setPxQx)
1190 1191 .int L(P5Q1)-L(setPxQx)
1191 1192 .int L(P6Q1)-L(setPxQx)
1192 1193 .int L(P7Q1)-L(setPxQx)
1193 1194
1194 1195 .int L(P0Q2)-L(setPxQx) /* 16 */
1195 1196 .int L(P1Q2)-L(setPxQx)
1196 1197 .int L(P2Q2)-L(setPxQx)
1197 1198 .int L(P3Q2)-L(setPxQx)
1198 1199 .int L(P4Q2)-L(setPxQx)
1199 1200 .int L(P5Q2)-L(setPxQx)
1200 1201 .int L(P6Q2)-L(setPxQx)
1201 1202 .int L(P7Q2)-L(setPxQx)
1202 1203
1203 1204 .int L(P0Q3)-L(setPxQx) /* 24 */
1204 1205 .int L(P1Q3)-L(setPxQx)
1205 1206 .int L(P2Q3)-L(setPxQx)
1206 1207 .int L(P3Q3)-L(setPxQx)
1207 1208 .int L(P4Q3)-L(setPxQx)
1208 1209 .int L(P5Q3)-L(setPxQx)
1209 1210 .int L(P6Q3)-L(setPxQx)
1210 1211 .int L(P7Q3)-L(setPxQx)
1211 1212
1212 1213 .int L(P0Q4)-L(setPxQx) /* 32 */
1213 1214 .int L(P1Q4)-L(setPxQx)
1214 1215 .int L(P2Q4)-L(setPxQx)
1215 1216 .int L(P3Q4)-L(setPxQx)
1216 1217 .int L(P4Q4)-L(setPxQx)
1217 1218 .int L(P5Q4)-L(setPxQx)
1218 1219 .int L(P6Q4)-L(setPxQx)
1219 1220 .int L(P7Q4)-L(setPxQx)
1220 1221
1221 1222 .int L(P0Q5)-L(setPxQx) /* 40 */
1222 1223 .int L(P1Q5)-L(setPxQx)
1223 1224 .int L(P2Q5)-L(setPxQx)
1224 1225 .int L(P3Q5)-L(setPxQx)
1225 1226 .int L(P4Q5)-L(setPxQx)
1226 1227 .int L(P5Q5)-L(setPxQx)
1227 1228 .int L(P6Q5)-L(setPxQx)
1228 1229 .int L(P7Q5)-L(setPxQx)
1229 1230
1230 1231 .int L(P0Q6)-L(setPxQx) /* 48 */
1231 1232 .int L(P1Q6)-L(setPxQx)
1232 1233 .int L(P2Q6)-L(setPxQx)
1233 1234 .int L(P3Q6)-L(setPxQx)
1234 1235 .int L(P4Q6)-L(setPxQx)
1235 1236 .int L(P5Q6)-L(setPxQx)
1236 1237 .int L(P6Q6)-L(setPxQx)
1237 1238 .int L(P7Q6)-L(setPxQx)
1238 1239
1239 1240 .int L(P0Q7)-L(setPxQx) /* 56 */
1240 1241 .int L(P1Q7)-L(setPxQx)
1241 1242 .int L(P2Q7)-L(setPxQx)
1242 1243 .int L(P3Q7)-L(setPxQx)
1243 1244 .int L(P4Q7)-L(setPxQx)
1244 1245 .int L(P5Q7)-L(setPxQx)
1245 1246 .int L(P6Q7)-L(setPxQx)
1246 1247 .int L(P7Q7)-L(setPxQx)
1247 1248
1248 1249 .int L(P0Q8)-L(setPxQx) /* 64 */
1249 1250 .int L(P1Q8)-L(setPxQx)
1250 1251 .int L(P2Q8)-L(setPxQx)
1251 1252 .int L(P3Q8)-L(setPxQx)
1252 1253 .int L(P4Q8)-L(setPxQx)
1253 1254 .int L(P5Q8)-L(setPxQx)
1254 1255 .int L(P6Q8)-L(setPxQx)
1255 1256 .int L(P7Q8)-L(setPxQx)
1256 1257
1257 1258 .int L(P0Q9)-L(setPxQx) /* 72 */
1258 1259 .int L(P1Q9)-L(setPxQx)
1259 1260 .int L(P2Q9)-L(setPxQx)
1260 1261 .int L(P3Q9)-L(setPxQx)
1261 1262 .int L(P4Q9)-L(setPxQx)
1262 1263 .int L(P5Q9)-L(setPxQx)
1263 1264 .int L(P6Q9)-L(setPxQx)
1264 1265 .int L(P7Q9)-L(setPxQx) /* 79 */
1265 1266
1266 1267 .p2align 4
1267 1268 L(P0Q9): mov %rax, -0x48(%rdi)
1268 1269 L(P0Q8): mov %rax, -0x40(%rdi)
1269 1270 L(P0Q7): mov %rax, -0x38(%rdi)
1270 1271 L(P0Q6): mov %rax, -0x30(%rdi)
1271 1272 L(P0Q5): mov %rax, -0x28(%rdi)
1272 1273 L(P0Q4): mov %rax, -0x20(%rdi)
1273 1274 L(P0Q3): mov %rax, -0x18(%rdi)
1274 1275 L(P0Q2): mov %rax, -0x10(%rdi)
1275 1276 L(P0Q1): mov %rax, -0x8(%rdi)
1276 1277 L(P0Q0):
1277 1278 ret
1278 1279
1279 1280 .p2align 4
1280 1281 L(P1Q9): mov %rax, -0x49(%rdi)
1281 1282 L(P1Q8): mov %rax, -0x41(%rdi)
1282 1283 L(P1Q7): mov %rax, -0x39(%rdi)
1283 1284 L(P1Q6): mov %rax, -0x31(%rdi)
1284 1285 L(P1Q5): mov %rax, -0x29(%rdi)
1285 1286 L(P1Q4): mov %rax, -0x21(%rdi)
1286 1287 L(P1Q3): mov %rax, -0x19(%rdi)
1287 1288 L(P1Q2): mov %rax, -0x11(%rdi)
1288 1289 L(P1Q1): mov %rax, -0x9(%rdi)
1289 1290 L(P1Q0): mov %al, -0x1(%rdi)
1290 1291 ret
1291 1292
1292 1293 .p2align 4
1293 1294 L(P2Q9): mov %rax, -0x4a(%rdi)
1294 1295 L(P2Q8): mov %rax, -0x42(%rdi)
1295 1296 L(P2Q7): mov %rax, -0x3a(%rdi)
1296 1297 L(P2Q6): mov %rax, -0x32(%rdi)
1297 1298 L(P2Q5): mov %rax, -0x2a(%rdi)
1298 1299 L(P2Q4): mov %rax, -0x22(%rdi)
1299 1300 L(P2Q3): mov %rax, -0x1a(%rdi)
1300 1301 L(P2Q2): mov %rax, -0x12(%rdi)
1301 1302 L(P2Q1): mov %rax, -0xa(%rdi)
1302 1303 L(P2Q0): mov %ax, -0x2(%rdi)
1303 1304 ret
1304 1305
1305 1306 .p2align 4
1306 1307 L(P3Q9): mov %rax, -0x4b(%rdi)
1307 1308 L(P3Q8): mov %rax, -0x43(%rdi)
1308 1309 L(P3Q7): mov %rax, -0x3b(%rdi)
1309 1310 L(P3Q6): mov %rax, -0x33(%rdi)
1310 1311 L(P3Q5): mov %rax, -0x2b(%rdi)
1311 1312 L(P3Q4): mov %rax, -0x23(%rdi)
1312 1313 L(P3Q3): mov %rax, -0x1b(%rdi)
1313 1314 L(P3Q2): mov %rax, -0x13(%rdi)
1314 1315 L(P3Q1): mov %rax, -0xb(%rdi)
1315 1316 L(P3Q0): mov %ax, -0x3(%rdi)
1316 1317 mov %al, -0x1(%rdi)
1317 1318 ret
1318 1319
1319 1320 .p2align 4
1320 1321 L(P4Q9): mov %rax, -0x4c(%rdi)
1321 1322 L(P4Q8): mov %rax, -0x44(%rdi)
1322 1323 L(P4Q7): mov %rax, -0x3c(%rdi)
1323 1324 L(P4Q6): mov %rax, -0x34(%rdi)
1324 1325 L(P4Q5): mov %rax, -0x2c(%rdi)
1325 1326 L(P4Q4): mov %rax, -0x24(%rdi)
1326 1327 L(P4Q3): mov %rax, -0x1c(%rdi)
1327 1328 L(P4Q2): mov %rax, -0x14(%rdi)
1328 1329 L(P4Q1): mov %rax, -0xc(%rdi)
1329 1330 L(P4Q0): mov %eax, -0x4(%rdi)
1330 1331 ret
1331 1332
1332 1333 .p2align 4
1333 1334 L(P5Q9): mov %rax, -0x4d(%rdi)
1334 1335 L(P5Q8): mov %rax, -0x45(%rdi)
1335 1336 L(P5Q7): mov %rax, -0x3d(%rdi)
1336 1337 L(P5Q6): mov %rax, -0x35(%rdi)
1337 1338 L(P5Q5): mov %rax, -0x2d(%rdi)
1338 1339 L(P5Q4): mov %rax, -0x25(%rdi)
1339 1340 L(P5Q3): mov %rax, -0x1d(%rdi)
1340 1341 L(P5Q2): mov %rax, -0x15(%rdi)
1341 1342 L(P5Q1): mov %rax, -0xd(%rdi)
1342 1343 L(P5Q0): mov %eax, -0x5(%rdi)
1343 1344 mov %al, -0x1(%rdi)
1344 1345 ret
1345 1346
1346 1347 .p2align 4
1347 1348 L(P6Q9): mov %rax, -0x4e(%rdi)
1348 1349 L(P6Q8): mov %rax, -0x46(%rdi)
1349 1350 L(P6Q7): mov %rax, -0x3e(%rdi)
1350 1351 L(P6Q6): mov %rax, -0x36(%rdi)
1351 1352 L(P6Q5): mov %rax, -0x2e(%rdi)
1352 1353 L(P6Q4): mov %rax, -0x26(%rdi)
1353 1354 L(P6Q3): mov %rax, -0x1e(%rdi)
1354 1355 L(P6Q2): mov %rax, -0x16(%rdi)
1355 1356 L(P6Q1): mov %rax, -0xe(%rdi)
1356 1357 L(P6Q0): mov %eax, -0x6(%rdi)
1357 1358 mov %ax, -0x2(%rdi)
1358 1359 ret
1359 1360
1360 1361 .p2align 4
1361 1362 L(P7Q9): mov %rax, -0x4f(%rdi)
1362 1363 L(P7Q8): mov %rax, -0x47(%rdi)
1363 1364 L(P7Q7): mov %rax, -0x3f(%rdi)
1364 1365 L(P7Q6): mov %rax, -0x37(%rdi)
1365 1366 L(P7Q5): mov %rax, -0x2f(%rdi)
1366 1367 L(P7Q4): mov %rax, -0x27(%rdi)
1367 1368 L(P7Q3): mov %rax, -0x1f(%rdi)
1368 1369 L(P7Q2): mov %rax, -0x17(%rdi)
1369 1370 L(P7Q1): mov %rax, -0xf(%rdi)
1370 1371 L(P7Q0): mov %eax, -0x7(%rdi)
1371 1372 mov %ax, -0x3(%rdi)
1372 1373 mov %al, -0x1(%rdi)
1373 1374 ret
1374 1375
1375 1376 /*
1376 1377 * Align to a 16-byte boundary. Avoids penalties from unaligned stores
1377 1378 * as well as from stores spanning cachelines. Note 16-byte alignment
1378 1379 * is better in the case where rep sstoq is used.
1379 1380 */
1380 1381 .p2align 4
1381 1382 L(ck_align):
1382 1383 test $0xf, %rdi
1383 1384 jz L(aligned_now)
1384 1385 test $1, %rdi
1385 1386 jz 2f
1386 1387 mov %al, (%rdi)
1387 1388 dec %rsi
1388 1389 lea 1(%rdi),%rdi
1389 1390 2:
1390 1391 test $2, %rdi
1391 1392 jz 4f
1392 1393 mov %ax, (%rdi)
1393 1394 sub $2, %rsi
1394 1395 lea 2(%rdi),%rdi
1395 1396 4:
1396 1397 test $4, %rdi
1397 1398 jz 8f
1398 1399 mov %eax, (%rdi)
1399 1400 sub $4, %rsi
1400 1401 lea 4(%rdi),%rdi
1401 1402 8:
1402 1403 test $8, %rdi
1403 1404 jz L(aligned_now)
1404 1405 mov %rax, (%rdi)
1405 1406 sub $8, %rsi
1406 1407 lea 8(%rdi),%rdi
1407 1408
1408 1409 /*
1409 1410 * For large sizes rep sstoq is fastest.
1410 1411 * Transition point determined experimentally as measured on
1411 1412 * Intel Xeon processors (incl. Nehalem) and AMD Opteron.
1412 1413 */
1413 1414 L(aligned_now):
1414 1415 cmp $BZERO_USE_REP, %rsi
1415 1416 ja L(use_rep)
1416 1417
1417 1418 /*
1418 1419 * zero 64-bytes per loop
1419 1420 */
1420 1421 .p2align 4
1421 1422 L(bzero_loop):
1422 1423 leaq -0x40(%rsi), %rsi
1423 1424 cmpq $0x40, %rsi
1424 1425 movq %rax, (%rdi)
1425 1426 movq %rax, 0x8(%rdi)
1426 1427 movq %rax, 0x10(%rdi)
1427 1428 movq %rax, 0x18(%rdi)
1428 1429 movq %rax, 0x20(%rdi)
1429 1430 movq %rax, 0x28(%rdi)
1430 1431 movq %rax, 0x30(%rdi)
1431 1432 movq %rax, 0x38(%rdi)
1432 1433 leaq 0x40(%rdi), %rdi
1433 1434 jae L(bzero_loop)
1434 1435
1435 1436 /*
1436 1437 * Clear any remaining bytes.
1437 1438 */
1438 1439 9:
1439 1440 leaq L(setPxQx)(%rip), %r10
1440 1441 addq %rsi, %rdi
1441 1442 movslq (%r10,%rsi,4), %rcx
1442 1443 leaq (%rcx,%r10,1), %r10
1443 1444 jmpq *%r10
1444 1445
1445 1446 /*
1446 1447 * Use rep sstoq. Clear any remainder via unrolled code
1447 1448 */
1448 1449 .p2align 4
1449 1450 L(use_rep):
1450 1451 movq %rsi, %rcx /* get size in bytes */
1451 1452 shrq $3, %rcx /* count of 8-byte words to zero */
1452 1453 rep
1453 1454 sstoq /* %rcx = words to clear (%rax=0) */
1454 1455 andq $7, %rsi /* remaining bytes */
1455 1456 jnz 9b
1456 1457 ret
1457 1458 #undef L
1458 1459 SET_SIZE(bzero_altentry)
1459 1460 SET_SIZE(bzero)
1460 1461
1461 1462 #elif defined(__i386)
1462 1463
1463 1464 #define ARG_ADDR 4
1464 1465 #define ARG_COUNT 8
1465 1466
1466 1467 ENTRY(bzero)
1467 1468 #ifdef DEBUG
1468 1469 movl postbootkernelbase, %eax
1469 1470 cmpl %eax, ARG_ADDR(%esp)
1470 1471 jnb 0f
1471 1472 pushl %ebp
1472 1473 movl %esp, %ebp
1473 1474 pushl $.bzero_panic_msg
1474 1475 call panic
1475 1476 0:
1476 1477 #endif
1477 1478 do_zero:
1478 1479 movl %edi, %edx
1479 1480 movl ARG_COUNT(%esp), %ecx
1480 1481 movl ARG_ADDR(%esp), %edi
1481 1482 shrl $2, %ecx
1482 1483 xorl %eax, %eax
1483 1484 rep
1484 1485 sstol
1485 1486 movl ARG_COUNT(%esp), %ecx
1486 1487 andl $3, %ecx
1487 1488 rep
1488 1489 sstob
1489 1490 movl %edx, %edi
1490 1491 ret
1491 1492 SET_SIZE(bzero)
1492 1493
1493 1494 #undef ARG_ADDR
1494 1495 #undef ARG_COUNT
1495 1496
1496 1497 #endif /* __i386 */
1497 1498 #endif /* __lint */
1498 1499
1499 1500 /*
1500 1501 * Transfer data to and from user space -
1501 1502 * Note that these routines can cause faults.
1502 1503 * It is assumed that the kernel has nothing at
1503 1504 * less than KERNELBASE in the virtual address space.
1504 1505 *
1505 1506 * Note that copyin(9F) and copyout(9F) are part of the
1506 1507 * DDI/DKI which specifies that they return '-1' on "errors."
1507 1508 *
1508 1509 * Sigh.
1509 1510 *
1510 1511 * So there are two extremely similar routines - xcopyin_nta() and
1511 1512 * xcopyout_nta() which return the errno that we've faithfully computed.
1512 1513 * This allows other callers (e.g. uiomove(9F)) to work correctly.
1513 1514 * Given that these are used pretty heavily, we expand the calling
1514 1515 * sequences inline for all flavours (rather than making wrappers).
1515 1516 */
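To make the two conventions concrete, here is a small user-space sketch with hypothetical stand-ins (ddi_style_copyin() and errno_style_copyin() are not the real routines): a copyin(9F)-style caller only learns that the copy failed, while an xcopyin_nta()-style caller gets the computed errno back, which is what lets callers such as uiomove(9F) propagate a meaningful error.

    #include <errno.h>
    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    /* Hypothetical stand-in for the copyin(9F) convention: -1 on any failure. */
    static int
    ddi_style_copyin(const void *uaddr, void *kaddr, size_t count)
    {
    	if (uaddr == NULL)		/* stands in for "took a fault" */
    		return (-1);
    	memcpy(kaddr, uaddr, count);
    	return (0);
    }

    /* Hypothetical stand-in for the xcopyin_nta() convention: errno on failure. */
    static int
    errno_style_copyin(const void *uaddr, void *kaddr, size_t count)
    {
    	if (uaddr == NULL)
    		return (EFAULT);
    	memcpy(kaddr, uaddr, count);
    	return (0);
    }

    int
    main(void)
    {
    	char buf[8];
    	int err;

    	if (ddi_style_copyin(NULL, buf, sizeof (buf)) == -1)
    		(void) printf("copyin-style: failed, reason unknown\n");
    	if ((err = errno_style_copyin(NULL, buf, sizeof (buf))) != 0)
    		(void) printf("xcopyin-style: failed with errno %d\n", err);
    	return (0);
    }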
1516 1517
1517 1518 /*
1518 1519 * Copy user data to kernel space.
1519 1520 */
1520 1521
1521 1522 #if defined(__lint)
1522 1523
1523 1524 /* ARGSUSED */
1524 1525 int
1525 1526 copyin(const void *uaddr, void *kaddr, size_t count)
1526 1527 { return (0); }
1527 1528
1528 1529 #else /* lint */
1529 1530
1530 1531 #if defined(__amd64)
1531 1532
1532 1533 ENTRY(copyin)
1533 1534 pushq %rbp
1534 1535 movq %rsp, %rbp
1535 1536 subq $24, %rsp
1536 1537
1537 1538 /*
1538 1539 * save args in case we trap and need to rerun as a copyop
1539 1540 */
1540 1541 movq %rdi, (%rsp)
1541 1542 movq %rsi, 0x8(%rsp)
1542 1543 movq %rdx, 0x10(%rsp)
1543 1544
1544 1545 movq kernelbase(%rip), %rax
1545 1546 #ifdef DEBUG
1546 1547 cmpq %rax, %rsi /* %rsi = kaddr */
1547 1548 jnb 1f
1548 1549 leaq .copyin_panic_msg(%rip), %rdi
1549 1550 xorl %eax, %eax
1550 1551 call panic
1551 1552 1:
1552 1553 #endif
1553 1554 /*
1554 1555 * pass lofault value as 4th argument to do_copy_fault
1555 1556 */
1556 1557 leaq _copyin_err(%rip), %rcx
1557 1558
1558 1559 movq %gs:CPU_THREAD, %r9
1559 1560 cmpq %rax, %rdi /* test uaddr < kernelbase */
1560 1561 jae 3f /* take copyop if uaddr >= kernelbase */
1561 1562 SMAP_DISABLE_INSTR(0)
1562 1563 jmp do_copy_fault /* Takes care of leave for us */
1563 1564
1564 1565 _copyin_err:
1565 1566 SMAP_ENABLE_INSTR(2)
1566 1567 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1567 1568 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1568 1569 3:
1569 1570 movq T_COPYOPS(%r9), %rax
1570 1571 cmpq $0, %rax
1571 1572 jz 2f
1572 1573 /*
1573 1574 * reload args for the copyop
1574 1575 */
1575 1576 movq (%rsp), %rdi
1576 1577 movq 0x8(%rsp), %rsi
1577 1578 movq 0x10(%rsp), %rdx
1578 1579 leave
1579 1580 jmp *CP_COPYIN(%rax)
1580 1581
1581 1582 2: movl $-1, %eax
1582 1583 leave
1583 1584 ret
1584 1585 SET_SIZE(copyin)
1585 1586
1586 1587 #elif defined(__i386)
1587 1588
1588 1589 #define ARG_UADDR 4
1589 1590 #define ARG_KADDR 8
1590 1591
1591 1592 ENTRY(copyin)
1592 1593 movl kernelbase, %ecx
1593 1594 #ifdef DEBUG
1594 1595 cmpl %ecx, ARG_KADDR(%esp)
1595 1596 jnb 1f
1596 1597 pushl %ebp
1597 1598 movl %esp, %ebp
1598 1599 pushl $.copyin_panic_msg
1599 1600 call panic
1600 1601 1:
1601 1602 #endif
1602 1603 lea _copyin_err, %eax
1603 1604
1604 1605 movl %gs:CPU_THREAD, %edx
1605 1606 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
1606 1607 jb do_copy_fault
1607 1608 jmp 3f
1608 1609
1609 1610 _copyin_err:
1610 1611 popl %ecx
1611 1612 popl %edi
1612 1613 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */
1613 1614 popl %esi
1614 1615 popl %ebp
1615 1616 3:
1616 1617 movl T_COPYOPS(%edx), %eax
1617 1618 cmpl $0, %eax
1618 1619 jz 2f
1619 1620 jmp *CP_COPYIN(%eax)
1620 1621
1621 1622 2: movl $-1, %eax
1622 1623 ret
1623 1624 SET_SIZE(copyin)
1624 1625
1625 1626 #undef ARG_UADDR
1626 1627 #undef ARG_KADDR
1627 1628
1628 1629 #endif /* __i386 */
1629 1630 #endif /* __lint */
1630 1631
1631 1632 #if defined(__lint)
1632 1633
1633 1634 /* ARGSUSED */
1634 1635 int
1635 1636 xcopyin_nta(const void *uaddr, void *kaddr, size_t count, int copy_cached)
1636 1637 { return (0); }
1637 1638
1638 1639 #else /* __lint */
1639 1640
1640 1641 #if defined(__amd64)
1641 1642
1642 1643 ENTRY(xcopyin_nta)
1643 1644 pushq %rbp
1644 1645 movq %rsp, %rbp
1645 1646 subq $24, %rsp
1646 1647
1647 1648 /*
1648 1649 * save args in case we trap and need to rerun as a copyop
1649 1650 * %rcx is consumed in this routine so we don't need to save
1650 1651 * it.
1651 1652 */
1652 1653 movq %rdi, (%rsp)
1653 1654 movq %rsi, 0x8(%rsp)
1654 1655 movq %rdx, 0x10(%rsp)
1655 1656
1656 1657 movq kernelbase(%rip), %rax
1657 1658 #ifdef DEBUG
1658 1659 cmpq %rax, %rsi /* %rsi = kaddr */
1659 1660 jnb 1f
1660 1661 leaq .xcopyin_panic_msg(%rip), %rdi
1661 1662 xorl %eax, %eax
1662 1663 call panic
1663 1664 1:
1664 1665 #endif
1665 1666 movq %gs:CPU_THREAD, %r9
1666 1667 cmpq %rax, %rdi /* test uaddr < kernelbase */
1667 1668 jae 4f
1668 1669 cmpq $0, %rcx /* No non-temporal access? */
1669 1670 /*
1670 1671 * pass lofault value as 4th argument to do_copy_fault
1671 1672 */
1672 1673 leaq _xcopyin_err(%rip), %rcx /* doesn't set rflags */
1673 1674 jnz 6f /* use regular access */
1674 1675 /*
1675 1676 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1676 1677 */
1677 1678 cmpq $XCOPY_MIN_SIZE, %rdx
1678 1679 jae 5f
1679 1680 6:
1680 1681 SMAP_DISABLE_INSTR(1)
1681 1682 jmp do_copy_fault
1682 1683
1683 1684 /*
1684 1685 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1685 1686 * count is COUNT_ALIGN_SIZE aligned.
1686 1687 */
1687 1688 5:
1688 1689 movq %rdi, %r10
1689 1690 orq %rsi, %r10
1690 1691 andq $NTA_ALIGN_MASK, %r10
1691 1692 orq %rdx, %r10
1692 1693 andq $COUNT_ALIGN_MASK, %r10
1693 1694 jnz 6b
1694 1695 leaq _xcopyin_nta_err(%rip), %rcx /* doesn't set rflags */
1695 1696 SMAP_DISABLE_INSTR(2)
1696 1697 jmp do_copy_fault_nta /* use non-temporal access */
1697 1698
1698 1699 4:
1699 1700 movl $EFAULT, %eax
1700 1701 jmp 3f
1701 1702
1702 1703 /*
1703 1704 * A fault during do_copy_fault or do_copy_fault_nta is
1704 1705 * indicated through an errno value in %rax and we iret from the
1705 1706 * trap handler to here.
1706 1707 */
1707 1708 _xcopyin_err:
1708 1709 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1709 1710 _xcopyin_nta_err:
1710 1711 SMAP_ENABLE_INSTR(3)
1711 1712 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1712 1713 3:
1713 1714 movq T_COPYOPS(%r9), %r8
1714 1715 cmpq $0, %r8
1715 1716 jz 2f
1716 1717
1717 1718 /*
1718 1719 * reload args for the copyop
1719 1720 */
1720 1721 movq (%rsp), %rdi
1721 1722 movq 0x8(%rsp), %rsi
1722 1723 movq 0x10(%rsp), %rdx
1723 1724 leave
1724 1725 jmp *CP_XCOPYIN(%r8)
1725 1726
1726 1727 2: leave
1727 1728 ret
1728 1729 SET_SIZE(xcopyin_nta)
1729 1730
1730 1731 #elif defined(__i386)
1731 1732
1732 1733 #define ARG_UADDR 4
1733 1734 #define ARG_KADDR 8
1734 1735 #define ARG_COUNT 12
1735 1736 #define ARG_CACHED 16
1736 1737
1737 1738 .globl use_sse_copy
1738 1739
1739 1740 ENTRY(xcopyin_nta)
1740 1741 movl kernelbase, %ecx
1741 1742 lea _xcopyin_err, %eax
1742 1743 movl %gs:CPU_THREAD, %edx
1743 1744 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
1744 1745 jae 4f
1745 1746
1746 1747 cmpl $0, use_sse_copy /* no sse support */
1747 1748 jz do_copy_fault
1748 1749
1749 1750 cmpl $0, ARG_CACHED(%esp) /* copy_cached hint set? */
1750 1751 jnz do_copy_fault
1751 1752
1752 1753 /*
1753 1754 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1754 1755 */
1755 1756 cmpl $XCOPY_MIN_SIZE, ARG_COUNT(%esp)
1756 1757 jb do_copy_fault
1757 1758
1758 1759 /*
1759 1760 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1760 1761 * count is COUNT_ALIGN_SIZE aligned.
1761 1762 */
1762 1763 movl ARG_UADDR(%esp), %ecx
1763 1764 orl ARG_KADDR(%esp), %ecx
1764 1765 andl $NTA_ALIGN_MASK, %ecx
1765 1766 orl ARG_COUNT(%esp), %ecx
1766 1767 andl $COUNT_ALIGN_MASK, %ecx
1767 1768 jnz do_copy_fault
1768 1769
1769 1770 jmp do_copy_fault_nta /* use non-temporal access */
1770 1771
1771 1772 4:
1772 1773 movl $EFAULT, %eax
1773 1774 jmp 3f
1774 1775
1775 1776 /*
1776 1777 * A fault during do_copy_fault or do_copy_fault_nta is
1777 1778 * indicated through an errno value in %eax and we iret from the
1778 1779 * trap handler to here.
1779 1780 */
1780 1781 _xcopyin_err:
1781 1782 popl %ecx
1782 1783 popl %edi
1783 1784 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */
1784 1785 popl %esi
1785 1786 popl %ebp
1786 1787 3:
1787 1788 cmpl $0, T_COPYOPS(%edx)
1788 1789 jz 2f
1789 1790 movl T_COPYOPS(%edx), %eax
1790 1791 jmp *CP_XCOPYIN(%eax)
1791 1792
1792 1793 2: rep; ret /* use 2 byte return instruction when branch target */
1793 1794 /* AMD Software Optimization Guide - Section 6.2 */
1794 1795 SET_SIZE(xcopyin_nta)
1795 1796
1796 1797 #undef ARG_UADDR
1797 1798 #undef ARG_KADDR
1798 1799 #undef ARG_COUNT
1799 1800 #undef ARG_CACHED
1800 1801
1801 1802 #endif /* __i386 */
1802 1803 #endif /* __lint */
1803 1804
1804 1805 /*
1805 1806 * Copy kernel data to user space.
1806 1807 */
1807 1808
1808 1809 #if defined(__lint)
1809 1810
1810 1811 /* ARGSUSED */
1811 1812 int
1812 1813 copyout(const void *kaddr, void *uaddr, size_t count)
1813 1814 { return (0); }
1814 1815
1815 1816 #else /* __lint */
1816 1817
1817 1818 #if defined(__amd64)
1818 1819
1819 1820 ENTRY(copyout)
1820 1821 pushq %rbp
1821 1822 movq %rsp, %rbp
1822 1823 subq $24, %rsp
1823 1824
1824 1825 /*
1825 1826 * save args in case we trap and need to rerun as a copyop
1826 1827 */
1827 1828 movq %rdi, (%rsp)
1828 1829 movq %rsi, 0x8(%rsp)
1829 1830 movq %rdx, 0x10(%rsp)
1830 1831
1831 1832 movq kernelbase(%rip), %rax
1832 1833 #ifdef DEBUG
1833 1834 cmpq %rax, %rdi /* %rdi = kaddr */
1834 1835 jnb 1f
1835 1836 leaq .copyout_panic_msg(%rip), %rdi
1836 1837 xorl %eax, %eax
1837 1838 call panic
1838 1839 1:
1839 1840 #endif
1840 1841 /*
1841 1842 * pass lofault value as 4th argument to do_copy_fault
1842 1843 */
1843 1844 leaq _copyout_err(%rip), %rcx
1844 1845
1845 1846 movq %gs:CPU_THREAD, %r9
1846 1847 cmpq %rax, %rsi /* test uaddr < kernelbase */
1847 1848 jae 3f /* take copyop if uaddr >= kernelbase */
1848 1849 SMAP_DISABLE_INSTR(3)
1849 1850 jmp do_copy_fault /* Calls leave for us */
1850 1851
1851 1852 _copyout_err:
1852 1853 SMAP_ENABLE_INSTR(4)
1853 1854 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1854 1855 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1855 1856 3:
1856 1857 movq T_COPYOPS(%r9), %rax
1857 1858 cmpq $0, %rax
1858 1859 jz 2f
1859 1860
1860 1861 /*
1861 1862 * reload args for the copyop
1862 1863 */
1863 1864 movq (%rsp), %rdi
1864 1865 movq 0x8(%rsp), %rsi
1865 1866 movq 0x10(%rsp), %rdx
1866 1867 leave
1867 1868 jmp *CP_COPYOUT(%rax)
1868 1869
1869 1870 2: movl $-1, %eax
1870 1871 leave
1871 1872 ret
1872 1873 SET_SIZE(copyout)
1873 1874
1874 1875 #elif defined(__i386)
1875 1876
1876 1877 #define ARG_KADDR 4
1877 1878 #define ARG_UADDR 8
1878 1879
1879 1880 ENTRY(copyout)
1880 1881 movl kernelbase, %ecx
1881 1882 #ifdef DEBUG
1882 1883 cmpl %ecx, ARG_KADDR(%esp)
1883 1884 jnb 1f
1884 1885 pushl %ebp
1885 1886 movl %esp, %ebp
1886 1887 pushl $.copyout_panic_msg
1887 1888 call panic
1888 1889 1:
1889 1890 #endif
1890 1891 lea _copyout_err, %eax
1891 1892 movl %gs:CPU_THREAD, %edx
1892 1893 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
1893 1894 jb do_copy_fault
1894 1895 jmp 3f
1895 1896
1896 1897 _copyout_err:
1897 1898 popl %ecx
1898 1899 popl %edi
1899 1900 movl %ecx, T_LOFAULT(%edx) /* restore original lofault */
1900 1901 popl %esi
1901 1902 popl %ebp
1902 1903 3:
1903 1904 movl T_COPYOPS(%edx), %eax
1904 1905 cmpl $0, %eax
1905 1906 jz 2f
1906 1907 jmp *CP_COPYOUT(%eax)
1907 1908
1908 1909 2: movl $-1, %eax
1909 1910 ret
1910 1911 SET_SIZE(copyout)
1911 1912
1912 1913 #undef ARG_UADDR
1913 1914 #undef ARG_KADDR
1914 1915
1915 1916 #endif /* __i386 */
1916 1917 #endif /* __lint */
1917 1918
1918 1919 #if defined(__lint)
1919 1920
1920 1921 /* ARGSUSED */
1921 1922 int
1922 1923 xcopyout_nta(const void *kaddr, void *uaddr, size_t count, int copy_cached)
1923 1924 { return (0); }
1924 1925
1925 1926 #else /* __lint */
1926 1927
1927 1928 #if defined(__amd64)
1928 1929
1929 1930 ENTRY(xcopyout_nta)
1930 1931 pushq %rbp
1931 1932 movq %rsp, %rbp
1932 1933 subq $24, %rsp
1933 1934
1934 1935 /*
1935 1936 * save args in case we trap and need to rerun as a copyop
1936 1937 */
1937 1938 movq %rdi, (%rsp)
1938 1939 movq %rsi, 0x8(%rsp)
1939 1940 movq %rdx, 0x10(%rsp)
1940 1941
1941 1942 movq kernelbase(%rip), %rax
1942 1943 #ifdef DEBUG
1943 1944 cmpq %rax, %rdi /* %rdi = kaddr */
1944 1945 jnb 1f
1945 1946 leaq .xcopyout_panic_msg(%rip), %rdi
1946 1947 xorl %eax, %eax
1947 1948 call panic
1948 1949 1:
1949 1950 #endif
1950 1951 movq %gs:CPU_THREAD, %r9
1951 1952 cmpq %rax, %rsi /* test uaddr < kernelbase */
1952 1953 jae 4f
1953 1954
1954 1955 cmpq $0, %rcx /* No non-temporal access? */
1955 1956 /*
1956 1957 * pass lofault value as 4th argument to do_copy_fault
1957 1958 */
1958 1959 leaq _xcopyout_err(%rip), %rcx
1959 1960 jnz 6f
1960 1961 /*
1961 1962 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1962 1963 */
1963 1964 cmpq $XCOPY_MIN_SIZE, %rdx
1964 1965 jae 5f
1965 1966 6:
1966 1967 SMAP_DISABLE_INSTR(4)
1967 1968 jmp do_copy_fault
1968 1969
1969 1970 /*
1970 1971 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1971 1972 * count is COUNT_ALIGN_SIZE aligned.
1972 1973 */
1973 1974 5:
1974 1975 movq %rdi, %r10
1975 1976 orq %rsi, %r10
1976 1977 andq $NTA_ALIGN_MASK, %r10
1977 1978 orq %rdx, %r10
1978 1979 andq $COUNT_ALIGN_MASK, %r10
1979 1980 jnz 6b
1980 1981 leaq _xcopyout_nta_err(%rip), %rcx
1981 1982 SMAP_DISABLE_INSTR(5)
1982 1983 call do_copy_fault_nta
1983 1984 SMAP_ENABLE_INSTR(5)
1984 1985 ret
1985 1986
1986 1987 4:
1987 1988 movl $EFAULT, %eax
1988 1989 jmp 3f
1989 1990
1990 1991 /*
1991 1992 * A fault during do_copy_fault or do_copy_fault_nta is
1992 1993 * indicated through an errno value in %rax and we iret from the
1993 1994 * trap handler to here.
1994 1995 */
1995 1996 _xcopyout_err:
1996 1997 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1997 1998 _xcopyout_nta_err:
1998 1999 SMAP_ENABLE_INSTR(6)
1999 2000 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
2000 2001 3:
2001 2002 movq T_COPYOPS(%r9), %r8
2002 2003 cmpq $0, %r8
2003 2004 jz 2f
2004 2005
2005 2006 /*
2006 2007 * reload args for the copyop
2007 2008 */
2008 2009 movq (%rsp), %rdi
2009 2010 movq 0x8(%rsp), %rsi
2010 2011 movq 0x10(%rsp), %rdx
2011 2012 leave
2012 2013 jmp *CP_XCOPYOUT(%r8)
2013 2014
2014 2015 2: leave
2015 2016 ret
2016 2017 SET_SIZE(xcopyout_nta)
2017 2018
2018 2019 #elif defined(__i386)
2019 2020
2020 2021 #define ARG_KADDR 4
2021 2022 #define ARG_UADDR 8
2022 2023 #define ARG_COUNT 12
2023 2024 #define ARG_CACHED 16
2024 2025
2025 2026 ENTRY(xcopyout_nta)
2026 2027 movl kernelbase, %ecx
2027 2028 lea _xcopyout_err, %eax
2028 2029 movl %gs:CPU_THREAD, %edx
2029 2030 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
2030 2031 jae 4f
2031 2032
2032 2033 cmpl $0, use_sse_copy /* no sse support */
2033 2034 jz do_copy_fault
2034 2035
2035 2036 cmpl $0, ARG_CACHED(%esp) /* copy_cached hint set? */
2036 2037 jnz do_copy_fault
2037 2038
2038 2039 /*
2039 2040 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
2040 2041 */
2041 2042 cmpl $XCOPY_MIN_SIZE, %edx
2042 2043 jb do_copy_fault
2043 2044
2044 2045 /*
2045 2046 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
2046 2047 * count is COUNT_ALIGN_SIZE aligned.
2047 2048 */
2048 2049 movl ARG_UADDR(%esp), %ecx
2049 2050 orl ARG_KADDR(%esp), %ecx
2050 2051 andl $NTA_ALIGN_MASK, %ecx
2051 2052 orl ARG_COUNT(%esp), %ecx
2052 2053 andl $COUNT_ALIGN_MASK, %ecx
2053 2054 jnz do_copy_fault
2054 2055 jmp do_copy_fault_nta
2055 2056
2056 2057 4:
2057 2058 movl $EFAULT, %eax
2058 2059 jmp 3f
2059 2060
2060 2061 /*
2061 2062 * A fault during do_copy_fault or do_copy_fault_nta is
2062 2063 * indicated through an errno value in %eax and we iret from the
2063 2064 * trap handler to here.
2064 2065 */
2065 2066 _xcopyout_err:
2066 2067 / restore the original lofault
2067 2068 popl %ecx
2068 2069 popl %edi
2069 2070 movl %ecx, T_LOFAULT(%edx) / original lofault
2070 2071 popl %esi
2071 2072 popl %ebp
2072 2073 3:
2073 2074 cmpl $0, T_COPYOPS(%edx)
2074 2075 jz 2f
2075 2076 movl T_COPYOPS(%edx), %eax
2076 2077 jmp *CP_XCOPYOUT(%eax)
2077 2078
2078 2079 2: rep; ret /* use 2 byte return instruction when branch target */
2079 2080 /* AMD Software Optimization Guide - Section 6.2 */
2080 2081 SET_SIZE(xcopyout_nta)
2081 2082
2082 2083 #undef ARG_UADDR
2083 2084 #undef ARG_KADDR
2084 2085 #undef ARG_COUNT
2085 2086 #undef ARG_CACHED
2086 2087
2087 2088 #endif /* __i386 */
2088 2089 #endif /* __lint */
2089 2090
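Before moving on, a hedged C restatement of the dispatch test that xcopyin_nta and xcopyout_nta apply above; the function name is hypothetical and the constants mirror the NTA_ALIGN_MASK, COUNT_ALIGN_MASK and XCOPY_MIN_SIZE definitions near the top of this file.

        #include <sys/types.h>

        #define	NTA_ALIGN_MASK		0x3	/* NTA_ALIGN_SIZE - 1 */
        #define	COUNT_ALIGN_MASK	0xf	/* COUNT_ALIGN_SIZE - 1 */
        #define	XCOPY_MIN_SIZE		128

        /*
         * Hypothetical restatement of the assembly checks: the non-temporal
         * path is taken only when no cached copy was requested, the count is
         * large enough, and source, destination and count all meet the
         * alignment requirements; anything else falls back to do_copy_fault.
         */
        static int
        use_nta_copy(uintptr_t src, uintptr_t dst, size_t count, int copy_cached)
        {
                if (copy_cached != 0)
                        return (0);
                if (count < XCOPY_MIN_SIZE)
                        return (0);
                if (((src | dst) & NTA_ALIGN_MASK) != 0)
                        return (0);
                if ((count & COUNT_ALIGN_MASK) != 0)
                        return (0);
                return (1);
        }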
2090 2091 /*
2091 2092 * Copy a null terminated string from one point to another in
2092 2093 * the kernel address space.
2093 2094 */
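As a hedged usage sketch (names hypothetical), copystr() is typically handed two kernel buffers and an optional length-out pointer:

        #include <sys/types.h>
        #include <sys/systm.h>

        /*
         * Hypothetical example: duplicate a kernel string into a
         * caller-supplied kernel buffer.  copystr() returns 0 on success or
         * ENAMETOOLONG if the source string (including its terminating NUL)
         * does not fit; on success *lenp holds the number of bytes copied,
         * NUL included.
         */
        static int
        dup_kernel_name(const char *src, char *dst, size_t dstlen, size_t *lenp)
        {
                return (copystr(src, dst, dstlen, lenp));
        }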
2094 2095
2095 2096 #if defined(__lint)
2096 2097
2097 2098 /* ARGSUSED */
2098 2099 int
2099 2100 copystr(const char *from, char *to, size_t maxlength, size_t *lencopied)
2100 2101 { return (0); }
2101 2102
2102 2103 #else /* __lint */
2103 2104
2104 2105 #if defined(__amd64)
2105 2106
2106 2107 ENTRY(copystr)
2107 2108 pushq %rbp
2108 2109 movq %rsp, %rbp
2109 2110 #ifdef DEBUG
2110 2111 movq kernelbase(%rip), %rax
2111 2112 cmpq %rax, %rdi /* %rdi = from */
2112 2113 jb 0f
2113 2114 cmpq %rax, %rsi /* %rsi = to */
2114 2115 jnb 1f
2115 2116 0: leaq .copystr_panic_msg(%rip), %rdi
2116 2117 xorl %eax, %eax
2117 2118 call panic
2118 2119 1:
2119 2120 #endif
2120 2121 movq %gs:CPU_THREAD, %r9
2121 2122 movq T_LOFAULT(%r9), %r8 /* pass current lofault value as */
2122 2123 /* 5th argument to do_copystr */
2123 2124 xorl %r10d,%r10d /* pass smap restore need in %r10d */
2124 2125 /* as a non-ABI 6th arg */
2125 2126 do_copystr:
2126 2127 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
2127 2128 movq T_LOFAULT(%r9), %r11 /* save the current lofault */
2128 2129 movq %r8, T_LOFAULT(%r9) /* new lofault */
2129 2130
2130 2131 movq %rdx, %r8 /* save maxlength */
2131 2132
2132 2133 cmpq $0, %rdx /* %rdx = maxlength */
2133 2134 je copystr_enametoolong /* maxlength == 0 */
2134 2135
2135 2136 copystr_loop:
2136 2137 decq %r8
2137 2138 movb (%rdi), %al
2138 2139 incq %rdi
2139 2140 movb %al, (%rsi)
2140 2141 incq %rsi
2141 2142 cmpb $0, %al
2142 2143 je copystr_null /* null char */
2143 2144 cmpq $0, %r8
2144 2145 jne copystr_loop
2145 2146
2146 2147 copystr_enametoolong:
2147 2148 movl $ENAMETOOLONG, %eax
2148 2149 jmp copystr_out
2149 2150
2150 2151 copystr_null:
2151 2152 xorl %eax, %eax /* no error */
2152 2153
2153 2154 copystr_out:
2154 2155 cmpq $0, %rcx /* want length? */
2155 2156 je copystr_smap /* no */
2156 2157 subq %r8, %rdx /* compute length and store it */
2157 2158 movq %rdx, (%rcx)
2158 2159
2159 2160 copystr_smap:
2160 2161 cmpl $0, %r10d
2161 2162 jz copystr_done
2162 2163 SMAP_ENABLE_INSTR(7)
2163 2164
2164 2165 copystr_done:
2165 2166 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
2166 2167 leave
2167 2168 ret
2168 2169 SET_SIZE(copystr)
2169 2170
2170 2171 #elif defined(__i386)
2171 2172
2172 2173 #define ARG_FROM 8
2173 2174 #define ARG_TO 12
2174 2175 #define ARG_MAXLEN 16
2175 2176 #define ARG_LENCOPIED 20
2176 2177
2177 2178 ENTRY(copystr)
2178 2179 #ifdef DEBUG
2179 2180 pushl %ebp
2180 2181 movl %esp, %ebp
2181 2182 movl kernelbase, %eax
2182 2183 cmpl %eax, ARG_FROM(%esp)
2183 2184 jb 0f
2184 2185 cmpl %eax, ARG_TO(%esp)
2185 2186 jnb 1f
2186 2187 0: pushl $.copystr_panic_msg
2187 2188 call panic
2188 2189 1: popl %ebp
2189 2190 #endif
2190 2191 /* get the current lofault address */
2191 2192 movl %gs:CPU_THREAD, %eax
2192 2193 movl T_LOFAULT(%eax), %eax
2193 2194 do_copystr:
2194 2195 pushl %ebp /* setup stack frame */
2195 2196 movl %esp, %ebp
2196 2197 pushl %ebx /* save registers */
2197 2198 pushl %edi
2198 2199
2199 2200 movl %gs:CPU_THREAD, %ebx
2200 2201 movl T_LOFAULT(%ebx), %edi
2201 2202 pushl %edi /* save the current lofault */
2202 2203 movl %eax, T_LOFAULT(%ebx) /* new lofault */
2203 2204
2204 2205 movl ARG_MAXLEN(%ebp), %ecx
2205 2206 cmpl $0, %ecx
2206 2207 je copystr_enametoolong /* maxlength == 0 */
2207 2208
2208 2209 movl ARG_FROM(%ebp), %ebx /* source address */
2209 2210 movl ARG_TO(%ebp), %edx /* destination address */
2210 2211
2211 2212 copystr_loop:
2212 2213 decl %ecx
2213 2214 movb (%ebx), %al
2214 2215 incl %ebx
2215 2216 movb %al, (%edx)
2216 2217 incl %edx
2217 2218 cmpb $0, %al
2218 2219 je copystr_null /* null char */
2219 2220 cmpl $0, %ecx
2220 2221 jne copystr_loop
2221 2222
2222 2223 copystr_enametoolong:
2223 2224 movl $ENAMETOOLONG, %eax
2224 2225 jmp copystr_out
2225 2226
2226 2227 copystr_null:
2227 2228 xorl %eax, %eax /* no error */
2228 2229
2229 2230 copystr_out:
2230 2231 cmpl $0, ARG_LENCOPIED(%ebp) /* want length? */
2231 2232 je copystr_done /* no */
2232 2233 movl ARG_MAXLEN(%ebp), %edx
2233 2234 subl %ecx, %edx /* compute length and store it */
2234 2235 movl ARG_LENCOPIED(%ebp), %ecx
2235 2236 movl %edx, (%ecx)
2236 2237
2237 2238 copystr_done:
2238 2239 popl %edi
2239 2240 movl %gs:CPU_THREAD, %ebx
2240 2241 movl %edi, T_LOFAULT(%ebx) /* restore the original lofault */
2241 2242
2242 2243 popl %edi
2243 2244 popl %ebx
2244 2245 popl %ebp
2245 2246 ret
2246 2247 SET_SIZE(copystr)
2247 2248
2248 2249 #undef ARG_FROM
2249 2250 #undef ARG_TO
2250 2251 #undef ARG_MAXLEN
2251 2252 #undef ARG_LENCOPIED
2252 2253
2253 2254 #endif /* __i386 */
2254 2255 #endif /* __lint */
2255 2256
2256 2257 /*
2257 2258 * Copy a null terminated string from the user address space into
2258 2259 * the kernel address space.
2259 2260 */
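A hedged sketch of the usual calling pattern, with hypothetical names; only the copyinstr() prototype below comes from this file.

        #include <sys/types.h>
        #include <sys/param.h>
        #include <sys/systm.h>
        #include <sys/kmem.h>

        /*
         * Hypothetical example: bring a user-supplied path into the kernel.
         * copyinstr() returns 0 on success, EFAULT if the user address is
         * bad, or ENAMETOOLONG if the string does not fit in MAXPATHLEN
         * bytes.
         */
        static int
        fetch_user_path(const char *upath, char **kpathp)
        {
                char *kpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
                size_t len;
                int err;

                if ((err = copyinstr(upath, kpath, MAXPATHLEN, &len)) != 0) {
                        kmem_free(kpath, MAXPATHLEN);
                        return (err);
                }
                *kpathp = kpath;
                return (0);
        }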
2260 2261
2261 2262 #if defined(__lint)
2262 2263
2263 2264 /* ARGSUSED */
2264 2265 int
2265 2266 copyinstr(const char *uaddr, char *kaddr, size_t maxlength,
2266 2267 size_t *lencopied)
2267 2268 { return (0); }
2268 2269
2269 2270 #else /* __lint */
2270 2271
2271 2272 #if defined(__amd64)
2272 2273
2273 2274 ENTRY(copyinstr)
2274 2275 pushq %rbp
2275 2276 movq %rsp, %rbp
2276 2277 subq $32, %rsp
2277 2278
2278 2279 /*
2279 2280 * save args in case we trap and need to rerun as a copyop
2280 2281 */
2281 2282 movq %rdi, (%rsp)
2282 2283 movq %rsi, 0x8(%rsp)
2283 2284 movq %rdx, 0x10(%rsp)
2284 2285 movq %rcx, 0x18(%rsp)
2285 2286
2286 2287 movq kernelbase(%rip), %rax
2287 2288 #ifdef DEBUG
2288 2289 cmpq %rax, %rsi /* %rsi = kaddr */
2289 2290 jnb 1f
2290 2291 leaq .copyinstr_panic_msg(%rip), %rdi
2291 2292 xorl %eax, %eax
2292 2293 call panic
2293 2294 1:
2294 2295 #endif
2295 2296 /*
2296 2297 * pass lofault value as 5th argument to do_copystr
2297 2298 * do_copystr expects whether or not we need smap in %r10d
2298 2299 */
2299 2300 leaq _copyinstr_error(%rip), %r8
2300 2301 movl $1, %r10d
2301 2302
2302 2303 cmpq %rax, %rdi /* test uaddr < kernelbase */
2303 2304 jae 4f
2304 2305 SMAP_DISABLE_INSTR(6)
2305 2306 jmp do_copystr
2306 2307 4:
2307 2308 movq %gs:CPU_THREAD, %r9
2308 2309 jmp 3f
2309 2310
2310 2311 _copyinstr_error:
2311 2312 SMAP_ENABLE_INSTR(8)
2312 2313 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
2313 2314 3:
2314 2315 movq T_COPYOPS(%r9), %rax
2315 2316 cmpq $0, %rax
2316 2317 jz 2f
2317 2318
2318 2319 /*
2319 2320 * reload args for the copyop
2320 2321 */
2321 2322 movq (%rsp), %rdi
2322 2323 movq 0x8(%rsp), %rsi
2323 2324 movq 0x10(%rsp), %rdx
2324 2325 movq 0x18(%rsp), %rcx
2325 2326 leave
2326 2327 jmp *CP_COPYINSTR(%rax)
2327 2328
2328 2329 2: movl $EFAULT, %eax /* return EFAULT */
2329 2330 leave
2330 2331 ret
2331 2332 SET_SIZE(copyinstr)
2332 2333
2333 2334 #elif defined(__i386)
2334 2335
2335 2336 #define ARG_UADDR 4
2336 2337 #define ARG_KADDR 8
2337 2338
2338 2339 ENTRY(copyinstr)
2339 2340 movl kernelbase, %ecx
2340 2341 #ifdef DEBUG
2341 2342 cmpl %ecx, ARG_KADDR(%esp)
2342 2343 jnb 1f
2343 2344 pushl %ebp
2344 2345 movl %esp, %ebp
2345 2346 pushl $.copyinstr_panic_msg
2346 2347 call panic
2347 2348 1:
2348 2349 #endif
2349 2350 lea _copyinstr_error, %eax
2350 2351 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
2351 2352 jb do_copystr
2352 2353 movl %gs:CPU_THREAD, %edx
2353 2354 jmp 3f
2354 2355
2355 2356 _copyinstr_error:
2356 2357 popl %edi
2357 2358 movl %gs:CPU_THREAD, %edx
2358 2359 movl %edi, T_LOFAULT(%edx) /* original lofault */
2359 2360
2360 2361 popl %edi
2361 2362 popl %ebx
2362 2363 popl %ebp
2363 2364 3:
2364 2365 movl T_COPYOPS(%edx), %eax
2365 2366 cmpl $0, %eax
2366 2367 jz 2f
2367 2368 jmp *CP_COPYINSTR(%eax)
2368 2369
2369 2370 2: movl $EFAULT, %eax /* return EFAULT */
2370 2371 ret
2371 2372 SET_SIZE(copyinstr)
2372 2373
2373 2374 #undef ARG_UADDR
2374 2375 #undef ARG_KADDR
2375 2376
2376 2377 #endif /* __i386 */
2377 2378 #endif /* __lint */
2378 2379
2379 2380 /*
2380 2381 * Copy a null terminated string from the kernel
2381 2382 * address space to the user address space.
2382 2383 */
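And the hedged counterpart for the outbound direction (names hypothetical):

        #include <sys/types.h>
        #include <sys/systm.h>

        /*
         * Hypothetical example: return a kernel-generated name to a user
         * buffer of ulen bytes.  The result is 0, EFAULT or ENAMETOOLONG,
         * exactly the values copyoutstr() itself produces.
         */
        static int
        return_name(const char *kname, char *uname, size_t ulen, size_t *lenp)
        {
                return (copyoutstr(kname, uname, ulen, lenp));
        }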
2383 2384
2384 2385 #if defined(__lint)
2385 2386
2386 2387 /* ARGSUSED */
2387 2388 int
2388 2389 copyoutstr(const char *kaddr, char *uaddr, size_t maxlength,
2389 2390 size_t *lencopied)
2390 2391 { return (0); }
2391 2392
2392 2393 #else /* __lint */
2393 2394
2394 2395 #if defined(__amd64)
2395 2396
2396 2397 ENTRY(copyoutstr)
2397 2398 pushq %rbp
2398 2399 movq %rsp, %rbp
2399 2400 subq $32, %rsp
2400 2401
2401 2402 /*
2402 2403 * save args in case we trap and need to rerun as a copyop
2403 2404 */
2404 2405 movq %rdi, (%rsp)
2405 2406 movq %rsi, 0x8(%rsp)
2406 2407 movq %rdx, 0x10(%rsp)
2407 2408 movq %rcx, 0x18(%rsp)
2408 2409
2409 2410 movq kernelbase(%rip), %rax
2410 2411 #ifdef DEBUG
2411 2412 cmpq %rax, %rdi /* %rdi = kaddr */
2412 2413 jnb 1f
2413 2414 leaq .copyoutstr_panic_msg(%rip), %rdi
2414 2415 jmp call_panic /* setup stack and call panic */
2415 2416 1:
2416 2417 #endif
2417 2418 /*
2418 2419 * pass lofault value as 5th argument to do_copystr
2419 2420 * pass one as 6th argument to do_copystr in %r10d
2420 2421 */
2421 2422 leaq _copyoutstr_error(%rip), %r8
2422 2423 movl $1, %r10d
2423 2424
2424 2425 cmpq %rax, %rsi /* test uaddr < kernelbase */
2425 2426 jae 4f
2426 2427 SMAP_DISABLE_INSTR(7)
2427 2428 jmp do_copystr
2428 2429 4:
2429 2430 movq %gs:CPU_THREAD, %r9
2430 2431 jmp 3f
2431 2432
2432 2433 _copyoutstr_error:
2433 2434 SMAP_ENABLE_INSTR(9)
2434 2435 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
2435 2436 3:
2436 2437 movq T_COPYOPS(%r9), %rax
2437 2438 cmpq $0, %rax
2438 2439 jz 2f
2439 2440
2440 2441 /*
2441 2442 * reload args for the copyop
2442 2443 */
2443 2444 movq (%rsp), %rdi
2444 2445 movq 0x8(%rsp), %rsi
2445 2446 movq 0x10(%rsp), %rdx
2446 2447 movq 0x18(%rsp), %rcx
2447 2448 leave
2448 2449 jmp *CP_COPYOUTSTR(%rax)
2449 2450
2450 2451 2: movl $EFAULT, %eax /* return EFAULT */
2451 2452 leave
2452 2453 ret
2453 2454 SET_SIZE(copyoutstr)
2454 2455
2455 2456 #elif defined(__i386)
2456 2457
2457 2458 #define ARG_KADDR 4
2458 2459 #define ARG_UADDR 8
2459 2460
2460 2461 ENTRY(copyoutstr)
2461 2462 movl kernelbase, %ecx
2462 2463 #ifdef DEBUG
2463 2464 cmpl %ecx, ARG_KADDR(%esp)
2464 2465 jnb 1f
2465 2466 pushl %ebp
2466 2467 movl %esp, %ebp
2467 2468 pushl $.copyoutstr_panic_msg
2468 2469 call panic
2469 2470 1:
2470 2471 #endif
2471 2472 lea _copyoutstr_error, %eax
2472 2473 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
2473 2474 jb do_copystr
2474 2475 movl %gs:CPU_THREAD, %edx
2475 2476 jmp 3f
2476 2477
2477 2478 _copyoutstr_error:
2478 2479 popl %edi
2479 2480 movl %gs:CPU_THREAD, %edx
2480 2481 movl %edi, T_LOFAULT(%edx) /* restore the original lofault */
2481 2482
2482 2483 popl %edi
2483 2484 popl %ebx
2484 2485 popl %ebp
2485 2486 3:
2486 2487 movl T_COPYOPS(%edx), %eax
2487 2488 cmpl $0, %eax
2488 2489 jz 2f
2489 2490 jmp *CP_COPYOUTSTR(%eax)
2490 2491
2491 2492 2: movl $EFAULT, %eax /* return EFAULT */
2492 2493 ret
2493 2494 SET_SIZE(copyoutstr)
2494 2495
2495 2496 #undef ARG_KADDR
2496 2497 #undef ARG_UADDR
2497 2498
2498 2499 #endif /* __i386 */
2499 2500 #endif /* __lint */
2500 2501
2501 2502 /*
2502 2503 * Since all of the fuword() variants are so similar, we have a macro to spit
2503 2504 * them out. This allows us to create DTrace-unobservable functions easily.
2504 2505 */
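For illustration, a hedged sketch of how a fuword variant is typically consumed; the helper is hypothetical, and the fuword32() prototype is the one generated below.

        #include <sys/types.h>
        #include <sys/systm.h>
        #include <sys/errno.h>

        /*
         * Hypothetical example: fetch a 32-bit flag word from user memory.
         * fuword32() returns 0 on success and -1 if the address faults or
         * lies above kernelbase, which we map to EFAULT.
         */
        static int
        read_user_flags(const void *uaddr, uint32_t *flagsp)
        {
                if (fuword32(uaddr, flagsp) == -1)
                        return (EFAULT);
                return (0);
        }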
2505 2506
2506 2507 #if defined(__lint)
2507 2508
2508 2509 #if defined(__amd64)
2509 2510
2510 2511 /* ARGSUSED */
2511 2512 int
2512 2513 fuword64(const void *addr, uint64_t *dst)
2513 2514 { return (0); }
2514 2515
2515 2516 #endif
2516 2517
2517 2518 /* ARGSUSED */
2518 2519 int
2519 2520 fuword32(const void *addr, uint32_t *dst)
2520 2521 { return (0); }
2521 2522
2522 2523 /* ARGSUSED */
2523 2524 int
2524 2525 fuword16(const void *addr, uint16_t *dst)
2525 2526 { return (0); }
2526 2527
2527 2528 /* ARGSUSED */
2528 2529 int
2529 2530 fuword8(const void *addr, uint8_t *dst)
2530 2531 { return (0); }
2531 2532
2532 2533 #else /* __lint */
2533 2534
2534 2535 #if defined(__amd64)
2535 2536
2536 2537 /*
2537 2538 * Note that we don't save and reload the arguments here
2538 2539 * because their values are not altered in the copy path.
2539 2540  * Additionally, when successful, we re-enable SMAP and return
2540 2541  * directly to our original caller.
2541 2542 */
2542 2543
2543 2544 #define FUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \
2544 2545 ENTRY(NAME) \
2545 2546 movq %gs:CPU_THREAD, %r9; \
2546 2547 cmpq kernelbase(%rip), %rdi; \
2547 2548 jae 1f; \
2548 2549 leaq _flt_/**/NAME, %rdx; \
2549 2550 movq %rdx, T_LOFAULT(%r9); \
2550 2551 SMAP_DISABLE_INSTR(DISNUM) \
2551 2552 INSTR (%rdi), REG; \
2552 2553 movq $0, T_LOFAULT(%r9); \
2553 2554 INSTR REG, (%rsi); \
2554 2555 xorl %eax, %eax; \
2555 2556 SMAP_ENABLE_INSTR(EN1) \
2556 2557 ret; \
2557 2558 _flt_/**/NAME: \
2558 2559 SMAP_ENABLE_INSTR(EN2) \
2559 2560 movq $0, T_LOFAULT(%r9); \
2560 2561 1: \
2561 2562 movq T_COPYOPS(%r9), %rax; \
2562 2563 cmpq $0, %rax; \
2563 2564 jz 2f; \
2564 2565 jmp *COPYOP(%rax); \
2565 2566 2: \
2566 2567 movl $-1, %eax; \
2567 2568 ret; \
2568 2569 SET_SIZE(NAME)
2569 2570
2570 2571 FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11)
2571 2572 FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13)
2572 2573 FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15)
2573 2574 FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17)
2574 2575
2575 2576 #elif defined(__i386)
2576 2577
2577 2578 #define FUWORD(NAME, INSTR, REG, COPYOP) \
2578 2579 ENTRY(NAME) \
2579 2580 movl %gs:CPU_THREAD, %ecx; \
2580 2581 movl kernelbase, %eax; \
2581 2582 cmpl %eax, 4(%esp); \
2582 2583 jae 1f; \
2583 2584 lea _flt_/**/NAME, %edx; \
2584 2585 movl %edx, T_LOFAULT(%ecx); \
2585 2586 movl 4(%esp), %eax; \
2586 2587 movl 8(%esp), %edx; \
2587 2588 INSTR (%eax), REG; \
2588 2589 movl $0, T_LOFAULT(%ecx); \
2589 2590 INSTR REG, (%edx); \
2590 2591 xorl %eax, %eax; \
2591 2592 ret; \
2592 2593 _flt_/**/NAME: \
2593 2594 movl $0, T_LOFAULT(%ecx); \
2594 2595 1: \
2595 2596 movl T_COPYOPS(%ecx), %eax; \
2596 2597 cmpl $0, %eax; \
2597 2598 jz 2f; \
2598 2599 jmp *COPYOP(%eax); \
2599 2600 2: \
2600 2601 movl $-1, %eax; \
2601 2602 ret; \
2602 2603 SET_SIZE(NAME)
2603 2604
2604 2605 FUWORD(fuword32, movl, %eax, CP_FUWORD32)
2605 2606 FUWORD(fuword16, movw, %ax, CP_FUWORD16)
2606 2607 FUWORD(fuword8, movb, %al, CP_FUWORD8)
2607 2608
2608 2609 #endif /* __i386 */
2609 2610
2610 2611 #undef FUWORD
2611 2612
2612 2613 #endif /* __lint */
2613 2614
2614 2615 /*
2615 2616 * Set user word.
2616 2617 */
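A matching hedged sketch for the store direction (helper name hypothetical):

        #include <sys/types.h>
        #include <sys/systm.h>
        #include <sys/errno.h>

        /*
         * Hypothetical example: store a 32-bit completion status into user
         * memory.  suword32() returns 0 on success and -1 on fault.
         */
        static int
        post_status(void *uaddr, uint32_t status)
        {
                return (suword32(uaddr, status) == -1 ? EFAULT : 0);
        }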
2617 2618
2618 2619 #if defined(__lint)
2619 2620
2620 2621 #if defined(__amd64)
2621 2622
2622 2623 /* ARGSUSED */
2623 2624 int
2624 2625 suword64(void *addr, uint64_t value)
2625 2626 { return (0); }
2626 2627
2627 2628 #endif
2628 2629
2629 2630 /* ARGSUSED */
2630 2631 int
2631 2632 suword32(void *addr, uint32_t value)
2632 2633 { return (0); }
2633 2634
2634 2635 /* ARGSUSED */
2635 2636 int
2636 2637 suword16(void *addr, uint16_t value)
2637 2638 { return (0); }
2638 2639
2639 2640 /* ARGSUSED */
2640 2641 int
2641 2642 suword8(void *addr, uint8_t value)
2642 2643 { return (0); }
2643 2644
2644 2645 #else /* lint */
2645 2646
2646 2647 #if defined(__amd64)
2647 2648
2648 2649 /*
2649 2650 * Note that we don't save and reload the arguments here
2650 2651 * because their values are not altered in the copy path.
2651 2652 */
2652 2653
2653 2654 #define SUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \
2654 2655 ENTRY(NAME) \
2655 2656 movq %gs:CPU_THREAD, %r9; \
2656 2657 cmpq kernelbase(%rip), %rdi; \
2657 2658 jae 1f; \
2658 2659 leaq _flt_/**/NAME, %rdx; \
2659 2660 SMAP_DISABLE_INSTR(DISNUM) \
2660 2661 movq %rdx, T_LOFAULT(%r9); \
2661 2662 INSTR REG, (%rdi); \
2662 2663 movq $0, T_LOFAULT(%r9); \
2663 2664 xorl %eax, %eax; \
2664 2665 SMAP_ENABLE_INSTR(EN1) \
2665 2666 ret; \
2666 2667 _flt_/**/NAME: \
2667 2668 SMAP_ENABLE_INSTR(EN2) \
2668 2669 movq $0, T_LOFAULT(%r9); \
2669 2670 1: \
2670 2671 movq T_COPYOPS(%r9), %rax; \
2671 2672 cmpq $0, %rax; \
2672 2673 jz 3f; \
2673 2674 jmp *COPYOP(%rax); \
2674 2675 3: \
2675 2676 movl $-1, %eax; \
2676 2677 ret; \
2677 2678 SET_SIZE(NAME)
2678 2679
2679 2680 SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19)
2680 2681 SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21)
2681 2682 SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23)
2682 2683 SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25)
2683 2684
2684 2685 #elif defined(__i386)
2685 2686
2686 2687 #define SUWORD(NAME, INSTR, REG, COPYOP) \
2687 2688 ENTRY(NAME) \
2688 2689 movl %gs:CPU_THREAD, %ecx; \
2689 2690 movl kernelbase, %eax; \
2690 2691 cmpl %eax, 4(%esp); \
2691 2692 jae 1f; \
2692 2693 lea _flt_/**/NAME, %edx; \
2693 2694 movl %edx, T_LOFAULT(%ecx); \
2694 2695 movl 4(%esp), %eax; \
2695 2696 movl 8(%esp), %edx; \
2696 2697 INSTR REG, (%eax); \
2697 2698 movl $0, T_LOFAULT(%ecx); \
2698 2699 xorl %eax, %eax; \
2699 2700 ret; \
2700 2701 _flt_/**/NAME: \
2701 2702 movl $0, T_LOFAULT(%ecx); \
2702 2703 1: \
2703 2704 movl T_COPYOPS(%ecx), %eax; \
2704 2705 cmpl $0, %eax; \
2705 2706 jz 3f; \
2706 2707 movl COPYOP(%eax), %ecx; \
2707 2708 jmp *%ecx; \
2708 2709 3: \
2709 2710 movl $-1, %eax; \
2710 2711 ret; \
2711 2712 SET_SIZE(NAME)
2712 2713
2713 2714 SUWORD(suword32, movl, %edx, CP_SUWORD32)
2714 2715 SUWORD(suword16, movw, %dx, CP_SUWORD16)
2715 2716 SUWORD(suword8, movb, %dl, CP_SUWORD8)
2716 2717
2717 2718 #endif /* __i386 */
2718 2719
2719 2720 #undef SUWORD
2720 2721
2721 2722 #endif /* __lint */
2722 2723
2723 2724 #if defined(__lint)
2724 2725
2725 2726 #if defined(__amd64)
2726 2727
2727 2728 /*ARGSUSED*/
2728 2729 void
2729 2730 fuword64_noerr(const void *addr, uint64_t *dst)
2730 2731 {}
2731 2732
2732 2733 #endif
2733 2734
2734 2735 /*ARGSUSED*/
2735 2736 void
2736 2737 fuword32_noerr(const void *addr, uint32_t *dst)
2737 2738 {}
2738 2739
2739 2740 /*ARGSUSED*/
2740 2741 void
2741 2742 fuword8_noerr(const void *addr, uint8_t *dst)
2742 2743 {}
2743 2744
2744 2745 /*ARGSUSED*/
2745 2746 void
2746 2747 fuword16_noerr(const void *addr, uint16_t *dst)
2747 2748 {}
2748 2749
2749 2750 #else /* __lint */
2750 2751
2751 2752 #if defined(__amd64)
2752 2753
2753 2754 #define FUWORD_NOERR(NAME, INSTR, REG) \
2754 2755 ENTRY(NAME) \
2755 2756 cmpq kernelbase(%rip), %rdi; \
2756 2757 cmovnbq kernelbase(%rip), %rdi; \
2757 2758 INSTR (%rdi), REG; \
2758 2759 INSTR REG, (%rsi); \
2759 2760 ret; \
2760 2761 SET_SIZE(NAME)
2761 2762
2762 2763 FUWORD_NOERR(fuword64_noerr, movq, %rax)
2763 2764 FUWORD_NOERR(fuword32_noerr, movl, %eax)
2764 2765 FUWORD_NOERR(fuword16_noerr, movw, %ax)
2765 2766 FUWORD_NOERR(fuword8_noerr, movb, %al)
2766 2767
2767 2768 #elif defined(__i386)
2768 2769
2769 2770 #define FUWORD_NOERR(NAME, INSTR, REG) \
2770 2771 ENTRY(NAME) \
2771 2772 movl 4(%esp), %eax; \
2772 2773 cmpl kernelbase, %eax; \
2773 2774 jb 1f; \
2774 2775 movl kernelbase, %eax; \
2775 2776 1: movl 8(%esp), %edx; \
2776 2777 INSTR (%eax), REG; \
2777 2778 INSTR REG, (%edx); \
2778 2779 ret; \
2779 2780 SET_SIZE(NAME)
2780 2781
2781 2782 FUWORD_NOERR(fuword32_noerr, movl, %ecx)
2782 2783 FUWORD_NOERR(fuword16_noerr, movw, %cx)
2783 2784 FUWORD_NOERR(fuword8_noerr, movb, %cl)
2784 2785
2785 2786 #endif /* __i386 */
2786 2787
2787 2788 #undef FUWORD_NOERR
2788 2789
2789 2790 #endif /* __lint */
2790 2791
2791 2792 #if defined(__lint)
2792 2793
2793 2794 #if defined(__amd64)
2794 2795
2795 2796 /*ARGSUSED*/
2796 2797 void
2797 2798 suword64_noerr(void *addr, uint64_t value)
2798 2799 {}
2799 2800
2800 2801 #endif
2801 2802
2802 2803 /*ARGSUSED*/
2803 2804 void
2804 2805 suword32_noerr(void *addr, uint32_t value)
2805 2806 {}
2806 2807
2807 2808 /*ARGSUSED*/
2808 2809 void
2809 2810 suword16_noerr(void *addr, uint16_t value)
2810 2811 {}
2811 2812
2812 2813 /*ARGSUSED*/
2813 2814 void
2814 2815 suword8_noerr(void *addr, uint8_t value)
2815 2816 {}
2816 2817
2817 2818 #else /* lint */
2818 2819
2819 2820 #if defined(__amd64)
2820 2821
2821 2822 #define SUWORD_NOERR(NAME, INSTR, REG) \
2822 2823 ENTRY(NAME) \
2823 2824 cmpq kernelbase(%rip), %rdi; \
2824 2825 cmovnbq kernelbase(%rip), %rdi; \
2825 2826 INSTR REG, (%rdi); \
2826 2827 ret; \
2827 2828 SET_SIZE(NAME)
2828 2829
2829 2830 SUWORD_NOERR(suword64_noerr, movq, %rsi)
2830 2831 SUWORD_NOERR(suword32_noerr, movl, %esi)
2831 2832 SUWORD_NOERR(suword16_noerr, movw, %si)
2832 2833 SUWORD_NOERR(suword8_noerr, movb, %sil)
2833 2834
2834 2835 #elif defined(__i386)
2835 2836
2836 2837 #define SUWORD_NOERR(NAME, INSTR, REG) \
2837 2838 ENTRY(NAME) \
2838 2839 movl 4(%esp), %eax; \
2839 2840 cmpl kernelbase, %eax; \
2840 2841 jb 1f; \
2841 2842 movl kernelbase, %eax; \
2842 2843 1: \
2843 2844 movl 8(%esp), %edx; \
2844 2845 INSTR REG, (%eax); \
2845 2846 ret; \
2846 2847 SET_SIZE(NAME)
2847 2848
2848 2849 SUWORD_NOERR(suword32_noerr, movl, %edx)
2849 2850 SUWORD_NOERR(suword16_noerr, movw, %dx)
2850 2851 SUWORD_NOERR(suword8_noerr, movb, %dl)
2851 2852
2852 2853 #endif /* __i386 */
2853 2854
2854 2855 #undef SUWORD_NOERR
2855 2856
2856 2857 #endif /* lint */
2857 2858
2858 2859
2859 2860 #if defined(__lint)
2860 2861
2861 2862 /*ARGSUSED*/
2862 2863 int
2863 2864 subyte(void *addr, uchar_t value)
2864 2865 { return (0); }
2865 2866
2866 2867 /*ARGSUSED*/
2867 2868 void
2868 2869 subyte_noerr(void *addr, uchar_t value)
2869 2870 {}
2870 2871
2871 2872 /*ARGSUSED*/
2872 2873 int
2873 2874 fulword(const void *addr, ulong_t *valuep)
2874 2875 { return (0); }
2875 2876
2876 2877 /*ARGSUSED*/
2877 2878 void
2878 2879 fulword_noerr(const void *addr, ulong_t *valuep)
2879 2880 {}
2880 2881
2881 2882 /*ARGSUSED*/
2882 2883 int
2883 2884 sulword(void *addr, ulong_t valuep)
2884 2885 { return (0); }
2885 2886
2886 2887 /*ARGSUSED*/
2887 2888 void
2888 2889 sulword_noerr(void *addr, ulong_t valuep)
2889 2890 {}
2890 2891
2891 2892 #else
2892 2893
2893 2894 .weak subyte
2894 2895 subyte=suword8
2895 2896 .weak subyte_noerr
2896 2897 subyte_noerr=suword8_noerr
2897 2898
2898 2899 #if defined(__amd64)
2899 2900
2900 2901 .weak fulword
2901 2902 fulword=fuword64
2902 2903 .weak fulword_noerr
2903 2904 fulword_noerr=fuword64_noerr
2904 2905 .weak sulword
2905 2906 sulword=suword64
2906 2907 .weak sulword_noerr
2907 2908 sulword_noerr=suword64_noerr
2908 2909
2909 2910 #elif defined(__i386)
2910 2911
2911 2912 .weak fulword
2912 2913 fulword=fuword32
2913 2914 .weak fulword_noerr
2914 2915 fulword_noerr=fuword32_noerr
2915 2916 .weak sulword
2916 2917 sulword=suword32
2917 2918 .weak sulword_noerr
2918 2919 sulword_noerr=suword32_noerr
2919 2920
2920 2921 #endif /* __i386 */
2921 2922
2922 2923 #endif /* __lint */
2923 2924
2924 2925 #if defined(__lint)
2925 2926
2926 2927 /*
2927 2928 * Copy a block of storage - must not overlap (from + len <= to).
2928 2929 * No fault handler installed (to be called under on_fault())
2929 2930 */
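Since these variants install no fault handler of their own, callers bracket them with on_fault()/no_fault(), as noted at the top of this file. A hedged sketch of that pattern follows; the helper and its arguments are hypothetical.

        #include <sys/types.h>
        #include <sys/systm.h>
        #include <sys/errno.h>

        /*
         * Hedged sketch: if the user address faults, control resumes at the
         * on_fault() call with a non-zero return value and we bail with
         * EFAULT; otherwise no_fault() clears the handler on the way out.
         */
        static int
        fetch_record(const void *ufrom, void *kto, size_t len)
        {
                label_t ljb;

                if (on_fault(&ljb)) {
                        no_fault();
                        return (EFAULT);
                }
                copyin_noerr(ufrom, kto, len);
                no_fault();
                return (0);
        }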
2930 2931
2931 2932 /* ARGSUSED */
2932 2933 void
2933 2934 copyout_noerr(const void *kfrom, void *uto, size_t count)
2934 2935 {}
2935 2936
2936 2937 /* ARGSUSED */
2937 2938 void
2938 2939 copyin_noerr(const void *ufrom, void *kto, size_t count)
2939 2940 {}
2940 2941
2941 2942 /*
2942 2943 * Zero a block of storage in user space
2943 2944 */
2944 2945
2945 2946 /* ARGSUSED */
2946 2947 void
2947 2948 uzero(void *addr, size_t count)
2948 2949 {}
2949 2950
2950 2951 /*
2951 2952 * copy a block of storage in user space
2952 2953 */
2953 2954
2954 2955 /* ARGSUSED */
2955 2956 void
2956 2957 ucopy(const void *ufrom, void *uto, size_t ulength)
2957 2958 {}
2958 2959
2959 2960 /*
2960 2961 * copy a string in user space
2961 2962 */
2962 2963
2963 2964 /* ARGSUSED */
2964 2965 void
2965 2966 ucopystr(const char *ufrom, char *uto, size_t umaxlength, size_t *lencopied)
2966 2967 {}
2967 2968
2968 2969 #else /* __lint */
2969 2970
2970 2971 #if defined(__amd64)
2971 2972
2972 2973 ENTRY(copyin_noerr)
2973 2974 movq kernelbase(%rip), %rax
2974 2975 #ifdef DEBUG
2975 2976 cmpq %rax, %rsi /* %rsi = kto */
2976 2977 jae 1f
2977 2978 leaq .cpyin_ne_pmsg(%rip), %rdi
2978 2979 jmp call_panic /* setup stack and call panic */
2979 2980 1:
2980 2981 #endif
2981 2982 cmpq %rax, %rdi /* ufrom < kernelbase */
2982 2983 jb do_copy
2983 2984 movq %rax, %rdi /* force fault at kernelbase */
2984 2985 jmp do_copy
2985 2986 SET_SIZE(copyin_noerr)
2986 2987
2987 2988 ENTRY(copyout_noerr)
2988 2989 movq kernelbase(%rip), %rax
2989 2990 #ifdef DEBUG
2990 2991 cmpq %rax, %rdi /* %rdi = kfrom */
2991 2992 jae 1f
2992 2993 leaq .cpyout_ne_pmsg(%rip), %rdi
2993 2994 jmp call_panic /* setup stack and call panic */
2994 2995 1:
2995 2996 #endif
2996 2997 cmpq %rax, %rsi /* uto < kernelbase */
2997 2998 jb do_copy
2998 2999 movq %rax, %rsi /* force fault at kernelbase */
2999 3000 jmp do_copy
3000 3001 SET_SIZE(copyout_noerr)
3001 3002
3002 3003 ENTRY(uzero)
3003 3004 movq kernelbase(%rip), %rax
3004 3005 cmpq %rax, %rdi
3005 3006 jb do_zero
3006 3007 movq %rax, %rdi /* force fault at kernelbase */
3007 3008 jmp do_zero
3008 3009 SET_SIZE(uzero)
3009 3010
3010 3011 ENTRY(ucopy)
3011 3012 movq kernelbase(%rip), %rax
3012 3013 cmpq %rax, %rdi
3013 3014 cmovaeq %rax, %rdi /* force fault at kernelbase */
3014 3015 cmpq %rax, %rsi
3015 3016 cmovaeq %rax, %rsi /* force fault at kernelbase */
3016 3017 jmp do_copy
3017 3018 SET_SIZE(ucopy)
3018 3019
3019 3020 /*
3020 3021  * Note, the frame pointer is required here because do_copystr expects
3021 3022 * to be able to pop it off!
3022 3023 */
3023 3024 ENTRY(ucopystr)
3024 3025 pushq %rbp
3025 3026 movq %rsp, %rbp
3026 3027 movq kernelbase(%rip), %rax
3027 3028 cmpq %rax, %rdi
3028 3029 cmovaeq %rax, %rdi /* force fault at kernelbase */
3029 3030 cmpq %rax, %rsi
3030 3031 cmovaeq %rax, %rsi /* force fault at kernelbase */
3031 3032 /* do_copystr expects lofault address in %r8 */
3032 3033 /* do_copystr expects whether or not we need smap in %r10 */
3033 3034 xorl %r10d, %r10d
3034 3035 movq %gs:CPU_THREAD, %r8
3035 3036 movq T_LOFAULT(%r8), %r8
3036 3037 jmp do_copystr
3037 3038 SET_SIZE(ucopystr)
3038 3039
3039 3040 #elif defined(__i386)
3040 3041
3041 3042 ENTRY(copyin_noerr)
3042 3043 movl kernelbase, %eax
3043 3044 #ifdef DEBUG
3044 3045 cmpl %eax, 8(%esp)
3045 3046 jae 1f
3046 3047 pushl $.cpyin_ne_pmsg
3047 3048 call panic
3048 3049 1:
3049 3050 #endif
3050 3051 cmpl %eax, 4(%esp)
3051 3052 jb do_copy
3052 3053 movl %eax, 4(%esp) /* force fault at kernelbase */
3053 3054 jmp do_copy
3054 3055 SET_SIZE(copyin_noerr)
3055 3056
3056 3057 ENTRY(copyout_noerr)
3057 3058 movl kernelbase, %eax
3058 3059 #ifdef DEBUG
3059 3060 cmpl %eax, 4(%esp)
3060 3061 jae 1f
3061 3062 pushl $.cpyout_ne_pmsg
3062 3063 call panic
3063 3064 1:
3064 3065 #endif
3065 3066 cmpl %eax, 8(%esp)
3066 3067 jb do_copy
3067 3068 movl %eax, 8(%esp) /* force fault at kernelbase */
3068 3069 jmp do_copy
3069 3070 SET_SIZE(copyout_noerr)
3070 3071
3071 3072 ENTRY(uzero)
3072 3073 movl kernelbase, %eax
3073 3074 cmpl %eax, 4(%esp)
3074 3075 jb do_zero
3075 3076 movl %eax, 4(%esp) /* force fault at kernelbase */
3076 3077 jmp do_zero
3077 3078 SET_SIZE(uzero)
3078 3079
3079 3080 ENTRY(ucopy)
3080 3081 movl kernelbase, %eax
3081 3082 cmpl %eax, 4(%esp)
3082 3083 jb 1f
3083 3084 movl %eax, 4(%esp) /* force fault at kernelbase */
3084 3085 1:
3085 3086 cmpl %eax, 8(%esp)
3086 3087 jb do_copy
3087 3088 movl %eax, 8(%esp) /* force fault at kernelbase */
3088 3089 jmp do_copy
3089 3090 SET_SIZE(ucopy)
3090 3091
3091 3092 ENTRY(ucopystr)
3092 3093 movl kernelbase, %eax
3093 3094 cmpl %eax, 4(%esp)
3094 3095 jb 1f
3095 3096 movl %eax, 4(%esp) /* force fault at kernelbase */
3096 3097 1:
3097 3098 cmpl %eax, 8(%esp)
3098 3099 jb 2f
3099 3100 movl %eax, 8(%esp) /* force fault at kernelbase */
3100 3101 2:
3101 3102 /* do_copystr expects the lofault address in %eax */
3102 3103 movl %gs:CPU_THREAD, %eax
3103 3104 movl T_LOFAULT(%eax), %eax
3104 3105 jmp do_copystr
3105 3106 SET_SIZE(ucopystr)
3106 3107
3107 3108 #endif /* __i386 */
3108 3109
3109 3110 #ifdef DEBUG
3110 3111 .data
3111 3112 .kcopy_panic_msg:
3112 3113 .string "kcopy: arguments below kernelbase"
3113 3114 .bcopy_panic_msg:
3114 3115 .string "bcopy: arguments below kernelbase"
3115 3116 .kzero_panic_msg:
3116 3117 .string "kzero: arguments below kernelbase"
3117 3118 .bzero_panic_msg:
3118 3119 .string "bzero: arguments below kernelbase"
3119 3120 .copyin_panic_msg:
3120 3121 .string "copyin: kaddr argument below kernelbase"
3121 3122 .xcopyin_panic_msg:
3122 3123 .string "xcopyin: kaddr argument below kernelbase"
3123 3124 .copyout_panic_msg:
3124 3125 .string "copyout: kaddr argument below kernelbase"
3125 3126 .xcopyout_panic_msg:
3126 3127 .string "xcopyout: kaddr argument below kernelbase"
3127 3128 .copystr_panic_msg:
3128 3129 .string "copystr: arguments in user space"
3129 3130 .copyinstr_panic_msg:
3130 3131 .string "copyinstr: kaddr argument not in kernel address space"
3131 3132 .copyoutstr_panic_msg:
3132 3133 .string "copyoutstr: kaddr argument not in kernel address space"
3133 3134 .cpyin_ne_pmsg:
3134 3135 .string "copyin_noerr: argument not in kernel address space"
3135 3136 .cpyout_ne_pmsg:
3136 3137 .string "copyout_noerr: argument not in kernel address space"
3137 3138 #endif
3138 3139
3139 3140 #endif /* __lint */
3140 3141
3141 3142 /*
3142 3143 * These functions are used for SMAP, supervisor mode access protection. They
3143 3144  * are hotpatched to become real instructions when the system starts up, which is
3144 3145  * done in mlsetup() as part of enabling the other CR4-related features.
3145 3146  *
3146 3147  * Generally speaking, smap_disable() is a stac instruction and smap_enable() is a
3147 3148  * clac instruction. It's safe to call these any number of times, and in fact,
3148 3149  * out of paranoia, the kernel will likely call them at several points.
3149 3150 */
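Conceptually, each SMAP_ENABLE_INSTR/SMAP_DISABLE_INSTR site, like the two functions below, is assembled as nops and later overwritten with the three-byte clac (0f 01 ca) or stac (0f 01 cb) encoding when SMAP is present. The sketch below illustrates that idea only; patch_text() and the site arrays are hypothetical stand-ins, not the kernel's actual patching interface.

        #include <sys/types.h>

        /* Hypothetical text-patching primitive; not the kernel's real API. */
        extern void patch_text(caddr_t addr, const uint8_t *bytes, size_t len);

        static const uint8_t clac_insn[] = { 0x0f, 0x01, 0xca };	/* clac */
        static const uint8_t stac_insn[] = { 0x0f, 0x01, 0xcb };	/* stac */

        /*
         * Hedged sketch: overwrite every smap_enable site with clac and every
         * smap_disable site with stac once the CPU is known to support SMAP.
         */
        static void
        patch_smap_sites(caddr_t *enable_sites, uint_t nenable,
            caddr_t *disable_sites, uint_t ndisable)
        {
                uint_t i;

                for (i = 0; i < nenable; i++)
                        patch_text(enable_sites[i], clac_insn, sizeof (clac_insn));
                for (i = 0; i < ndisable; i++)
                        patch_text(disable_sites[i], stac_insn, sizeof (stac_insn));
        }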
3150 3151
3151 3152 #if defined(__lint)
3152 3153
3153 3154 void
3154 3155 smap_enable(void)
3155 3156 {}
3156 3157
3157 3158 void
3158 3159 smap_disable(void)
3159 3160 {}
3160 3161
3161 3162 #else
3162 3163
3163 3164 #if defined (__amd64) || defined(__i386)
3164 3165 ENTRY(smap_disable)
3165 3166 nop
3166 3167 nop
3167 3168 nop
3168 3169 ret
3169 3170 SET_SIZE(smap_disable)
3170 3171
3171 3172 ENTRY(smap_enable)
3172 3173 nop
3173 3174 nop
3174 3175 nop
3175 3176 ret
3176 3177 SET_SIZE(smap_enable)
3177 3178
3178 3179 #endif /* __amd64 || __i386 */
3179 3180
3180 3181 #endif /* __lint */
3181 3182
3182 3183 #ifndef __lint
3183 3184
3184 3185 .data
3185 3186 .align 4
3186 3187 .globl _smap_enable_patch_count
3187 3188 .type _smap_enable_patch_count,@object
3188 3189 .size _smap_enable_patch_count, 4
3189 3190 _smap_enable_patch_count:
3190 3191 .long SMAP_ENABLE_COUNT
3191 3192
3192 3193 .globl _smap_disable_patch_count
3193 3194 .type _smap_disable_patch_count,@object
3194 3195 .size _smap_disable_patch_count, 4
3195 3196 _smap_disable_patch_count:
3196 3197 .long SMAP_DISABLE_COUNT
3197 3198
3198 3199 #endif /* __lint */