de-linting of .s files
--- old/usr/src/uts/intel/ia32/ml/sseblk.s
+++ new/usr/src/uts/intel/ia32/ml/sseblk.s
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 -#pragma ident "%Z%%M% %I% %E% SMI"
26 +/*
27 + * Copyright 2019 Joyent, Inc.
28 + */
27 29
28 30 #include <sys/asm_linkage.h>
29 31 #include <sys/regset.h>
30 32 #include <sys/privregs.h>
31 33
32 -#if defined(__lint)
33 -#include <sys/types.h>
34 -#include <sys/archsystm.h>
35 -#else
36 34 #include "assym.h"
37 -#endif
38 35
39 36 /*
40 37 * Do block operations using Streaming SIMD extensions
41 38 */
42 39
43 40 #if defined(DEBUG)
44 -#if defined(__amd64)
45 41 #define ASSERT_KPREEMPT_DISABLED(t, r32, msg) \
46 42 movq %gs:CPU_THREAD, t; \
47 43 movsbl T_PREEMPT(t), r32; \
48 44 testl r32, r32; \
49 45 jne 5f; \
50 46 pushq %rbp; \
51 47 movq %rsp, %rbp; \
52 48 leaq msg(%rip), %rdi; \
53 49 xorl %eax, %eax; \
54 50 call panic; \
55 51 5:
56 -#elif defined(__i386)
57 -#define ASSERT_KPREEMPT_DISABLED(t, r32, msg) \
58 - movl %gs:CPU_THREAD, t; \
59 - movsbl T_PREEMPT(t), r32; \
60 - testl r32, r32; \
61 - jne 5f; \
62 - pushl %ebp; \
63 - movl %esp, %ebp; \
64 - pushl $msg; \
65 - call panic; \
66 -5:
67 -#endif /* __i386 */
68 52 #else /* DEBUG */
69 53 #define ASSERT_KPREEMPT_DISABLED(t, r32, msg)
70 54 #endif /* DEBUG */
71 55
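For reference, ASSERT_KPREEMPT_DISABLED loads curthread, sign-extends its t_preempt count, and panics with the given message when the count is zero, i.e. when the caller forgot to disable kernel preemption. A minimal, self-contained C analogue of that check, using hypothetical userland stand-ins for curthread and panic():

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical stand-ins for the kernel's curthread and panic(). */
    struct kthread { signed char t_preempt; };
    static struct kthread thread0;
    #define curthread  (&thread0)
    #define panic(msg) (fprintf(stderr, "panic: %s\n", (msg)), abort())

    /* DEBUG-only check: the caller must have disabled kernel preemption. */
    static void
    assert_kpreempt_disabled(const char *msg)
    {
            if (curthread->t_preempt == 0)
                    panic(msg);
    }

    int
    main(void)
    {
            curthread->t_preempt = 1;  /* as if preemption were disabled */
            assert_kpreempt_disabled("sseblk: preemption not disabled!");
            return (0);
    }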
72 56 #define BLOCKSHIFT 6
73 57 #define BLOCKSIZE 64 /* (1 << BLOCKSHIFT) */
74 58 #define BLOCKMASK 63 /* (BLOCKSIZE - 1) */
75 59
76 60 #if (1 << BLOCKSHIFT) != BLOCKSIZE || BLOCKMASK != (BLOCKSIZE - 1)
77 61 #error "mucked up constants"
78 62 #endif
79 63
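The #error guard above only pins down how the three constants relate; a trivial C restatement of the same check (illustrative only):

    #include <assert.h>

    #define BLOCKSHIFT 6
    #define BLOCKSIZE  64   /* (1 << BLOCKSHIFT) */
    #define BLOCKMASK  63   /* (BLOCKSIZE - 1) */

    int
    main(void)
    {
            /* The run-time twin of "#error mucked up constants". */
            assert(BLOCKSIZE == (1 << BLOCKSHIFT));
            assert(BLOCKMASK == BLOCKSIZE - 1);
            return (0);
    }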
80 -#if defined(__lint)
81 -
82 -/*ARGSUSED*/
83 -void
84 -hwblkclr(void *addr, size_t size)
85 -{}
86 -
87 -#else /* __lint */
88 -
89 -#if defined(__amd64)
90 -#define ADD addq
91 -#define SUB subq
92 -#else
93 -#define ADD addl
94 -#define SUB subl
95 -#endif
96 -
97 64 #define SAVE_XMM0(r) \
98 65 SAVE_XMM_PROLOG(r, 1); \
99 66 movdqa %xmm0, (r)
100 67
101 68 #define ZERO_LOOP_INIT_XMM(dst) \
102 69 pxor %xmm0, %xmm0
103 70
104 71 #define ZERO_LOOP_BODY_XMM(dst, cnt) \
105 72 movntdq %xmm0, (dst); \
106 73 movntdq %xmm0, 0x10(dst); \
107 74 movntdq %xmm0, 0x20(dst); \
108 75 movntdq %xmm0, 0x30(dst); \
109 - ADD $BLOCKSIZE, dst; \
110 - SUB $1, cnt
76 + addq $BLOCKSIZE, dst; \
77 + subq $1, cnt
111 78
112 79 #define ZERO_LOOP_FINI_XMM(dst) \
113 80 mfence
114 81
115 82 #define RSTOR_XMM0(r) \
116 83 movdqa 0x0(r), %xmm0; \
117 84 RSTOR_XMM_EPILOG(r, 1)
118 85
119 -#if defined(__amd64)
120 -
121 86 /*
122 87 * %rdi dst
123 88 * %rsi size
124 89 * %rax saved %cr0 (#if DEBUG then %eax is t->t_preempt)
125 90 * %r8 pointer to %xmm register save area
126 91 */
127 92 ENTRY(hwblkclr)
128 93 pushq %rbp
129 94 movq %rsp, %rbp
130 95 testl $BLOCKMASK, %edi /* address must be BLOCKSIZE aligned */
131 96 jne .dobzero
132 97 cmpq $BLOCKSIZE, %rsi /* size must be at least BLOCKSIZE */
133 98 jl .dobzero
134 99 testq $BLOCKMASK, %rsi /* .. and be a multiple of BLOCKSIZE */
135 100 jne .dobzero
136 101 shrq $BLOCKSHIFT, %rsi
137 102
138 103 ASSERT_KPREEMPT_DISABLED(%r11, %eax, .not_disabled)
139 104 movq %cr0, %rax
140 105 clts
141 106 testl $CR0_TS, %eax
142 107 jnz 1f
143 108
144 109 SAVE_XMM0(%r8)
145 110 1: ZERO_LOOP_INIT_XMM(%rdi)
146 111 9: ZERO_LOOP_BODY_XMM(%rdi, %rsi)
147 112 jnz 9b
148 113 ZERO_LOOP_FINI_XMM(%rdi)
149 114
150 115 testl $CR0_TS, %eax
151 116 jnz 2f
152 117 RSTOR_XMM0(%r8)
153 118 2: movq %rax, %cr0
154 119 leave
155 120 ret
156 121 .dobzero:
157 122 leave
158 123 jmp bzero
159 124 SET_SIZE(hwblkclr)
160 125
161 -#elif defined(__i386)
162 126
163 - /*
164 - * %eax dst
165 - * %ecx size in bytes, loop count
166 - * %ebx saved %cr0 (#if DEBUG then t->t_preempt)
167 - * %edi pointer to %xmm register save area
168 - */
169 - ENTRY(hwblkclr)
170 - movl 4(%esp), %eax
171 - movl 8(%esp), %ecx
172 - testl $BLOCKMASK, %eax /* address must be BLOCKSIZE aligned */
173 - jne .dobzero
174 - cmpl $BLOCKSIZE, %ecx /* size must be at least BLOCKSIZE */
175 - jl .dobzero
176 - testl $BLOCKMASK, %ecx /* .. and be a multiple of BLOCKSIZE */
177 - jne .dobzero
178 - shrl $BLOCKSHIFT, %ecx
179 - movl 0xc(%esp), %edx
180 - pushl %ebx
181 -
182 - pushl %esi
183 - ASSERT_KPREEMPT_DISABLED(%esi, %ebx, .not_disabled)
184 - popl %esi
185 - movl %cr0, %ebx
186 - clts
187 - testl $CR0_TS, %ebx
188 - jnz 1f
189 -
190 - pushl %edi
191 - SAVE_XMM0(%edi)
192 -1: ZERO_LOOP_INIT_XMM(%eax)
193 -9: ZERO_LOOP_BODY_XMM(%eax, %ecx)
194 - jnz 9b
195 - ZERO_LOOP_FINI_XMM(%eax)
196 -
197 - testl $CR0_TS, %ebx
198 - jnz 2f
199 - RSTOR_XMM0(%edi)
200 - popl %edi
201 -2: movl %ebx, %cr0
202 - popl %ebx
203 - ret
204 -.dobzero:
205 - jmp bzero
206 - SET_SIZE(hwblkclr)
207 -
208 -#endif /* __i386 */
209 -#endif /* __lint */
210 -
211 -
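A functional C sketch of the hwblkclr retained above (the hwblkclr_sketch name is hypothetical; it ignores the %cr0/clts bookkeeping, the conditional %xmm0 save/restore, and the movntdq non-temporal stores plus mfence that the real routine uses):

    #include <string.h>
    #include <stddef.h>
    #include <stdint.h>

    #define BLOCKSHIFT 6
    #define BLOCKSIZE  64
    #define BLOCKMASK  63

    static void
    hwblkclr_sketch(void *addr, size_t size)
    {
            /* Unaligned start, short, or ragged length: the .dobzero path. */
            if (((uintptr_t)addr & BLOCKMASK) != 0 || size < BLOCKSIZE ||
                (size & BLOCKMASK) != 0) {
                    memset(addr, 0, size);
                    return;
            }
            /* Otherwise clear one 64-byte block per loop iteration. */
            for (size_t n = size >> BLOCKSHIFT; n != 0; n--) {
                    memset(addr, 0, BLOCKSIZE);
                    addr = (char *)addr + BLOCKSIZE;
            }
    }

Kernel callers are expected to have preemption disabled around the call, which is what the DEBUG-only ASSERT_KPREEMPT_DISABLED verifies.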
212 -#if defined(__lint)
213 -
214 -/*ARGSUSED*/
215 -void
216 -hwblkpagecopy(const void *src, void *dst)
217 -{}
218 -
219 -#else /* __lint */
220 -
221 127 #define PREFETCH_START(src) \
222 128 prefetchnta 0x0(src); \
223 129 prefetchnta 0x40(src)
224 130
225 131 #define SAVE_XMMS(r) \
226 132 SAVE_XMM_PROLOG(r, 8); \
227 133 movdqa %xmm0, (r); \
228 134 movdqa %xmm1, 0x10(r); \
229 135 movdqa %xmm2, 0x20(r); \
230 136 movdqa %xmm3, 0x30(r); \
231 137 movdqa %xmm4, 0x40(r); \
232 138 movdqa %xmm5, 0x50(r); \
233 139 movdqa %xmm6, 0x60(r); \
234 140 movdqa %xmm7, 0x70(r)
235 141
236 142 #define COPY_LOOP_INIT_XMM(src) \
237 143 prefetchnta 0x80(src); \
238 144 prefetchnta 0xc0(src); \
239 145 movdqa 0x0(src), %xmm0; \
240 146 movdqa 0x10(src), %xmm1; \
241 147 movdqa 0x20(src), %xmm2; \
242 148 movdqa 0x30(src), %xmm3; \
243 149 movdqa 0x40(src), %xmm4; \
244 150 movdqa 0x50(src), %xmm5; \
245 151 movdqa 0x60(src), %xmm6; \
246 152 movdqa 0x70(src), %xmm7; \
247 - ADD $0x80, src
153 + addq $0x80, src
248 154
249 155 #define COPY_LOOP_BODY_XMM(src, dst, cnt) \
250 156 prefetchnta 0x80(src); \
251 157 prefetchnta 0xc0(src); \
252 158 prefetchnta 0x100(src); \
253 159 prefetchnta 0x140(src); \
254 160 movntdq %xmm0, (dst); \
255 161 movntdq %xmm1, 0x10(dst); \
256 162 movntdq %xmm2, 0x20(dst); \
257 163 movntdq %xmm3, 0x30(dst); \
258 164 movdqa 0x0(src), %xmm0; \
259 165 movdqa 0x10(src), %xmm1; \
260 166 movntdq %xmm4, 0x40(dst); \
261 167 movntdq %xmm5, 0x50(dst); \
262 168 movdqa 0x20(src), %xmm2; \
263 169 movdqa 0x30(src), %xmm3; \
264 170 movntdq %xmm6, 0x60(dst); \
265 171 movntdq %xmm7, 0x70(dst); \
266 172 movdqa 0x40(src), %xmm4; \
267 173 movdqa 0x50(src), %xmm5; \
268 - ADD $0x80, dst; \
174 + addq $0x80, dst; \
269 175 movdqa 0x60(src), %xmm6; \
270 176 movdqa 0x70(src), %xmm7; \
271 - ADD $0x80, src; \
177 + addq $0x80, src; \
272 178 subl $1, cnt
273 179
274 180 #define COPY_LOOP_FINI_XMM(dst) \
275 181 movntdq %xmm0, 0x0(dst); \
276 182 movntdq %xmm1, 0x10(dst); \
277 183 movntdq %xmm2, 0x20(dst); \
278 184 movntdq %xmm3, 0x30(dst); \
279 185 movntdq %xmm4, 0x40(dst); \
280 186 movntdq %xmm5, 0x50(dst); \
281 187 movntdq %xmm6, 0x60(dst); \
282 188 movntdq %xmm7, 0x70(dst)
283 189
284 190 #define RSTOR_XMMS(r) \
285 191 movdqa 0x0(r), %xmm0; \
286 192 movdqa 0x10(r), %xmm1; \
287 193 movdqa 0x20(r), %xmm2; \
288 194 movdqa 0x30(r), %xmm3; \
289 195 movdqa 0x40(r), %xmm4; \
290 196 movdqa 0x50(r), %xmm5; \
291 197 movdqa 0x60(r), %xmm6; \
292 198 movdqa 0x70(r), %xmm7; \
293 199 RSTOR_XMM_EPILOG(r, 8)
294 200
295 -#if defined(__amd64)
296 -
297 201 /*
298 202 * %rdi src
299 203 * %rsi dst
300 204 * %rdx #if DEBUG then curthread
301 205 * %ecx loop count
302 206 * %rax saved %cr0 (#if DEBUG then %eax is t->t_prempt)
303 207 * %r8 pointer to %xmm register save area
304 208 */
305 209 ENTRY(hwblkpagecopy)
306 210 pushq %rbp
307 211 movq %rsp, %rbp
308 212 PREFETCH_START(%rdi)
309 213 /*
310 214 * PAGESIZE is 4096, each loop moves 128 bytes, but the initial
311 215 * load and final store save us on loop count
312 216 */
313 217 movl $_CONST(32 - 1), %ecx
314 218 ASSERT_KPREEMPT_DISABLED(%rdx, %eax, .not_disabled)
315 219 movq %cr0, %rax
316 220 clts
317 221 testl $CR0_TS, %eax
318 222 jnz 3f
319 223 SAVE_XMMS(%r8)
320 224 3: COPY_LOOP_INIT_XMM(%rdi)
321 225 4: COPY_LOOP_BODY_XMM(%rdi, %rsi, %ecx)
322 226 jnz 4b
323 227 COPY_LOOP_FINI_XMM(%rsi)
324 228 testl $CR0_TS, %eax
325 229 jnz 5f
326 230 RSTOR_XMMS(%r8)
327 231 5: movq %rax, %cr0
328 232 mfence
329 233 leave
330 234 ret
331 235 SET_SIZE(hwblkpagecopy)
332 236
333 -#elif defined(__i386)
334 -
335 - /*
336 - * %eax src
337 - * %edx dst
338 - * %ecx loop count
339 - * %ebx saved %cr0 (#if DEBUG then t->t_prempt)
340 - * %edi pointer to %xmm register save area
341 - * %esi #if DEBUG temporary thread pointer
342 - */
343 - ENTRY(hwblkpagecopy)
344 - movl 4(%esp), %eax
345 - movl 8(%esp), %edx
346 - PREFETCH_START(%eax)
347 - pushl %ebx
348 - /*
349 - * PAGESIZE is 4096, each loop moves 128 bytes, but the initial
350 - * load and final store save us one loop count
351 - */
352 - movl $_CONST(32 - 1), %ecx
353 - pushl %esi
354 - ASSERT_KPREEMPT_DISABLED(%esi, %ebx, .not_disabled)
355 - popl %esi
356 - movl %cr0, %ebx
357 - clts
358 - testl $CR0_TS, %ebx
359 - jnz 3f
360 - pushl %edi
361 - SAVE_XMMS(%edi)
362 -3: COPY_LOOP_INIT_XMM(%eax)
363 -4: COPY_LOOP_BODY_XMM(%eax, %edx, %ecx)
364 - jnz 4b
365 - COPY_LOOP_FINI_XMM(%edx)
366 - testl $CR0_TS, %ebx
367 - jnz 5f
368 - RSTOR_XMMS(%edi)
369 - popl %edi
370 -5: movl %ebx, %cr0
371 - popl %ebx
372 - mfence
373 - ret
374 - SET_SIZE(hwblkpagecopy)
375 -
376 -#endif /* __i386 */
377 -#endif /* __lint */
378 -
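The "32 - 1" loop count in hwblkpagecopy follows from the comment above: a 4096-byte page is streamed 128 bytes (eight %xmm registers) per pass, and the first load plus the last store happen outside the loop body. A small C check of that arithmetic (illustrative only):

    #include <assert.h>
    #include <stddef.h>

    #define PAGESIZE 4096
    #define CHUNK    128    /* eight 16-byte %xmm registers per pass */

    int
    main(void)
    {
            /*
             * COPY_LOOP_INIT_XMM preloads the first chunk and
             * COPY_LOOP_FINI_XMM stores the last one, so the loop body
             * only runs for the remaining chunks.
             */
            size_t chunks = PAGESIZE / CHUNK;
            size_t body_iterations = chunks - 1;

            assert(chunks == 32);
            assert(body_iterations == 31);
            return (0);
    }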
379 -#if defined(__lint)
380 -
381 -/*
382 - * Version of hwblkclr which doesn't use XMM registers.
383 - * Note that it requires aligned dst and len.
384 - *
385 - * XXPV This needs to be performance tuned at some point.
386 - * Is 4 the best number of iterations to unroll?
387 - */
388 -/*ARGSUSED*/
389 -void
390 -block_zero_no_xmm(void *dst, int len)
391 -{}
392 -
393 -#else /* __lint */
394 -
395 -#if defined(__amd64)
396 -
397 237 ENTRY(block_zero_no_xmm)
398 238 pushq %rbp
399 239 movq %rsp, %rbp
400 240 xorl %eax, %eax
401 241 addq %rsi, %rdi
402 242 negq %rsi
403 243 1:
404 244 movnti %rax, (%rdi, %rsi)
405 245 movnti %rax, 8(%rdi, %rsi)
406 246 movnti %rax, 16(%rdi, %rsi)
407 247 movnti %rax, 24(%rdi, %rsi)
408 248 addq $32, %rsi
409 249 jnz 1b
410 250 mfence
411 251 leave
412 252 ret
413 253 SET_SIZE(block_zero_no_xmm)
414 254
415 -#elif defined(__i386)
416 255
417 - ENTRY(block_zero_no_xmm)
418 - pushl %ebp
419 - movl %esp, %ebp
420 - xorl %eax, %eax
421 - movl 8(%ebp), %edx
422 - movl 12(%ebp), %ecx
423 - addl %ecx, %edx
424 - negl %ecx
425 -1:
426 - movnti %eax, (%edx, %ecx)
427 - movnti %eax, 4(%edx, %ecx)
428 - movnti %eax, 8(%edx, %ecx)
429 - movnti %eax, 12(%edx, %ecx)
430 - addl $16, %ecx
431 - jnz 1b
432 - mfence
433 - leave
434 - ret
435 - SET_SIZE(block_zero_no_xmm)
436 -
437 -#endif /* __i386 */
438 -#endif /* __lint */
439 -
440 -
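block_zero_no_xmm walks the buffer with a negative offset: add the length to the destination, negate the length, then count the offset up toward zero. A C rendering of the same idea under the routine's stated preconditions (the block_zero_no_xmm_sketch name is hypothetical, and ordinary 8-byte stores stand in for movnti):

    #include <stddef.h>
    #include <stdint.h>

    /*
     * Assumes dst is suitably aligned and len is a positive multiple of 32,
     * matching the four unrolled 8-byte stores per iteration above.
     */
    static void
    block_zero_no_xmm_sketch(void *dst, int len)
    {
            char *end = (char *)dst + len;          /* addq %rsi, %rdi */
            intptr_t off = -(intptr_t)len;          /* negq %rsi */

            do {
                    *(uint64_t *)(end + off) = 0;   /* movnti %rax, (%rdi, %rsi) */
                    *(uint64_t *)(end + off + 8) = 0;
                    *(uint64_t *)(end + off + 16) = 0;
                    *(uint64_t *)(end + off + 24) = 0;
                    off += 32;
            } while (off != 0);                     /* addq $32, %rsi; jnz 1b */
    }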
441 -#if defined(__lint)
442 -
443 -/*
444 - * Version of page copy which doesn't use XMM registers.
445 - *
446 - * XXPV This needs to be performance tuned at some point.
447 - * Is 4 the right number of iterations to unroll?
448 - * Is the load/store order optimal? Should it use prefetch?
449 - */
450 -/*ARGSUSED*/
451 -void
452 -page_copy_no_xmm(void *dst, void *src)
453 -{}
454 -
455 -#else /* __lint */
456 -
457 -#if defined(__amd64)
458 -
459 256 ENTRY(page_copy_no_xmm)
460 257 movq $MMU_STD_PAGESIZE, %rcx
461 258 addq %rcx, %rdi
462 259 addq %rcx, %rsi
463 260 negq %rcx
464 261 1:
465 262 movq (%rsi, %rcx), %rax
466 263 movnti %rax, (%rdi, %rcx)
467 264 movq 8(%rsi, %rcx), %rax
468 265 movnti %rax, 8(%rdi, %rcx)
469 266 movq 16(%rsi, %rcx), %rax
470 267 movnti %rax, 16(%rdi, %rcx)
471 268 movq 24(%rsi, %rcx), %rax
472 269 movnti %rax, 24(%rdi, %rcx)
473 270 addq $32, %rcx
474 271 jnz 1b
475 272 mfence
476 273 ret
477 274 SET_SIZE(page_copy_no_xmm)
478 275
479 -#elif defined(__i386)
480 -
481 - ENTRY(page_copy_no_xmm)
482 - pushl %esi
483 - movl $MMU_STD_PAGESIZE, %ecx
484 - movl 8(%esp), %edx
485 - movl 12(%esp), %esi
486 - addl %ecx, %edx
487 - addl %ecx, %esi
488 - negl %ecx
489 -1:
490 - movl (%esi, %ecx), %eax
491 - movnti %eax, (%edx, %ecx)
492 - movl 4(%esi, %ecx), %eax
493 - movnti %eax, 4(%edx, %ecx)
494 - movl 8(%esi, %ecx), %eax
495 - movnti %eax, 8(%edx, %ecx)
496 - movl 12(%esi, %ecx), %eax
497 - movnti %eax, 12(%edx, %ecx)
498 - addl $16, %ecx
499 - jnz 1b
500 - mfence
501 - popl %esi
502 - ret
503 - SET_SIZE(page_copy_no_xmm)
504 -
505 -#endif /* __i386 */
506 -#endif /* __lint */
507 -
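page_copy_no_xmm applies the same negative-offset walk to copy one page with 8-byte loads and stores. A hedged C sketch (hypothetical _sketch name, ordinary stores instead of movnti, and MMU_STD_PAGESIZE assumed to be 4096 as in the hwblkpagecopy comment):

    #include <stddef.h>
    #include <stdint.h>

    #define MMU_STD_PAGESIZE 4096   /* assumed x86 base page size */

    /* dst and src are assumed to be distinct, page-aligned pages. */
    static void
    page_copy_no_xmm_sketch(void *dst, void *src)
    {
            char *d = (char *)dst + MMU_STD_PAGESIZE;
            char *s = (char *)src + MMU_STD_PAGESIZE;
            intptr_t off = -(intptr_t)MMU_STD_PAGESIZE;

            do {
                    *(uint64_t *)(d + off) = *(uint64_t *)(s + off);
                    *(uint64_t *)(d + off + 8) = *(uint64_t *)(s + off + 8);
                    *(uint64_t *)(d + off + 16) = *(uint64_t *)(s + off + 16);
                    *(uint64_t *)(d + off + 24) = *(uint64_t *)(s + off + 24);
                    off += 32;
            } while (off != 0);
    }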
508 -#if defined(DEBUG) && !defined(__lint)
276 +#if defined(DEBUG)
509 277 .text
510 278 .not_disabled:
511 279 .string "sseblk: preemption not disabled!"
512 280 #endif