de-linting of .s files
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <sys/param.h>
28 #include <sys/errno.h>
29 #include <sys/asm_linkage.h>
30 #include <sys/vtrace.h>
31 #include <sys/machthread.h>
32 #include <sys/clock.h>
33 #include <sys/asi.h>
34 #include <sys/fsr.h>
35 #include <sys/privregs.h>
36
37 #include "assym.h"
38
39
40 /*
41 * Pseudo-code to aid in understanding the control flow of the
42 * bcopy routine.
43 *
44 * On entry to bcopy:
45 *
46 * %l6 = curthread->t_lofault;
47 * used_block_copy = FALSE; ! %l6 |= 1
48 * if (%l6 != NULL) {
49 * curthread->t_lofault = .copyerr;
50 * caller_error_handler = TRUE ! %l6 |= 2
51 * }
52 *
53 * if (length < VIS_COPY)
54 * goto regular_copy;
55 *
56 * if (!use_vis)
57 * goto regular_copy;
58 *
59 * if (curthread->t_lwp == NULL) {
60 * ! Kernel threads do not have pcb's in which to store
61 * ! the floating point state, disallow preemption during
62 * ! the copy.
63 * kpreempt_disable(curthread);
64 * }
65 *
66 * old_fprs = %fprs;
67 * old_gsr = %gsr;
68 * if (%fprs.fef) {
69 * ! If we need to save 4 blocks of fpregs then make sure
70 * ! the length is still appropriate for that extra overhead.
71 * if (length < (large_length + (64 * 4))) {
72 * if (curthread->t_lwp == NULL)
73 * kpreempt_enable(curthread);
74 * goto regular_copy;
75 * }
76 * %fprs.fef = 1;
77 * save current fpregs on stack using blockstore
78 * } else {
79 * %fprs.fef = 1;
80 * }
81 *
82 * used_block_copy = 1; ! %l6 |= 1
83 * do_blockcopy_here;
84 *
85 * In lofault handler:
86 * curthread->t_lofault = .copyerr2;
87 * Continue on with the normal exit handler
88 *
89 * On exit:
90 * call_kpreempt = 0;
91 * if (used_block_copy) { ! %l6 & 1
92 * %gsr = old_gsr;
93 * if (old_fprs & FPRS_FEF)
94 * restore fpregs from stack using blockload
95 * else
96 * zero fpregs
97 * %fprs = old_fprs;
98 * if (curthread->t_lwp == NULL) {
99 * kpreempt_enable(curthread);
100 * call_kpreempt = 1;
101 * }
102 * }
103 * curthread->t_lofault = (%l6 & ~3);
104 * if (call_kpreempt)
105 * kpreempt(%pil);
106 * return (0)
107 *
108 * In second lofault handler (.copyerr2):
109 * We've tried to restore fp state from the stack and failed. To
110 * prevent from returning with a corrupted fp state, we will panic.
111 */
112
113 /*
114 * Notes on preserving existing fp state:
115 *
116 * When a copyOP decides to use fp we may have to preserve existing
117 * floating point state. It is not the caller's state that we need to
118 * preserve - the rest of the kernel does not use fp and, anyway, fp
119 * registers are volatile across a call. Some examples:
120 *
121 * - userland has fp state and is interrupted (device interrupt
122 * or trap) and within the interrupt/trap handling we use
123 * bcopy()
124 * - another (higher level) interrupt or trap handler uses bcopy
125 * while a bcopy from an earlier interrupt is still active
126 * - an asynchronous error trap occurs while fp state exists (in
127 * userland or in kernel copy) and the tl0 component of the handling
128 * uses bcopy
129 * - a user process with fp state incurs a copy-on-write fault and
130 * hwblkpagecopy always uses fp
131 *
132 * We therefore need a per-call place in which to preserve fp state -
133 * using our stack is ideal (and since fp copy cannot be leaf optimized
134 * because of calls it makes, this is no hardship).
135 *
136 * To make sure that floating point state is always saved and restored
137 * correctly, the following "big rules" must be followed when the floating
138 * point registers will be used:
139 *
140 * 1. %l6 always holds the caller's lofault handler. Also in this register,
141 * Bit 1 (FPUSED_FLAG) indicates that the floating point registers are in
142 * use. Bit 2 (BCOPY_FLAG) indicates that the call was to bcopy.
143 *
144 * 2. The FPUSED flag indicates that all FP state has been successfully stored
145 * on the stack. It should not be set until this save has been completed.
146 *
147 * 3. The FPUSED flag should not be cleared on exit until all FP state has
148 * been restored from the stack. If an error occurs while restoring
149 * data from the stack, the error handler can check this flag to see if
150 * a restore is necessary.
151 *
152 * 4. Code run under the new lofault handler must be kept to a minimum. In
153 * particular, any calls to kpreempt() should not be made until after the
154 * lofault handler has been restored.
155 */
156
157 /*
158 * This shadows sys/machsystm.h which can't be included due to the lack of
159 * _ASM guards in include files it references. Change it here, change it there.
 *
 * VIS_COPY_THRESHOLD is the byte count below which .do_copy skips the
 * VIS block-copy path and branches to .bcb_punt. When the fp registers
 * are already in use (and so would have to be saved first) the bar is
 * raised to VIS_COPY_THRESHOLD + (64 * 4).
160 */
161 #define VIS_COPY_THRESHOLD 900
162
163 /*
164 * Less than or equal to this number of bytes we will always copy byte-for-byte
 *
 * NOTE(review): the small-count test in .do_copy compares against a
 * literal 12 rather than this constant - confirm where SMALL_LIMIT is used.
165 */
166 #define SMALL_LIMIT 7
167
168 /*
169 * Flags set in the lower bits of the t_lofault address:
170 * FPUSED_FLAG: The FP registers were in use and must be restored
171 * BCOPY_FLAG: Set for bcopy calls, cleared for kcopy calls
172 * COPY_FLAGS: Both of the above
173 *
174 * Other flags:
175 * KPREEMPT_FLAG: kpreempt needs to be called
 *
 * FPUSED_FLAG and BCOPY_FLAG travel in %l6 alongside the saved handler
 * address and are masked off with COPY_FLAGS before that address is
 * written back to t_lofault. In .copyerr, KPREEMPT_FLAG is kept in %l1
 * (together with a copy of BCOPY_FLAG) rather than in %l6.
176 */
177 #define FPUSED_FLAG 1
178 #define BCOPY_FLAG 2
179 #define COPY_FLAGS (FPUSED_FLAG | BCOPY_FLAG)
180 #define KPREEMPT_FLAG 4
181
182 /*
183 * Size of stack frame in order to accommodate a 64-byte aligned
184 * floating-point register save area and 2 32-bit temp locations.
 *
 * All locations are addressed downwards from %fp + STACK_BIAS:
 *  - the four 64-byte fp register blocks are stored starting at
 *    (%fp + STACK_BIAS - 257) rounded down to a 64-byte boundary, so
 *    the 256-byte save area always lies within the (64 * 5) bytes
 *    immediately below the frame pointer;
 *  - SAVED_FPRS_OFFSET is the 32-bit slot holding the original %fprs;
 *  - SAVED_GSR_OFFSET is the 32-bit slot holding the original %gsr.
185 */
186 #define HWCOPYFRAMESIZE ((64 * 5) + (2 * 4))
187
188 #define SAVED_FPREGS_OFFSET (64 * 5)
189 #define SAVED_FPRS_OFFSET (SAVED_FPREGS_OFFSET + 4)
190 #define SAVED_GSR_OFFSET (SAVED_FPRS_OFFSET + 4)
191
192 /*
193 * Common macros used by the various versions of the block copy
194 * routines in this file.
195 */
196
/*
 * FZERO: zero every double-precision fp register, %f0 through %f62.
 * %f0 and %f2 are cleared with fzero; each remaining register is then
 * written with the sum or the product of those two zeroes. The
 * faddd/fmuld alternation is presumably there to spread the work over
 * both the fp add and fp multiply units - TODO confirm.
 * .copyerr uses this when the saved %fprs had FPRS_FEF clear, i.e.
 * there is no earlier fp state to put back.
 */
197 #define FZERO \
198 fzero %f0 ;\
199 fzero %f2 ;\
200 faddd %f0, %f2, %f4 ;\
201 fmuld %f0, %f2, %f6 ;\
202 faddd %f0, %f2, %f8 ;\
203 fmuld %f0, %f2, %f10 ;\
204 faddd %f0, %f2, %f12 ;\
205 fmuld %f0, %f2, %f14 ;\
206 faddd %f0, %f2, %f16 ;\
207 fmuld %f0, %f2, %f18 ;\
208 faddd %f0, %f2, %f20 ;\
209 fmuld %f0, %f2, %f22 ;\
210 faddd %f0, %f2, %f24 ;\
211 fmuld %f0, %f2, %f26 ;\
212 faddd %f0, %f2, %f28 ;\
213 fmuld %f0, %f2, %f30 ;\
214 faddd %f0, %f2, %f32 ;\
215 fmuld %f0, %f2, %f34 ;\
216 faddd %f0, %f2, %f36 ;\
217 fmuld %f0, %f2, %f38 ;\
218 faddd %f0, %f2, %f40 ;\
219 fmuld %f0, %f2, %f42 ;\
220 faddd %f0, %f2, %f44 ;\
221 fmuld %f0, %f2, %f46 ;\
222 faddd %f0, %f2, %f48 ;\
223 fmuld %f0, %f2, %f50 ;\
224 faddd %f0, %f2, %f52 ;\
225 fmuld %f0, %f2, %f54 ;\
226 faddd %f0, %f2, %f56 ;\
227 fmuld %f0, %f2, %f58 ;\
228 faddd %f0, %f2, %f60 ;\
229 fmuld %f0, %f2, %f62
230
231
/*
 * FALIGN_Dn: realign one 64-byte block of source data.
 *
 * Each macro issues eight faligndata instructions over nine consecutive
 * double registers starting at %dn and leaves the eight resulting
 * doubles in %d48 - %d62, which the segN loops then write out with a
 * single block store. The source registers form a ring of %d0 - %d46:
 * a sequence that runs past %d46 wraps around to %d0. faligndata takes
 * its byte offset from %gsr, which the copy code sets up with alignaddr.
 *
 * The sixteen-register stride between FALIGN_Dn, FALIGN_D(n+16) and
 * FALIGN_D(n+32) matches the three block-load targets (%d0, %d16, %d32)
 * that each segN loop cycles through.
 */
232 #define FALIGN_D0 \
233 faligndata %d0, %d2, %d48 ;\
234 faligndata %d2, %d4, %d50 ;\
235 faligndata %d4, %d6, %d52 ;\
236 faligndata %d6, %d8, %d54 ;\
237 faligndata %d8, %d10, %d56 ;\
238 faligndata %d10, %d12, %d58 ;\
239 faligndata %d12, %d14, %d60 ;\
240 faligndata %d14, %d16, %d62
241
242 #define FALIGN_D16 \
243 faligndata %d16, %d18, %d48 ;\
244 faligndata %d18, %d20, %d50 ;\
245 faligndata %d20, %d22, %d52 ;\
246 faligndata %d22, %d24, %d54 ;\
247 faligndata %d24, %d26, %d56 ;\
248 faligndata %d26, %d28, %d58 ;\
249 faligndata %d28, %d30, %d60 ;\
250 faligndata %d30, %d32, %d62
251
252 #define FALIGN_D32 \
253 faligndata %d32, %d34, %d48 ;\
254 faligndata %d34, %d36, %d50 ;\
255 faligndata %d36, %d38, %d52 ;\
256 faligndata %d38, %d40, %d54 ;\
257 faligndata %d40, %d42, %d56 ;\
258 faligndata %d42, %d44, %d58 ;\
259 faligndata %d44, %d46, %d60 ;\
260 faligndata %d46, %d0, %d62
261
262 #define FALIGN_D2 \
263 faligndata %d2, %d4, %d48 ;\
264 faligndata %d4, %d6, %d50 ;\
265 faligndata %d6, %d8, %d52 ;\
266 faligndata %d8, %d10, %d54 ;\
267 faligndata %d10, %d12, %d56 ;\
268 faligndata %d12, %d14, %d58 ;\
269 faligndata %d14, %d16, %d60 ;\
270 faligndata %d16, %d18, %d62
271
272 #define FALIGN_D18 \
273 faligndata %d18, %d20, %d48 ;\
274 faligndata %d20, %d22, %d50 ;\
275 faligndata %d22, %d24, %d52 ;\
276 faligndata %d24, %d26, %d54 ;\
277 faligndata %d26, %d28, %d56 ;\
278 faligndata %d28, %d30, %d58 ;\
279 faligndata %d30, %d32, %d60 ;\
280 faligndata %d32, %d34, %d62
281
282 #define FALIGN_D34 \
283 faligndata %d34, %d36, %d48 ;\
284 faligndata %d36, %d38, %d50 ;\
285 faligndata %d38, %d40, %d52 ;\
286 faligndata %d40, %d42, %d54 ;\
287 faligndata %d42, %d44, %d56 ;\
288 faligndata %d44, %d46, %d58 ;\
289 faligndata %d46, %d0, %d60 ;\
290 faligndata %d0, %d2, %d62
291
292 #define FALIGN_D4 \
293 faligndata %d4, %d6, %d48 ;\
294 faligndata %d6, %d8, %d50 ;\
295 faligndata %d8, %d10, %d52 ;\
296 faligndata %d10, %d12, %d54 ;\
297 faligndata %d12, %d14, %d56 ;\
298 faligndata %d14, %d16, %d58 ;\
299 faligndata %d16, %d18, %d60 ;\
300 faligndata %d18, %d20, %d62
301
302 #define FALIGN_D20 \
303 faligndata %d20, %d22, %d48 ;\
304 faligndata %d22, %d24, %d50 ;\
305 faligndata %d24, %d26, %d52 ;\
306 faligndata %d26, %d28, %d54 ;\
307 faligndata %d28, %d30, %d56 ;\
308 faligndata %d30, %d32, %d58 ;\
309 faligndata %d32, %d34, %d60 ;\
310 faligndata %d34, %d36, %d62
311
312 #define FALIGN_D36 \
313 faligndata %d36, %d38, %d48 ;\
314 faligndata %d38, %d40, %d50 ;\
315 faligndata %d40, %d42, %d52 ;\
316 faligndata %d42, %d44, %d54 ;\
317 faligndata %d44, %d46, %d56 ;\
318 faligndata %d46, %d0, %d58 ;\
319 faligndata %d0, %d2, %d60 ;\
320 faligndata %d2, %d4, %d62
321
322 #define FALIGN_D6 \
323 faligndata %d6, %d8, %d48 ;\
324 faligndata %d8, %d10, %d50 ;\
325 faligndata %d10, %d12, %d52 ;\
326 faligndata %d12, %d14, %d54 ;\
327 faligndata %d14, %d16, %d56 ;\
328 faligndata %d16, %d18, %d58 ;\
329 faligndata %d18, %d20, %d60 ;\
330 faligndata %d20, %d22, %d62
331
332 #define FALIGN_D22 \
333 faligndata %d22, %d24, %d48 ;\
334 faligndata %d24, %d26, %d50 ;\
335 faligndata %d26, %d28, %d52 ;\
336 faligndata %d28, %d30, %d54 ;\
337 faligndata %d30, %d32, %d56 ;\
338 faligndata %d32, %d34, %d58 ;\
339 faligndata %d34, %d36, %d60 ;\
340 faligndata %d36, %d38, %d62
341
342 #define FALIGN_D38 \
343 faligndata %d38, %d40, %d48 ;\
344 faligndata %d40, %d42, %d50 ;\
345 faligndata %d42, %d44, %d52 ;\
346 faligndata %d44, %d46, %d54 ;\
347 faligndata %d46, %d0, %d56 ;\
348 faligndata %d0, %d2, %d58 ;\
349 faligndata %d2, %d4, %d60 ;\
350 faligndata %d4, %d6, %d62
351
352 #define FALIGN_D8 \
353 faligndata %d8, %d10, %d48 ;\
354 faligndata %d10, %d12, %d50 ;\
355 faligndata %d12, %d14, %d52 ;\
356 faligndata %d14, %d16, %d54 ;\
357 faligndata %d16, %d18, %d56 ;\
358 faligndata %d18, %d20, %d58 ;\
359 faligndata %d20, %d22, %d60 ;\
360 faligndata %d22, %d24, %d62
361
362 #define FALIGN_D24 \
363 faligndata %d24, %d26, %d48 ;\
364 faligndata %d26, %d28, %d50 ;\
365 faligndata %d28, %d30, %d52 ;\
366 faligndata %d30, %d32, %d54 ;\
367 faligndata %d32, %d34, %d56 ;\
368 faligndata %d34, %d36, %d58 ;\
369 faligndata %d36, %d38, %d60 ;\
370 faligndata %d38, %d40, %d62
371
372 #define FALIGN_D40 \
373 faligndata %d40, %d42, %d48 ;\
374 faligndata %d42, %d44, %d50 ;\
375 faligndata %d44, %d46, %d52 ;\
376 faligndata %d46, %d0, %d54 ;\
377 faligndata %d0, %d2, %d56 ;\
378 faligndata %d2, %d4, %d58 ;\
379 faligndata %d4, %d6, %d60 ;\
380 faligndata %d6, %d8, %d62
381
382 #define FALIGN_D10 \
383 faligndata %d10, %d12, %d48 ;\
384 faligndata %d12, %d14, %d50 ;\
385 faligndata %d14, %d16, %d52 ;\
386 faligndata %d16, %d18, %d54 ;\
387 faligndata %d18, %d20, %d56 ;\
388 faligndata %d20, %d22, %d58 ;\
389 faligndata %d22, %d24, %d60 ;\
390 faligndata %d24, %d26, %d62
391
392 #define FALIGN_D26 \
393 faligndata %d26, %d28, %d48 ;\
394 faligndata %d28, %d30, %d50 ;\
395 faligndata %d30, %d32, %d52 ;\
396 faligndata %d32, %d34, %d54 ;\
397 faligndata %d34, %d36, %d56 ;\
398 faligndata %d36, %d38, %d58 ;\
399 faligndata %d38, %d40, %d60 ;\
400 faligndata %d40, %d42, %d62
401
402 #define FALIGN_D42 \
403 faligndata %d42, %d44, %d48 ;\
404 faligndata %d44, %d46, %d50 ;\
405 faligndata %d46, %d0, %d52 ;\
406 faligndata %d0, %d2, %d54 ;\
407 faligndata %d2, %d4, %d56 ;\
408 faligndata %d4, %d6, %d58 ;\
409 faligndata %d6, %d8, %d60 ;\
410 faligndata %d8, %d10, %d62
411
412 #define FALIGN_D12 \
413 faligndata %d12, %d14, %d48 ;\
414 faligndata %d14, %d16, %d50 ;\
415 faligndata %d16, %d18, %d52 ;\
416 faligndata %d18, %d20, %d54 ;\
417 faligndata %d20, %d22, %d56 ;\
418 faligndata %d22, %d24, %d58 ;\
419 faligndata %d24, %d26, %d60 ;\
420 faligndata %d26, %d28, %d62
421
422 #define FALIGN_D28 \
423 faligndata %d28, %d30, %d48 ;\
424 faligndata %d30, %d32, %d50 ;\
425 faligndata %d32, %d34, %d52 ;\
426 faligndata %d34, %d36, %d54 ;\
427 faligndata %d36, %d38, %d56 ;\
428 faligndata %d38, %d40, %d58 ;\
429 faligndata %d40, %d42, %d60 ;\
430 faligndata %d42, %d44, %d62
431
432 #define FALIGN_D44 \
433 faligndata %d44, %d46, %d48 ;\
434 faligndata %d46, %d0, %d50 ;\
435 faligndata %d0, %d2, %d52 ;\
436 faligndata %d2, %d4, %d54 ;\
437 faligndata %d4, %d6, %d56 ;\
438 faligndata %d6, %d8, %d58 ;\
439 faligndata %d8, %d10, %d60 ;\
440 faligndata %d10, %d12, %d62
441
442 #define FALIGN_D14 \
443 faligndata %d14, %d16, %d48 ;\
444 faligndata %d16, %d18, %d50 ;\
445 faligndata %d18, %d20, %d52 ;\
446 faligndata %d20, %d22, %d54 ;\
447 faligndata %d22, %d24, %d56 ;\
448 faligndata %d24, %d26, %d58 ;\
449 faligndata %d26, %d28, %d60 ;\
450 faligndata %d28, %d30, %d62
451
452 #define FALIGN_D30 \
453 faligndata %d30, %d32, %d48 ;\
454 faligndata %d32, %d34, %d50 ;\
455 faligndata %d34, %d36, %d52 ;\
456 faligndata %d36, %d38, %d54 ;\
457 faligndata %d38, %d40, %d56 ;\
458 faligndata %d40, %d42, %d58 ;\
459 faligndata %d42, %d44, %d60 ;\
460 faligndata %d44, %d46, %d62
461
462 #define FALIGN_D46 \
463 faligndata %d46, %d0, %d48 ;\
464 faligndata %d0, %d2, %d50 ;\
465 faligndata %d2, %d4, %d52 ;\
466 faligndata %d4, %d6, %d54 ;\
467 faligndata %d6, %d8, %d56 ;\
468 faligndata %d8, %d10, %d58 ;\
469 faligndata %d10, %d12, %d60 ;\
470 faligndata %d12, %d14, %d62
471
472
473 /*
474 * Copy a block of storage, returning an error code if `from' or
475 * `to' takes a kernel pagefault which cannot be resolved.
476 * Returns errno value on pagefault error, 0 if all ok
 *
 * kcopy always installs .copyerr as t_lofault, keeps the previous
 * handler in %l6 with no flag bits set, and then joins the copy code
 * shared with bcopy at .do_copy. As consumed by .do_copy, after the
 * save %i0 is the source address, %i1 the destination address and
 * %i2 the byte count.
477 */
478
479
480
481 .seg ".text"
482 .align 4
483
484 ENTRY(kcopy)
485
486 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp ! frame includes the fp save area
487 set .copyerr, %l6 ! copyerr is lofault value
488 ldn [THREAD_REG + T_LOFAULT], %l7 ! save existing handler
489 membar #Sync ! sync error barrier (see copy.s)
490 stn %l6, [THREAD_REG + T_LOFAULT] ! set t_lofault
491 !
492 ! Note that we carefully do *not* flag the setting of
493 ! t_lofault.
494 !
495 ba,pt %ncc, .do_copy ! common code
496 mov %l7, %l6 ! delay slot: %l6 = previous handler, no flags set
497
498 /*
499 * We got here because of a fault during kcopy or bcopy if a fault
500 * handler existed when bcopy was called.
501 * Errno value is in %g1.
 *
 * On entry %l6 holds the saved t_lofault value plus any of FPUSED_FLAG
 * and BCOPY_FLAG. From here on %l1 carries a copy of BCOPY_FLAG and,
 * when a preemption request is found pending, KPREEMPT_FLAG.
502 */
503 .copyerr:
504 set .copyerr2, %l1 ! a fault while restoring fp state goes to .copyerr2
505 membar #Sync ! sync error barrier
506 stn %l1, [THREAD_REG + T_LOFAULT] ! set t_lofault
507 btst FPUSED_FLAG, %l6 ! any fp state to put back?
508 bz %icc, 1f ! no - skip the restore (delay slot still executes)
509 and %l6, BCOPY_FLAG, %l1 ! copy flag to %l1
510
511 membar #Sync
512
513 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr
514 wr %o2, 0, %gsr
515
516 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3 ! %o3 = original %fprs
517 btst FPRS_FEF, %o3 ! did the original %fprs have FEF set?
518 bz %icc, 4f ! no - nothing was saved, just zero the registers
519 nop
520
521 ! restore fpregs from stack
522 membar #Sync
523 add %fp, STACK_BIAS - 257, %o2
524 and %o2, -64, %o2 ! round down to a 64-byte block boundary
525 ldda [%o2]ASI_BLK_P, %d0
526 add %o2, 64, %o2
527 ldda [%o2]ASI_BLK_P, %d16
528 add %o2, 64, %o2
529 ldda [%o2]ASI_BLK_P, %d32
530 add %o2, 64, %o2
531 ldda [%o2]ASI_BLK_P, %d48
532 membar #Sync
533
534 ba,pt %ncc, 2f
535 wr %o3, 0, %fprs ! restore fprs
536
537 4:
538 FZERO ! zero all of the fpregs
539 wr %o3, 0, %fprs ! restore fprs
540
541 2: ldn [THREAD_REG + T_LWP], %o2 ! curthread->t_lwp
542 tst %o2
543 bnz,pt %ncc, 1f ! has an lwp - preemption was not disabled for it
544 nop
545
546 ldsb [THREAD_REG + T_PREEMPT], %l0 ! kpreempt_enable(): drop t_preempt
547 deccc %l0
548 bnz,pn %ncc, 1f ! count still non-zero - no preemption check
549 stb %l0, [THREAD_REG + T_PREEMPT] ! delay slot: store the new count either way
550
551 ! Check for a kernel preemption request
552 ldn [THREAD_REG + T_CPU], %l0
553 ldub [%l0 + CPU_KPRUNRUN], %l0
554 tst %l0
555 bnz,a,pt %ncc, 1f ! Need to call kpreempt?
556 or %l1, KPREEMPT_FLAG, %l1 ! If so, set the flag
557
558 !
559 ! Need to cater for the different expectations of kcopy
560 ! and bcopy. kcopy will *always* set a t_lofault handler
561 ! If it fires, we're expected to just return the error code
562 ! and *not* to invoke any existing error handler. As far as
563 ! bcopy is concerned, we only set t_lofault if there was an
564 ! existing lofault handler. In that case we're expected to
565 ! invoke the previously existing handler after resetting the
566 ! t_lofault value.
567 !
568 1:
569 andn %l6, COPY_FLAGS, %l6 ! remove flags from lofault address
570 membar #Sync ! sync error barrier
571 stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
572
573 ! call kpreempt if necessary
574 btst KPREEMPT_FLAG, %l1
575 bz,pt %icc, 2f
576 nop
577 call kpreempt ! NOTE(review): %g1 (errno) is live across this call - confirm it survives
578 rdpr %pil, %o0 ! pass %pil
579 2:
580 btst BCOPY_FLAG, %l1
581 bnz,pn %ncc, 3f ! bcopy: hand off to the original handler
582 nop
583 ret ! kcopy: return the errno to the caller
584 restore %g1, 0, %o0 ! caller's %o0 = %g1
585
586 3:
587 !
588 ! We're here via bcopy. There *must* have been an error handler
589 ! in place otherwise we would have died a nasty death already.
590 !
591 jmp %l6 ! goto real handler
592 restore %g0, 0, %o0 ! dispose of copy window
593
594 /*
595 * We got here because of a fault in .copyerr. We can't safely restore fp
596 * state, so we panic.
597 */
598 fp_panic_msg:
599 .asciz "Unable to restore fp state after copy operation"
600
601 .align 4
602 .copyerr2:
603 set fp_panic_msg, %o0 ! panic format string
604 call panic
605 nop
606 SET_SIZE(kcopy)
607
608
609 /*
610 * Copy a block of storage - must not overlap (from + len <= to).
611 * Registers: l6 - saved t_lofault
612 *
613 * Copy a page of memory.
614 * Assumes double word alignment and a count >= 256.
615 */
616
617 ENTRY(bcopy)
618
619 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
620 ldn [THREAD_REG + T_LOFAULT], %l6 ! save t_lofault
621 tst %l6
622 !
623 ! We've already captured whether t_lofault was zero on entry.
624 ! We need to mark ourselves as being from bcopy since both
625 ! kcopy and bcopy use the same code path. If BCOPY_FLAG is set
626 ! and the saved lofault was zero, we won't reset lofault on
627 ! returning.
628 !
629 or %l6, BCOPY_FLAG, %l6
630 bz,pt %ncc, .do_copy
631 sethi %hi(.copyerr), %o2
632 or %o2, %lo(.copyerr), %o2
633 membar #Sync ! sync error barrier
634 stn %o2, [THREAD_REG + T_LOFAULT] ! install new vector
635
636 .do_copy:
637 cmp %i2, 12 ! for small counts
638 blu %ncc, .bytecp ! just copy bytes
639 .empty
640
641 cmp %i2, VIS_COPY_THRESHOLD ! for large counts
642 blu,pt %ncc, .bcb_punt
643 .empty
644
645 !
646 ! Check to see if VIS acceleration is enabled
647 !
648 sethi %hi(use_hw_bcopy), %o2
649 ld [%o2 + %lo(use_hw_bcopy)], %o2
650 tst %o2
651 bz,pn %icc, .bcb_punt
652 nop
653
654 subcc %i1, %i0, %i3
655 bneg,a,pn %ncc, 1f
656 neg %i3
657 1:
658 /*
659 * Compare against 256 since we should be checking block addresses
660 * and (dest & ~63) - (src & ~63) can be 3 blocks even if
661 * src = dest + (64 * 3) + 63.
662 */
663 cmp %i3, 256
664 blu,pn %ncc, .bcb_punt
665 nop
666
667 ldn [THREAD_REG + T_LWP], %o3
668 tst %o3
669 bnz,pt %ncc, 1f
670 nop
671
672 ! kpreempt_disable();
673 ldsb [THREAD_REG + T_PREEMPT], %o2
674 inc %o2
675 stb %o2, [THREAD_REG + T_PREEMPT]
676
677 1:
678 rd %fprs, %o2 ! check for unused fp
679 st %o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
680 btst FPRS_FEF, %o2
681 bz,a %icc, .do_blockcopy
682 wr %g0, FPRS_FEF, %fprs
683
684 .bcb_fpregs_inuse:
685 cmp %i2, VIS_COPY_THRESHOLD+(64*4) ! for large counts (larger
686 bgeu %ncc, 1f ! if we have to save the fpregs)
687 nop
688
689 tst %o3
690 bnz,pt %ncc, .bcb_punt
691 nop
692
693 ldsb [THREAD_REG + T_PREEMPT], %l0
694 deccc %l0
695 bnz,pn %icc, .bcb_punt
696 stb %l0, [THREAD_REG + T_PREEMPT]
697
698 ! Check for a kernel preemption request
699 ldn [THREAD_REG + T_CPU], %l0
700 ldub [%l0 + CPU_KPRUNRUN], %l0
701 tst %l0
702 bz,pt %icc, .bcb_punt
703 nop
704
705 ! Attempt to preempt
706 call kpreempt
707 rdpr %pil, %o0 ! pass %pil
708
709 ba,pt %ncc, .bcb_punt
710 nop
711
712 1:
713 wr %g0, FPRS_FEF, %fprs
714
715 ! save in-use fpregs on stack
716 membar #Sync
717 add %fp, STACK_BIAS - 257, %o2
718 and %o2, -64, %o2
719 stda %d0, [%o2]ASI_BLK_P
720 add %o2, 64, %o2
721 stda %d16, [%o2]ASI_BLK_P
722 add %o2, 64, %o2
723 stda %d32, [%o2]ASI_BLK_P
724 add %o2, 64, %o2
725 stda %d48, [%o2]ASI_BLK_P
726 membar #Sync
727
728 .do_blockcopy:
729 membar #StoreStore|#StoreLoad|#LoadStore
730
731 rd %gsr, %o2
732 st %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
733
734 ! Set the lower bit in the saved t_lofault to indicate
735 ! that we need to clear the %fprs register on the way
736 ! out
737 or %l6, FPUSED_FLAG, %l6
738
739 ! Swap src/dst since the code below is memcpy code
740 ! and memcpy/bcopy have different calling sequences
741 mov %i1, %i5
742 mov %i0, %i1
743 mov %i5, %i0
744
745 !!! This code is nearly identical to the version in the sun4u
746 !!! libc_psr. Most bugfixes made to that file should be
747 !!! merged into this routine.
748
749 andcc %i0, 7, %o3
750 bz,pt %ncc, blkcpy
751 sub %o3, 8, %o3
752 neg %o3
753 sub %i2, %o3, %i2
754
755 ! Align Destination on double-word boundary
756
757 2: ldub [%i1], %o4
758 inc %i1
759 inc %i0
760 deccc %o3
761 bgu %ncc, 2b
762 stb %o4, [%i0 - 1]
763 blkcpy:
764 andcc %i0, 63, %i3
765 bz,pn %ncc, blalign ! now block aligned
766 sub %i3, 64, %i3
767 neg %i3 ! bytes till block aligned
768 sub %i2, %i3, %i2 ! update %i2 with new count
769
770 ! Copy %i3 bytes till dst is block (64 byte) aligned. use
771 ! double word copies.
772
773 alignaddr %i1, %g0, %g1
774 ldd [%g1], %d0
775 add %g1, 8, %g1
776 6:
777 ldd [%g1], %d2
778 add %g1, 8, %g1
779 subcc %i3, 8, %i3
780 faligndata %d0, %d2, %d8
781 std %d8, [%i0]
782 add %i1, 8, %i1
783 bz,pn %ncc, blalign
784 add %i0, 8, %i0
785 ldd [%g1], %d0
786 add %g1, 8, %g1
787 subcc %i3, 8, %i3
788 faligndata %d2, %d0, %d8
789 std %d8, [%i0]
790 add %i1, 8, %i1
791 bgu,pn %ncc, 6b
792 add %i0, 8, %i0
793
794 blalign:
795 membar #StoreLoad
796 ! %i2 = total length
797 ! %i3 = blocks (length - 64) / 64
798 ! %i4 = doubles remaining (length - blocks)
799 sub %i2, 64, %i3
800 andn %i3, 63, %i3
801 sub %i2, %i3, %i4
802 andn %i4, 7, %i4
803 sub %i4, 16, %i4
804 sub %i2, %i4, %i2
805 sub %i2, %i3, %i2
806
807 andn %i1, 0x3f, %l7 ! blk aligned address
808 alignaddr %i1, %g0, %g0 ! gen %gsr
809
810 srl %i1, 3, %l5 ! bits 3,4,5 are now least sig in %l5
811 andcc %l5, 7, %i5 ! mask everything except bits 1,2 3
812 add %i1, %i4, %i1
813 add %i1, %i3, %i1
814
815 ldda [%l7]ASI_BLK_P, %d0
816 add %l7, 64, %l7
817 ldda [%l7]ASI_BLK_P, %d16
818 add %l7, 64, %l7
819 ldda [%l7]ASI_BLK_P, %d32
820 add %l7, 64, %l7
821 sub %i3, 128, %i3
822
823 ! switch statement to get us to the right 8 byte blk within a
824 ! 64 byte block
825 cmp %i5, 4
826 bgeu,a hlf
827 cmp %i5, 6
828 cmp %i5, 2
829 bgeu,a sqtr
830 nop
831 cmp %i5, 1
832 be,a seg1
833 nop
834 ba,pt %ncc, seg0
835 nop
836 sqtr:
837 be,a seg2
838 nop
839 ba,pt %ncc, seg3
840 nop
841
842 hlf:
843 bgeu,a fqtr
844 nop
845 cmp %i5, 5
846 be,a seg5
847 nop
848 ba,pt %ncc, seg4
849 nop
850 fqtr:
851 be,a seg6
852 nop
853 ba,pt %ncc, seg7
854 nop
855
856
857 seg0:
858 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
859 FALIGN_D0
860 ldda [%l7]ASI_BLK_P, %d0
861 stda %d48, [%i0]ASI_BLK_P
862 add %l7, 64, %l7
863 subcc %i3, 64, %i3
864 bz,pn %ncc, 0f
865 add %i0, 64, %i0
866 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
867 FALIGN_D16
868 ldda [%l7]ASI_BLK_P, %d16
869 stda %d48, [%i0]ASI_BLK_P
870 add %l7, 64, %l7
871 subcc %i3, 64, %i3
872 bz,pn %ncc, 1f
873 add %i0, 64, %i0
874 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
875 FALIGN_D32
876 ldda [%l7]ASI_BLK_P, %d32
877 stda %d48, [%i0]ASI_BLK_P
878 add %l7, 64, %l7
879 subcc %i3, 64, %i3
880 bz,pn %ncc, 2f
881 add %i0, 64, %i0
882 ba,a,pt %ncc, seg0
883
884 0:
885 FALIGN_D16
886 stda %d48, [%i0]ASI_BLK_P
887 add %i0, 64, %i0
888 membar #Sync
889 FALIGN_D32
890 stda %d48, [%i0]ASI_BLK_P
891 ba,pt %ncc, blkd0
892 add %i0, 64, %i0
893
894 1:
895 FALIGN_D32
896 stda %d48, [%i0]ASI_BLK_P
897 add %i0, 64, %i0
898 membar #Sync
899 FALIGN_D0
900 stda %d48, [%i0]ASI_BLK_P
901 ba,pt %ncc, blkd16
902 add %i0, 64, %i0
903
904 2:
905 FALIGN_D0
906 stda %d48, [%i0]ASI_BLK_P
907 add %i0, 64, %i0
908 membar #Sync
909 FALIGN_D16
910 stda %d48, [%i0]ASI_BLK_P
911 ba,pt %ncc, blkd32
912 add %i0, 64, %i0
913
914 seg1:
915 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
916 FALIGN_D2
917 ldda [%l7]ASI_BLK_P, %d0
918 stda %d48, [%i0]ASI_BLK_P
919 add %l7, 64, %l7
920 subcc %i3, 64, %i3
921 bz,pn %ncc, 0f
922 add %i0, 64, %i0
923 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
924 FALIGN_D18
925 ldda [%l7]ASI_BLK_P, %d16
926 stda %d48, [%i0]ASI_BLK_P
927 add %l7, 64, %l7
928 subcc %i3, 64, %i3
929 bz,pn %ncc, 1f
930 add %i0, 64, %i0
931 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
932 FALIGN_D34
933 ldda [%l7]ASI_BLK_P, %d32
934 stda %d48, [%i0]ASI_BLK_P
935 add %l7, 64, %l7
936 subcc %i3, 64, %i3
937 bz,pn %ncc, 2f
938 add %i0, 64, %i0
939 ba,a,pt %ncc, seg1
940 0:
941 FALIGN_D18
942 stda %d48, [%i0]ASI_BLK_P
943 add %i0, 64, %i0
944 membar #Sync
945 FALIGN_D34
946 stda %d48, [%i0]ASI_BLK_P
947 ba,pt %ncc, blkd2
948 add %i0, 64, %i0
949
950 1:
951 FALIGN_D34
952 stda %d48, [%i0]ASI_BLK_P
953 add %i0, 64, %i0
954 membar #Sync
955 FALIGN_D2
956 stda %d48, [%i0]ASI_BLK_P
957 ba,pt %ncc, blkd18
958 add %i0, 64, %i0
959
960 2:
961 FALIGN_D2
962 stda %d48, [%i0]ASI_BLK_P
963 add %i0, 64, %i0
964 membar #Sync
965 FALIGN_D18
966 stda %d48, [%i0]ASI_BLK_P
967 ba,pt %ncc, blkd34
968 add %i0, 64, %i0
969
970 seg2:
971 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
972 FALIGN_D4
973 ldda [%l7]ASI_BLK_P, %d0
974 stda %d48, [%i0]ASI_BLK_P
975 add %l7, 64, %l7
976 subcc %i3, 64, %i3
977 bz,pn %ncc, 0f
978 add %i0, 64, %i0
979 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
980 FALIGN_D20
981 ldda [%l7]ASI_BLK_P, %d16
982 stda %d48, [%i0]ASI_BLK_P
983 add %l7, 64, %l7
984 subcc %i3, 64, %i3
985 bz,pn %ncc, 1f
986 add %i0, 64, %i0
987 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
988 FALIGN_D36
989 ldda [%l7]ASI_BLK_P, %d32
990 stda %d48, [%i0]ASI_BLK_P
991 add %l7, 64, %l7
992 subcc %i3, 64, %i3
993 bz,pn %ncc, 2f
994 add %i0, 64, %i0
995 ba,a,pt %ncc, seg2
996
997 0:
998 FALIGN_D20
999 stda %d48, [%i0]ASI_BLK_P
1000 add %i0, 64, %i0
1001 membar #Sync
1002 FALIGN_D36
1003 stda %d48, [%i0]ASI_BLK_P
1004 ba,pt %ncc, blkd4
1005 add %i0, 64, %i0
1006
1007 1:
1008 FALIGN_D36
1009 stda %d48, [%i0]ASI_BLK_P
1010 add %i0, 64, %i0
1011 membar #Sync
1012 FALIGN_D4
1013 stda %d48, [%i0]ASI_BLK_P
1014 ba,pt %ncc, blkd20
1015 add %i0, 64, %i0
1016
1017 2:
1018 FALIGN_D4
1019 stda %d48, [%i0]ASI_BLK_P
1020 add %i0, 64, %i0
1021 membar #Sync
1022 FALIGN_D20
1023 stda %d48, [%i0]ASI_BLK_P
1024 ba,pt %ncc, blkd36
1025 add %i0, 64, %i0
1026
1027 seg3:
1028 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1029 FALIGN_D6
1030 ldda [%l7]ASI_BLK_P, %d0
1031 stda %d48, [%i0]ASI_BLK_P
1032 add %l7, 64, %l7
1033 subcc %i3, 64, %i3
1034 bz,pn %ncc, 0f
1035 add %i0, 64, %i0
1036 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1037 FALIGN_D22
1038 ldda [%l7]ASI_BLK_P, %d16
1039 stda %d48, [%i0]ASI_BLK_P
1040 add %l7, 64, %l7
1041 subcc %i3, 64, %i3
1042 bz,pn %ncc, 1f
1043 add %i0, 64, %i0
1044 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1045 FALIGN_D38
1046 ldda [%l7]ASI_BLK_P, %d32
1047 stda %d48, [%i0]ASI_BLK_P
1048 add %l7, 64, %l7
1049 subcc %i3, 64, %i3
1050 bz,pn %ncc, 2f
1051 add %i0, 64, %i0
1052 ba,a,pt %ncc, seg3
1053
1054 0:
1055 FALIGN_D22
1056 stda %d48, [%i0]ASI_BLK_P
1057 add %i0, 64, %i0
1058 membar #Sync
1059 FALIGN_D38
1060 stda %d48, [%i0]ASI_BLK_P
1061 ba,pt %ncc, blkd6
1062 add %i0, 64, %i0
1063
1064 1:
1065 FALIGN_D38
1066 stda %d48, [%i0]ASI_BLK_P
1067 add %i0, 64, %i0
1068 membar #Sync
1069 FALIGN_D6
1070 stda %d48, [%i0]ASI_BLK_P
1071 ba,pt %ncc, blkd22
1072 add %i0, 64, %i0
1073
1074 2:
1075 FALIGN_D6
1076 stda %d48, [%i0]ASI_BLK_P
1077 add %i0, 64, %i0
1078 membar #Sync
1079 FALIGN_D22
1080 stda %d48, [%i0]ASI_BLK_P
1081 ba,pt %ncc, blkd38
1082 add %i0, 64, %i0
1083
1084 seg4:
1085 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1086 FALIGN_D8
1087 ldda [%l7]ASI_BLK_P, %d0
1088 stda %d48, [%i0]ASI_BLK_P
1089 add %l7, 64, %l7
1090 subcc %i3, 64, %i3
1091 bz,pn %ncc, 0f
1092 add %i0, 64, %i0
1093 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1094 FALIGN_D24
1095 ldda [%l7]ASI_BLK_P, %d16
1096 stda %d48, [%i0]ASI_BLK_P
1097 add %l7, 64, %l7
1098 subcc %i3, 64, %i3
1099 bz,pn %ncc, 1f
1100 add %i0, 64, %i0
1101 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1102 FALIGN_D40
1103 ldda [%l7]ASI_BLK_P, %d32
1104 stda %d48, [%i0]ASI_BLK_P
1105 add %l7, 64, %l7
1106 subcc %i3, 64, %i3
1107 bz,pn %ncc, 2f
1108 add %i0, 64, %i0
1109 ba,a,pt %ncc, seg4
1110
1111 0:
1112 FALIGN_D24
1113 stda %d48, [%i0]ASI_BLK_P
1114 add %i0, 64, %i0
1115 membar #Sync
1116 FALIGN_D40
1117 stda %d48, [%i0]ASI_BLK_P
1118 ba,pt %ncc, blkd8
1119 add %i0, 64, %i0
1120
1121 1:
1122 FALIGN_D40
1123 stda %d48, [%i0]ASI_BLK_P
1124 add %i0, 64, %i0
1125 membar #Sync
1126 FALIGN_D8
1127 stda %d48, [%i0]ASI_BLK_P
1128 ba,pt %ncc, blkd24
1129 add %i0, 64, %i0
1130
1131 2:
1132 FALIGN_D8
1133 stda %d48, [%i0]ASI_BLK_P
1134 add %i0, 64, %i0
1135 membar #Sync
1136 FALIGN_D24
1137 stda %d48, [%i0]ASI_BLK_P
1138 ba,pt %ncc, blkd40
1139 add %i0, 64, %i0
1140
1141 seg5:
1142 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1143 FALIGN_D10
1144 ldda [%l7]ASI_BLK_P, %d0
1145 stda %d48, [%i0]ASI_BLK_P
1146 add %l7, 64, %l7
1147 subcc %i3, 64, %i3
1148 bz,pn %ncc, 0f
1149 add %i0, 64, %i0
1150 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1151 FALIGN_D26
1152 ldda [%l7]ASI_BLK_P, %d16
1153 stda %d48, [%i0]ASI_BLK_P
1154 add %l7, 64, %l7
1155 subcc %i3, 64, %i3
1156 bz,pn %ncc, 1f
1157 add %i0, 64, %i0
1158 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1159 FALIGN_D42
1160 ldda [%l7]ASI_BLK_P, %d32
1161 stda %d48, [%i0]ASI_BLK_P
1162 add %l7, 64, %l7
1163 subcc %i3, 64, %i3
1164 bz,pn %ncc, 2f
1165 add %i0, 64, %i0
1166 ba,a,pt %ncc, seg5
1167
1168 0:
1169 FALIGN_D26
1170 stda %d48, [%i0]ASI_BLK_P
1171 add %i0, 64, %i0
1172 membar #Sync
1173 FALIGN_D42
1174 stda %d48, [%i0]ASI_BLK_P
1175 ba,pt %ncc, blkd10
1176 add %i0, 64, %i0
1177
1178 1:
1179 FALIGN_D42
1180 stda %d48, [%i0]ASI_BLK_P
1181 add %i0, 64, %i0
1182 membar #Sync
1183 FALIGN_D10
1184 stda %d48, [%i0]ASI_BLK_P
1185 ba,pt %ncc, blkd26
1186 add %i0, 64, %i0
1187
1188 2:
1189 FALIGN_D10
1190 stda %d48, [%i0]ASI_BLK_P
1191 add %i0, 64, %i0
1192 membar #Sync
1193 FALIGN_D26
1194 stda %d48, [%i0]ASI_BLK_P
1195 ba,pt %ncc, blkd42
1196 add %i0, 64, %i0
1197
1198 seg6:
1199 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1200 FALIGN_D12
1201 ldda [%l7]ASI_BLK_P, %d0
1202 stda %d48, [%i0]ASI_BLK_P
1203 add %l7, 64, %l7
1204 subcc %i3, 64, %i3
1205 bz,pn %ncc, 0f
1206 add %i0, 64, %i0
1207 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1208 FALIGN_D28
1209 ldda [%l7]ASI_BLK_P, %d16
1210 stda %d48, [%i0]ASI_BLK_P
1211 add %l7, 64, %l7
1212 subcc %i3, 64, %i3
1213 bz,pn %ncc, 1f
1214 add %i0, 64, %i0
1215 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1216 FALIGN_D44
1217 ldda [%l7]ASI_BLK_P, %d32
1218 stda %d48, [%i0]ASI_BLK_P
1219 add %l7, 64, %l7
1220 subcc %i3, 64, %i3
1221 bz,pn %ncc, 2f
1222 add %i0, 64, %i0
1223 ba,a,pt %ncc, seg6
1224
1225 0:
1226 FALIGN_D28
1227 stda %d48, [%i0]ASI_BLK_P
1228 add %i0, 64, %i0
1229 membar #Sync
1230 FALIGN_D44
1231 stda %d48, [%i0]ASI_BLK_P
1232 ba,pt %ncc, blkd12
1233 add %i0, 64, %i0
1234
1235 1:
1236 FALIGN_D44
1237 stda %d48, [%i0]ASI_BLK_P
1238 add %i0, 64, %i0
1239 membar #Sync
1240 FALIGN_D12
1241 stda %d48, [%i0]ASI_BLK_P
1242 ba,pt %ncc, blkd28
1243 add %i0, 64, %i0
1244
1245 2:
1246 FALIGN_D12
1247 stda %d48, [%i0]ASI_BLK_P
1248 add %i0, 64, %i0
1249 membar #Sync
1250 FALIGN_D28
1251 stda %d48, [%i0]ASI_BLK_P
1252 ba,pt %ncc, blkd44
1253 add %i0, 64, %i0
1254
1255 seg7:
1256 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1257 FALIGN_D14
1258 ldda [%l7]ASI_BLK_P, %d0
1259 stda %d48, [%i0]ASI_BLK_P
1260 add %l7, 64, %l7
1261 subcc %i3, 64, %i3
1262 bz,pn %ncc, 0f
1263 add %i0, 64, %i0
1264 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1265 FALIGN_D30
1266 ldda [%l7]ASI_BLK_P, %d16
1267 stda %d48, [%i0]ASI_BLK_P
1268 add %l7, 64, %l7
1269 subcc %i3, 64, %i3
1270 bz,pn %ncc, 1f
1271 add %i0, 64, %i0
1272 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1273 FALIGN_D46
1274 ldda [%l7]ASI_BLK_P, %d32
1275 stda %d48, [%i0]ASI_BLK_P
1276 add %l7, 64, %l7
1277 subcc %i3, 64, %i3
1278 bz,pn %ncc, 2f
1279 add %i0, 64, %i0
1280 ba,a,pt %ncc, seg7
1281
1282 0:
1283 FALIGN_D30
1284 stda %d48, [%i0]ASI_BLK_P
1285 add %i0, 64, %i0
1286 membar #Sync
1287 FALIGN_D46
1288 stda %d48, [%i0]ASI_BLK_P
1289 ba,pt %ncc, blkd14
1290 add %i0, 64, %i0
1291
1292 1:
1293 FALIGN_D46
1294 stda %d48, [%i0]ASI_BLK_P
1295 add %i0, 64, %i0
1296 membar #Sync
1297 FALIGN_D14
1298 stda %d48, [%i0]ASI_BLK_P
1299 ba,pt %ncc, blkd30
1300 add %i0, 64, %i0
1301
1302 2:
1303 FALIGN_D14
1304 stda %d48, [%i0]ASI_BLK_P
1305 add %i0, 64, %i0
1306 membar #Sync
1307 FALIGN_D30
1308 stda %d48, [%i0]ASI_BLK_P
1309 ba,pt %ncc, blkd46
1310 add %i0, 64, %i0
1311
1312
1313 !
1314 ! dribble out the last partial block
1315 !
1316 blkd0:
1317 subcc %i4, 8, %i4
1318 blu,pn %ncc, blkdone
1319 faligndata %d0, %d2, %d48
1320 std %d48, [%i0]
1321 add %i0, 8, %i0
1322 blkd2:
1323 subcc %i4, 8, %i4
1324 blu,pn %ncc, blkdone
1325 faligndata %d2, %d4, %d48
1326 std %d48, [%i0]
1327 add %i0, 8, %i0
1328 blkd4:
1329 subcc %i4, 8, %i4
1330 blu,pn %ncc, blkdone
1331 faligndata %d4, %d6, %d48
1332 std %d48, [%i0]
1333 add %i0, 8, %i0
1334 blkd6:
1335 subcc %i4, 8, %i4
1336 blu,pn %ncc, blkdone
1337 faligndata %d6, %d8, %d48
1338 std %d48, [%i0]
1339 add %i0, 8, %i0
1340 blkd8:
1341 subcc %i4, 8, %i4
1342 blu,pn %ncc, blkdone
1343 faligndata %d8, %d10, %d48
1344 std %d48, [%i0]
1345 add %i0, 8, %i0
1346 blkd10:
1347 subcc %i4, 8, %i4
1348 blu,pn %ncc, blkdone
1349 faligndata %d10, %d12, %d48
1350 std %d48, [%i0]
1351 add %i0, 8, %i0
1352 blkd12:
1353 subcc %i4, 8, %i4
1354 blu,pn %ncc, blkdone
1355 faligndata %d12, %d14, %d48
1356 std %d48, [%i0]
1357 add %i0, 8, %i0
1358 blkd14:
1359 subcc %i4, 8, %i4
1360 blu,pn %ncc, blkdone
1361 fsrc1 %d14, %d0
1362 ba,a,pt %ncc, blkleft
1363
1364 blkd16:
1365 subcc %i4, 8, %i4
1366 blu,pn %ncc, blkdone
1367 faligndata %d16, %d18, %d48
1368 std %d48, [%i0]
1369 add %i0, 8, %i0
1370 blkd18:
1371 subcc %i4, 8, %i4
1372 blu,pn %ncc, blkdone
1373 faligndata %d18, %d20, %d48
1374 std %d48, [%i0]
1375 add %i0, 8, %i0
1376 blkd20:
1377 subcc %i4, 8, %i4
1378 blu,pn %ncc, blkdone
1379 faligndata %d20, %d22, %d48
1380 std %d48, [%i0]
1381 add %i0, 8, %i0
1382 blkd22:
1383 subcc %i4, 8, %i4
1384 blu,pn %ncc, blkdone
1385 faligndata %d22, %d24, %d48
1386 std %d48, [%i0]
1387 add %i0, 8, %i0
1388 blkd24:
1389 subcc %i4, 8, %i4
1390 blu,pn %ncc, blkdone
1391 faligndata %d24, %d26, %d48
1392 std %d48, [%i0]
1393 add %i0, 8, %i0
1394 blkd26:
1395 subcc %i4, 8, %i4
1396 blu,pn %ncc, blkdone
1397 faligndata %d26, %d28, %d48
1398 std %d48, [%i0]
1399 add %i0, 8, %i0
1400 blkd28:
1401 subcc %i4, 8, %i4
1402 blu,pn %ncc, blkdone
1403 faligndata %d28, %d30, %d48
1404 std %d48, [%i0]
1405 add %i0, 8, %i0
1406 blkd30:
1407 subcc %i4, 8, %i4
1408 blu,pn %ncc, blkdone
1409 fsrc1 %d30, %d0
1410 ba,a,pt %ncc, blkleft
1411 blkd32:
1412 subcc %i4, 8, %i4
1413 blu,pn %ncc, blkdone
1414 faligndata %d32, %d34, %d48
1415 std %d48, [%i0]
1416 add %i0, 8, %i0
1417 blkd34:
1418 subcc %i4, 8, %i4
1419 blu,pn %ncc, blkdone
1420 faligndata %d34, %d36, %d48
1421 std %d48, [%i0]
1422 add %i0, 8, %i0
1423 blkd36:
1424 subcc %i4, 8, %i4
1425 blu,pn %ncc, blkdone
1426 faligndata %d36, %d38, %d48
1427 std %d48, [%i0]
1428 add %i0, 8, %i0
1429 blkd38:
1430 subcc %i4, 8, %i4
1431 blu,pn %ncc, blkdone
1432 faligndata %d38, %d40, %d48
1433 std %d48, [%i0]
1434 add %i0, 8, %i0
1435 blkd40:
1436 subcc %i4, 8, %i4
1437 blu,pn %ncc, blkdone
1438 faligndata %d40, %d42, %d48
1439 std %d48, [%i0]
1440 add %i0, 8, %i0
1441 blkd42:
1442 subcc %i4, 8, %i4
1443 blu,pn %ncc, blkdone
1444 faligndata %d42, %d44, %d48
1445 std %d48, [%i0]
1446 add %i0, 8, %i0
1447 blkd44:
1448 subcc %i4, 8, %i4
1449 blu,pn %ncc, blkdone
1450 faligndata %d44, %d46, %d48
1451 std %d48, [%i0]
1452 add %i0, 8, %i0
1453 blkd46:
1454 subcc %i4, 8, %i4
1455 blu,pn %ncc, blkdone
1456 fsrc1 %d46, %d0
1457
1458 blkleft:
1459 1:
1460 ldd [%l7], %d2
1461 add %l7, 8, %l7
1462 subcc %i4, 8, %i4
1463 faligndata %d0, %d2, %d8
1464 std %d8, [%i0]
1465 blu,pn %ncc, blkdone
1466 add %i0, 8, %i0
1467 ldd [%l7], %d0
1468 add %l7, 8, %l7
1469 subcc %i4, 8, %i4
1470 faligndata %d2, %d0, %d8
1471 std %d8, [%i0]
1472 bgeu,pt %ncc, 1b
1473 add %i0, 8, %i0
1474
1475 blkdone:
1476 tst %i2
1477 bz,pt %ncc, .bcb_exit
1478 and %l3, 0x4, %l3 ! fprs.du = fprs.dl = 0
1479
1480 7: ldub [%i1], %i4
1481 inc %i1
1482 inc %i0
1483 deccc %i2
1484 bgu,pt %ncc, 7b
1485 stb %i4, [%i0 - 1]
1486
1487 .bcb_exit:
1488 membar #StoreLoad|#StoreStore
1489 btst FPUSED_FLAG, %l6
1490 bz %icc, 1f
1491 and %l6, COPY_FLAGS, %l1 ! Store flags in %l1
1492 ! We can't clear the flags from %l6 yet.
1493 ! If there's an error, .copyerr will
1494 ! need them
1495
1496 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr
1497 wr %o2, 0, %gsr
1498
1499 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
1500 btst FPRS_FEF, %o3
1501 bz %icc, 4f
1502 nop
1503
1504 ! restore fpregs from stack
1505 membar #Sync
1506 add %fp, STACK_BIAS - 257, %o2
1507 and %o2, -64, %o2
1508 ldda [%o2]ASI_BLK_P, %d0
1509 add %o2, 64, %o2
1510 ldda [%o2]ASI_BLK_P, %d16
1511 add %o2, 64, %o2
1512 ldda [%o2]ASI_BLK_P, %d32
1513 add %o2, 64, %o2
1514 ldda [%o2]ASI_BLK_P, %d48
1515 membar #Sync
1516
1517 ba,pt %ncc, 2f
1518 wr %o3, 0, %fprs ! restore fprs
1519
1520 4:
1521 FZERO ! zero all of the fpregs
1522 wr %o3, 0, %fprs ! restore fprs
1523
1524 2: ldn [THREAD_REG + T_LWP], %o2
1525 tst %o2
1526 bnz,pt %ncc, 1f
1527 nop
1528
1529 ldsb [THREAD_REG + T_PREEMPT], %l0
1530 deccc %l0
1531 bnz,pn %ncc, 1f
1532 stb %l0, [THREAD_REG + T_PREEMPT]
1533
1534 ! Check for a kernel preemption request
1535 ldn [THREAD_REG + T_CPU], %l0
1536 ldub [%l0 + CPU_KPRUNRUN], %l0
1537 tst %l0
1538 bnz,a,pt %ncc, 1f ! Need to call kpreempt?
1539 or %l1, KPREEMPT_FLAG, %l1 ! If so, set the flag
1540
1541 1:
1542 btst BCOPY_FLAG, %l1
1543 bz,pn %icc, 3f
1544 andncc %l6, COPY_FLAGS, %l6
1545
1546 !
1547 ! Here via bcopy. Check to see if the handler was NULL.
1548 ! If so, just return quietly. Otherwise, reset the
1549 ! handler and go home.
1550 !
1551 bnz,pn %ncc, 3f
1552 nop
1553
1554 !
1555 ! Null handler. Check for kpreempt flag, call if necessary,
1556 ! then return.
1557 !
1558 btst KPREEMPT_FLAG, %l1
1559 bz,pt %icc, 2f
1560 nop
1561 call kpreempt
1562 rdpr %pil, %o0 ! pass %pil
1563 2:
1564 ret
1565 restore %g0, 0, %o0
1566
1567 !
1568 ! Here via kcopy or bcopy with a handler.Reset the
1569 ! fault handler.
1570 !
1571 3:
1572 membar #Sync
1573 stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
1574
1575 ! call kpreempt if necessary
1576 btst KPREEMPT_FLAG, %l1
1577 bz,pt %icc, 4f
1578 nop
1579 call kpreempt
1580 rdpr %pil, %o0
1581 4:
1582 ret
1583 restore %g0, 0, %o0
1584
1585 .bcb_punt:
1586 !
1587 ! use aligned transfers where possible
1588 !
1589 xor %i0, %i1, %o4 ! xor from and to address
1590 btst 7, %o4 ! if lower three bits zero
1591 bz %icc, .aldoubcp ! can align on double boundary
1592 .empty ! assembler complaints about label
1593
1594 xor %i0, %i1, %o4 ! xor from and to address
1595 btst 3, %o4 ! if lower two bits zero
1596 bz %icc, .alwordcp ! can align on word boundary
1597 btst 3, %i0 ! delay slot, from address unaligned?
1598 !
1599 ! use aligned reads and writes where possible
1600 ! this differs from wordcp in that it copes
1601 ! with odd alignment between source and destnation
1602 ! using word reads and writes with the proper shifts
1603 ! in between to align transfers to and from memory
1604 ! i0 - src address, i1 - dest address, i2 - count
1605 ! i3, i4 - tmps for used generating complete word
1606 ! i5 (word to write)
1607 ! l0 size in bits of upper part of source word (US)
1608 ! l1 size in bits of lower part of source word (LS = 32 - US)
1609 ! l2 size in bits of upper part of destination word (UD)
1610 ! l3 size in bits of lower part of destination word (LD = 32 - UD)
1611 ! l4 number of bytes leftover after aligned transfers complete
1612 ! l5 the number 32
1613 !
1614 mov 32, %l5 ! load an oft-needed constant
1615 bz .align_dst_only
1616 btst 3, %i1 ! is destnation address aligned?
1617 clr %i4 ! clear registers used in either case
1618 bz %icc, .align_src_only
1619 clr %l0
1620 !
1621 ! both source and destination addresses are unaligned
1622 !
1623 1: ! align source
1624 ldub [%i0], %i3 ! read a byte from source address
1625 add %i0, 1, %i0 ! increment source address
1626 or %i4, %i3, %i4 ! or in with previous bytes (if any)
1627 btst 3, %i0 ! is source aligned?
1628 add %l0, 8, %l0 ! increment size of upper source (US)
1629 bnz,a 1b
1630 sll %i4, 8, %i4 ! make room for next byte
1631
1632 sub %l5, %l0, %l1 ! generate shift left count (LS)
1633 sll %i4, %l1, %i4 ! prepare to get rest
1634 ld [%i0], %i3 ! read a word
1635 add %i0, 4, %i0 ! increment source address
1636 srl %i3, %l0, %i5 ! upper src bits into lower dst bits
1637 or %i4, %i5, %i5 ! merge
1638 mov 24, %l3 ! align destination
1639 1:
1640 srl %i5, %l3, %i4 ! prepare to write a single byte
1641 stb %i4, [%i1] ! write a byte
1642 add %i1, 1, %i1 ! increment destination address
1643 sub %i2, 1, %i2 ! decrement count
1644 btst 3, %i1 ! is destination aligned?
1645 bnz,a 1b
1646 sub %l3, 8, %l3 ! delay slot, decrement shift count (LD)
1647 sub %l5, %l3, %l2 ! generate shift left count (UD)
1648 sll %i5, %l2, %i5 ! move leftover into upper bytes
1649 cmp %l2, %l0 ! cmp # reqd to fill dst w old src left
1650 bgu %ncc, .more_needed ! need more to fill than we have
1651 nop
1652
1653 sll %i3, %l1, %i3 ! clear upper used byte(s)
1654 srl %i3, %l1, %i3
1655 ! get the odd bytes between alignments
1656 sub %l0, %l2, %l0 ! regenerate shift count
1657 sub %l5, %l0, %l1 ! generate new shift left count (LS)
1658 and %i2, 3, %l4 ! must do remaining bytes if count%4 > 0
1659 andn %i2, 3, %i2 ! # of aligned bytes that can be moved
1660 srl %i3, %l0, %i4
1661 or %i5, %i4, %i5
1662 st %i5, [%i1] ! write a word
1663 subcc %i2, 4, %i2 ! decrement count
1664 bz %ncc, .unalign_out
1665 add %i1, 4, %i1 ! increment destination address
1666
1667 b 2f
1668 sll %i3, %l1, %i5 ! get leftover into upper bits
1669 .more_needed:
1670 sll %i3, %l0, %i3 ! save remaining byte(s)
1671 srl %i3, %l0, %i3
1672 sub %l2, %l0, %l1 ! regenerate shift count
1673 sub %l5, %l1, %l0 ! generate new shift left count
1674 sll %i3, %l1, %i4 ! move to fill empty space
1675 b 3f
1676 or %i5, %i4, %i5 ! merge to complete word
1677 !
1678 ! the source address is aligned and destination is not
1679 !
1680 .align_dst_only:
1681 ld [%i0], %i4 ! read a word
1682 add %i0, 4, %i0 ! increment source address
1683 mov 24, %l0 ! initial shift alignment count
1684 1:
1685 srl %i4, %l0, %i3 ! prepare to write a single byte
1686 stb %i3, [%i1] ! write a byte
1687 add %i1, 1, %i1 ! increment destination address
1688 sub %i2, 1, %i2 ! decrement count
1689 btst 3, %i1 ! is destination aligned?
1690 bnz,a 1b
1691 sub %l0, 8, %l0 ! delay slot, decrement shift count
1692 .xfer:
1693 sub %l5, %l0, %l1 ! generate shift left count
1694 sll %i4, %l1, %i5 ! get leftover
1695 3:
1696 and %i2, 3, %l4 ! must do remaining bytes if count%4 > 0
1697 andn %i2, 3, %i2 ! # of aligned bytes that can be moved
1698 2:
1699 ld [%i0], %i3 ! read a source word
1700 add %i0, 4, %i0 ! increment source address
1701 srl %i3, %l0, %i4 ! upper src bits into lower dst bits
1702 or %i5, %i4, %i5 ! merge with upper dest bits (leftover)
1703 st %i5, [%i1] ! write a destination word
1704 subcc %i2, 4, %i2 ! decrement count
1705 bz %ncc, .unalign_out ! check if done
1706 add %i1, 4, %i1 ! increment destination address
1707 b 2b ! loop
1708 sll %i3, %l1, %i5 ! get leftover
1709 .unalign_out:
1710 tst %l4 ! any bytes leftover?
1711 bz %ncc, .cpdone
1712 .empty ! allow next instruction in delay slot
1713 1:
1714 sub %l0, 8, %l0 ! decrement shift
1715 srl %i3, %l0, %i4 ! upper src byte into lower dst byte
1716 stb %i4, [%i1] ! write a byte
1717 subcc %l4, 1, %l4 ! decrement count
1718 bz %ncc, .cpdone ! done?
1719 add %i1, 1, %i1 ! increment destination
1720 tst %l0 ! any more previously read bytes
1721 bnz %ncc, 1b ! we have leftover bytes
1722 mov %l4, %i2 ! delay slot, mv cnt where dbytecp wants
1723 b .dbytecp ! let dbytecp do the rest
1724 sub %i0, %i1, %i0 ! i0 gets the difference of src and dst
1725 !
1726 ! the destination address is aligned and the source is not
1727 !
1728 .align_src_only:
1729 ldub [%i0], %i3 ! read a byte from source address
1730 add %i0, 1, %i0 ! increment source address
1731 or %i4, %i3, %i4 ! or in with previous bytes (if any)
1732 btst 3, %i0 ! is source aligned?
1733 add %l0, 8, %l0 ! increment shift count (US)
1734 bnz,a .align_src_only
1735 sll %i4, 8, %i4 ! make room for next byte
1736 b,a .xfer
1737 !
1738 ! if from address unaligned for double-word moves,
1739 ! move bytes till it is, if count is < 56 it could take
1740 ! longer to align the thing than to do the transfer
1741 ! in word size chunks right away
1742 !
1743 .aldoubcp:
1744 cmp %i2, 56 ! if count < 56, use wordcp, it takes
1745 blu,a %ncc, .alwordcp ! longer to align doubles than words
1746 mov 3, %o0 ! mask for word alignment
1747 call .alignit ! copy bytes until aligned
1748 mov 7, %o0 ! mask for double alignment
1749 !
1750 ! source and destination are now double-word aligned
1751 ! i3 has aligned count returned by alignit
1752 !
1753 and %i2, 7, %i2 ! unaligned leftover count
1754 sub %i0, %i1, %i0 ! i0 gets the difference of src and dst
1755 5:
1756 ldx [%i0+%i1], %o4 ! read from address
1757 stx %o4, [%i1] ! write at destination address
1758 subcc %i3, 8, %i3 ! dec count
1759 bgu %ncc, 5b
1760 add %i1, 8, %i1 ! delay slot, inc to address
1761 cmp %i2, 4 ! see if we can copy a word
1762 blu %ncc, .dbytecp ! if 3 or less bytes use bytecp
1763 .empty
1764 !
1765 ! for leftover bytes we fall into wordcp, if needed
1766 !
1767 .wordcp:
1768 and %i2, 3, %i2 ! unaligned leftover count
1769 5:
1770 ld [%i0+%i1], %o4 ! read from address
1771 st %o4, [%i1] ! write at destination address
1772 subcc %i3, 4, %i3 ! dec count
1773 bgu %ncc, 5b
1774 add %i1, 4, %i1 ! delay slot, inc to address
1775 b,a .dbytecp
1776
1777 ! we come here to align copies on word boundaries
1778 .alwordcp:
1779 call .alignit ! go word-align it
1780 mov 3, %o0 ! bits that must be zero to be aligned
1781 b .wordcp
1782 sub %i0, %i1, %i0 ! i0 gets the difference of src and dst
1783
1784 !
1785 ! byte copy, works with any alignment
1786 !
1787 .bytecp:
1788 b .dbytecp
1789 sub %i0, %i1, %i0 ! i0 gets difference of src and dst
1790
1791 !
1792 ! differenced byte copy, works with any alignment
1793 ! assumes dest in %i1 and (source - dest) in %i0
1794 !
1795 1:
1796 stb %o4, [%i1] ! write to address
1797 inc %i1 ! inc to address
1798 .dbytecp:
1799 deccc %i2 ! dec count
1800 bgeu,a %ncc, 1b ! loop till done
1801 ldub [%i0+%i1], %o4 ! read from address
1802 !
1803 ! FPUSED_FLAG will not have been set in any path leading to
1804 ! this point. No need to deal with it.
1805 !
1806 .cpdone:
1807 btst BCOPY_FLAG, %l6
1808 bz,pn %icc, 2f
1809 andncc %l6, BCOPY_FLAG, %l6
1810 !
1811 ! Here via bcopy. Check to see if the handler was NULL.
1812 ! If so, just return quietly. Otherwise, reset the
1813 ! handler and go home.
1814 !
1815 bnz,pn %ncc, 2f
1816 nop
1817 !
1818 ! Null handler.
1819 !
1820 ret
1821 restore %g0, 0, %o0
1822 !
1823 ! Here via kcopy or bcopy with a handler.Reset the
1824 ! fault handler.
1825 !
1826 2:
1827 membar #Sync
1828 stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
1829 ret
1830 restore %g0, 0, %o0 ! return (0)
1831
1832 /*
1833 * Common code used to align transfers on word and doubleword
1834 * boundaries. Aligns source and destination and returns a count
1835 * of aligned bytes to transfer in %i3
1836 */
1837 1:
1838 inc %i0 ! inc from
1839 stb %o4, [%i1] ! write a byte
1840 inc %i1 ! inc to
1841 dec %i2 ! dec count
1842 .alignit:
1843 btst %o0, %i0 ! %o0 is bit mask to check for alignment
1844 bnz,a 1b
1845 ldub [%i0], %o4 ! read next byte
1846
1847 retl
1848 andn %i2, %o0, %i3 ! return size of aligned bytes
1849 SET_SIZE(bcopy)
1850
1851 /*
1852 * Block copy with possibly overlapped operands.
 *
 * Register usage (leaf routine, no register window is taken):
 *	%o0 - from (source) address
 *	%o1 - to (destination) address
 *	%o2 - byte count
 *	%o3 - scratch: (from - to), then abs(from - to), then the byte
 *	      being moved
 *
 * Flow:
 *	- count == 0: return immediately.
 *	- count <= abs(from - to): the regions cannot overlap, so branch
 *	  to bcopy with the arguments still in %o0-%o2.
 *	- otherwise copy one byte at a time: backwards (last byte first)
 *	  when from < to, forwards when from >= to.
 *
 * The byte-copy paths return with retl/nop and do not load a result
 * into %o0.
1853 */
1854
1855 ENTRY(ovbcopy)
1856 tst %o2 ! check count
1857 bgu,a %ncc, 1f ! count != 0: go compare the addresses
1858 subcc %o0, %o1, %o3 ! difference of from and to address (annulled slot: runs only when the branch is taken)
1859
1860 retl ! return
1861 nop
1862 1:
1863 bneg,a %ncc, 2f ! condition codes come from the subcc above
1864 neg %o3 ! if < 0, make it positive
1865 2: cmp %o2, %o3 ! cmp size and abs(from - to)
1866 bleu %ncc, bcopy ! if size <= abs(diff): use bcopy,
1867 .empty ! no overlap
1868 cmp %o0, %o1 ! compare from and to addresses (sits in the bleu delay slot)
1869 blu %ncc, .ov_bkwd ! if from < to, copy backwards
1870 nop
1871 !
1872 ! Copy forwards.
1873 !
1874 .ov_fwd:
1875 ldub [%o0], %o3 ! read from address
1876 inc %o0 ! inc from address
1877 stb %o3, [%o1] ! write to address
1878 deccc %o2 ! dec count
1879 bgu %ncc, .ov_fwd ! loop till done
1880 inc %o1 ! inc to address
1881
1882 retl ! return
1883 nop
1884 !
1885 ! Copy backwards.
1886 !
1887 .ov_bkwd:
1888 deccc %o2 ! dec count; %o2 is now the index of the byte to move
1889 ldub [%o0 + %o2], %o3 ! get byte at end of src
1890 bgu %ncc, .ov_bkwd ! loop till done
1891 stb %o3, [%o1 + %o2] ! delay slot, store at end of dst
1892
1893 retl ! return
1894 nop
1895 SET_SIZE(ovbcopy)
1896
1897 /*
1898 * hwblkpagecopy()
1899 *
1900 * Copies exactly one page. This routine assumes the caller (ppcopy)
1901 * has already disabled kernel preemption and has checked
1902 * use_hw_bcopy.
 *
 * The page is moved PAGESIZE bytes at a time in 64-byte units using
 * block loads and stores (ldda/stda with ASI_BLK_P) through the
 * floating point registers %d0-%d46. If FPRS_FEF was set in %fprs on
 * entry, the three register blocks used (%d0, %d16, %d32) are saved
 * on the stack first and reloaded before returning. %fprs is
 * rewritten with its entry value in either case.
 *
 * Returns 0 in the caller's %o0 (restore %g0, 0, %o0).
1903 */
1904 ENTRY(hwblkpagecopy)
1905 ! get another window w/space for three aligned blocks of saved fpregs
 ! (the frame reserves 4*64 bytes; presumably the fourth block is slack
 ! for rounding the save area down to a 64-byte boundary - confirm)
1906 save %sp, -SA(MINFRAME + 4*64), %sp
1907
1908 ! %i0 - source address (arg)
1909 ! %i1 - destination address (arg)
1910 ! %i2 - length of region (not arg)
1911 ! %l0 - saved fprs
1912 ! %l1 - pointer to saved fpregs
 ! %l3 - cursor stepping through the saved fpregs area
1913
1914 rd %fprs, %l0 ! check for unused fp
1915 btst FPRS_FEF, %l0
1916 bz 1f ! FEF clear: skip the register save
1917 membar #Sync
1918
1919 ! save in-use fpregs on stack
1920 add %fp, STACK_BIAS - 193, %l1
1921 and %l1, -64, %l1 ! round down to a 64-byte boundary
1922 stda %d0, [%l1]ASI_BLK_P
1923 add %l1, 64, %l3
1924 stda %d16, [%l3]ASI_BLK_P
1925 add %l3, 64, %l3
1926 stda %d32, [%l3]ASI_BLK_P
1927 membar #Sync
1928
1929 1: wr %g0, FPRS_FEF, %fprs
1930 ldda [%i0]ASI_BLK_P, %d0 ! prime the pipeline with the first source block
1931 add %i0, 64, %i0
1932 set PAGESIZE - 64, %i2 ! bytes left to store after the first block
1933
 ! Each pass through the loop stores two blocks. While one source block
 ! is being loaded (alternately into %d16 and %d0), the previously loaded
 ! block is moved into %d32-%d46 and stored from there.
1934 2: ldda [%i0]ASI_BLK_P, %d16
1935 fsrc1 %d0, %d32
1936 fsrc1 %d2, %d34
1937 fsrc1 %d4, %d36
1938 fsrc1 %d6, %d38
1939 fsrc1 %d8, %d40
1940 fsrc1 %d10, %d42
1941 fsrc1 %d12, %d44
1942 fsrc1 %d14, %d46
1943 stda %d32, [%i1]ASI_BLK_P
1944 add %i0, 64, %i0
1945 subcc %i2, 64, %i2
1946 bz,pn %ncc, 3f ! only %d16 is left to store
1947 add %i1, 64, %i1
1948 ldda [%i0]ASI_BLK_P, %d0
1949 fsrc1 %d16, %d32
1950 fsrc1 %d18, %d34
1951 fsrc1 %d20, %d36
1952 fsrc1 %d22, %d38
1953 fsrc1 %d24, %d40
1954 fsrc1 %d26, %d42
1955 fsrc1 %d28, %d44
1956 fsrc1 %d30, %d46
1957 stda %d32, [%i1]ASI_BLK_P
1958 add %i0, 64, %i0
1959 sub %i2, 64, %i2 ! no zero test here; the loop exits only from the first half
1960 ba,pt %ncc, 2b
1961 add %i1, 64, %i1
1962
1963 3: membar #Sync
1964 btst FPRS_FEF, %l0 ! were fpregs saved on entry?
1965 bz 4f
1966 stda %d16, [%i1]ASI_BLK_P ! delay slot: store the final block on both paths
1967
1968 ! restore fpregs from stack
1969 membar #Sync
1970 ldda [%l1]ASI_BLK_P, %d0
1971 add %l1, 64, %l3
1972 ldda [%l3]ASI_BLK_P, %d16
1973 add %l3, 64, %l3
1974 ldda [%l3]ASI_BLK_P, %d32
1975
1976 4: wr %l0, 0, %fprs ! restore fprs
1977 membar #Sync
1978 ret
1979 restore %g0, 0, %o0 ! return (0)
1980 SET_SIZE(hwblkpagecopy)
1981
1982
1983 /*
1984 * Transfer data to and from user space -
1985 * Note that these routines can cause faults
1986 * It is assumed that the kernel has nothing at
1987 * less than KERNELBASE in the virtual address space.
1988 *
1989 * Note that copyin(9F) and copyout(9F) are part of the
1990 * DDI/DKI which specifies that they return '-1' on "errors."
1991 *
1992 * Sigh.
1993 *
1994 * So there's two extremely similar routines - xcopyin() and xcopyout()
1995 * which return the errno that we've faithfully computed. This
1996 * allows other callers (e.g. uiomove(9F)) to work correctly.
1997 * Given that these are used pretty heavily, we expand the calling
1998 * sequences inline for all flavours (rather than making wrappers).
1999 *
2000 * There are also stub routines for xcopyout_little and xcopyin_little,
2001 * which currently are intended to handle requests of <= 16 bytes from
2002 * do_unaligned. Future enhancement to make them handle 8k pages efficiently
2003 * is left as an exercise...
2004 */
2005
2006 /*
2007 * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr)
2008 *
2009 * General theory of operation:
2010 *
2011 * The only difference between default_copy{in,out} and
2012 * default_xcopy{in,out} is in the error handling routine they invoke
2013 * when a memory access error is seen. default_xcopyOP returns the errno
2014 * while default_copyOP returns -1 (see above). copy{in,out}_noerr set
2015 * a special flag (by oring the value 2 into the fault handler address)
2016 * if they are called with a fault handler already in place. That flag
2017 * causes the default handlers to trampoline to the previous handler
2018 * upon an error.
2019 *
2020 * None of the copyops routines grab a window until it's decided that
2021 * we need to do a HW block copy operation. This saves a window
2022 * spill/fill when we're called during socket ops. The typical IO
2023 * path won't cause spill/fill traps.
2024 *
2025 * This code uses a set of 4 limits for the maximum size that will
2026 * be copied given a particular input/output address alignment.
2027 * the default limits are:
2028 *
2029 * single byte aligned - 900 (hw_copy_limit_1)
2030 * two byte aligned - 1800 (hw_copy_limit_2)
2031 * four byte aligned - 3600 (hw_copy_limit_4)
2032 * eight byte aligned - 7200 (hw_copy_limit_8)
2033 *
2034 * If the value for a particular limit is zero, the copy will be done
2035 * via the copy loops rather than VIS.
2036 *
2037 * Flow:
2038 *
2039 * If count == zero return zero.
2040 *
2041 * Store the previous lo_fault handler into %g6.
2042 * Place our secondary lofault handler into %g5.
2043 * Place the address of our nowindow fault handler into %o3.
2044 * Place the address of the windowed fault handler into %o4.
2045 * --> We'll use this handler if we end up grabbing a window
2046 * --> before we use VIS instructions.
2047 *
2048 * If count is less than or equal to SMALL_LIMIT (7) we
2049 * always do a byte for byte copy.
2050 *
2051 * If count is > SMALL_LIMIT, we check the alignment of the input
2052 * and output pointers. Based on the alignment we check count
2053 * against a soft limit of VIS_COPY_THRESHOLD (900 on spitfire). If
2054 * we're larger than VIS_COPY_THRESHOLD, we check against a limit based
2055 * on detected alignment. If we exceed the alignment value we copy
2056 * via VIS instructions.
2057 *
2058 * If we don't exceed one of the limits, we store -count in %o3,
2059 * we store the number of chunks (8, 4, 2 or 1 byte) operated
2060 * on in our basic copy loop in %o2. Following this we branch
2061 * to the appropriate copy loop and copy that many chunks.
2062 * Since we've been adding the chunk size to %o3 each time through
2063 * as well as decrementing %o2, we can tell if any data is
2064 * left to be copied by examining %o3. If that is zero, we're
2065 * done and can go home. If not, we figure out what the largest
2066 * chunk size left to be copied is and branch to that copy loop
2067 * unless there's only one byte left. We load that as we're
2068 * branching to code that stores it just before we return.
2069 *
2070 * There is one potential situation in which we start to do a VIS
2071 * copy but decide to punt and return to the copy loops. There is
2072 * (in the default configuration) a window of 256 bytes between
2073 * the single byte aligned copy limit and what VIS treats as its
2074 * minimum if floating point is in use in the calling app. We need
2075 * to be prepared to handle this. See the .small_copyOP label for
2076 * details.
2077 *
2078 * Fault handlers are invoked if we reference memory that has no
2079 * current mapping. All forms share the same copyio_fault handler.
2080 * This routine handles fixing up the stack and general housecleaning.
2081 * Each copy operation has a simple fault handler that is then called
2082 * to do the work specific to the individual operation. The handlers
2083 * for default_copyOP and copyOP_noerr are found at the end of
2084 * default_copyout. The handlers for default_xcopyOP are found at the
2085 * end of xdefault_copyin.
2086 */
2087
2088 /*
2089 * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
2090 */
2091
2092 /*
2093 * We save the arguments in the following registers in case of a fault:
2094 * kaddr - %g2
2095 * uaddr - %g3
2096 * count - %g4
2097 */
2098 #define SAVE_SRC %g2
2099 #define SAVE_DST %g3
2100 #define SAVE_COUNT %g4
2101
/*
 * REAL_LOFAULT holds the address of the operation-specific error
 * handler; copyio_fault and copyio_fault_nowindow finish by jumping
 * through it. SAVED_LOFAULT holds the t_lofault value that was in
 * place when the copy routine was entered (loaded in .do_copyout);
 * copyio_fault also tests and clears FPUSED_FLAG in this register.
 */
2102 #define REAL_LOFAULT %g5
2103 #define SAVED_LOFAULT %g6
2104
2105 /*
2106 * Generic copyio fault handler. This is the first line of defense when a
2107 * fault occurs in (x)copyin/(x)copyout. In order for this to function
2108 * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
2109 * This allows us to share common code for all the flavors of the copy
2110 * operations, including the _noerr versions.
2111 *
2112 * Note that this function will restore the original input parameters before
2113 * calling REAL_LOFAULT. So the real handler can vector to the appropriate
2114 * member of the t_copyop structure, if needed.
 *
 * Steps performed here:
 *	1. If FPUSED_FLAG is set in SAVED_LOFAULT, reload %gsr and the
 *	   saved %fprs value from the stack frame; then either reload
 *	   %d0-%d62 from the frame (saved %fprs had FPRS_FEF set) or
 *	   zero the fp registers with FZERO, and write %fprs back.
 *	   FPUSED_FLAG is cleared from SAVED_LOFAULT in every case.
 *	2. Pop the register window with "restore".
 *	3. Reload %o0-%o2 from SAVE_SRC/SAVE_DST/SAVE_COUNT and jump to
 *	   REAL_LOFAULT.
 *
 * This routine does not itself store to T_LOFAULT.
 * NOTE(review): presumably the REAL_LOFAULT handler is expected to
 * put SAVED_LOFAULT back - confirm against the handlers.
2115 */
2116 ENTRY(copyio_fault)
2117 btst FPUSED_FLAG, SAVED_LOFAULT
2118 bz 1f ! fp state was not touched: nothing to restore
2119 andn SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT ! delay slot: runs on both paths
2120
2121 membar #Sync
2122
2123 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
2124 wr %o2, 0, %gsr ! restore gsr
2125
2126 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
2127 btst FPRS_FEF, %o3
2128 bz 4f ! saved fprs had FEF clear: no fpregs to reload
2129 nop
2130
2131 ! restore fpregs from stack
2132 membar #Sync
2133 add %fp, STACK_BIAS - 257, %o2
2134 and %o2, -64, %o2 ! round down to a 64-byte boundary
2135 ldda [%o2]ASI_BLK_P, %d0
2136 add %o2, 64, %o2
2137 ldda [%o2]ASI_BLK_P, %d16
2138 add %o2, 64, %o2
2139 ldda [%o2]ASI_BLK_P, %d32
2140 add %o2, 64, %o2
2141 ldda [%o2]ASI_BLK_P, %d48
2142 membar #Sync
2143
2144 ba,pt %ncc, 1f
2145 wr %o3, 0, %fprs ! restore fprs
2146
2147 4:
2148 FZERO ! zero all of the fpregs
2149 wr %o3, 0, %fprs ! restore fprs
2150
2151 1:
2152
2153 restore ! pop the register window; the %o registers below are the caller's
2154
2155 mov SAVE_SRC, %o0
2156 mov SAVE_DST, %o1
2157 jmp REAL_LOFAULT
2158 mov SAVE_COUNT, %o2 ! delay slot: third argument
2159 SET_SIZE(copyio_fault)
2160
/*
 * Fault handler variant that performs no register-window "restore"
 * and touches no floating point state. It writes SAVED_LOFAULT back
 * to the thread's t_lofault, reloads the original arguments into
 * %o0-%o2 from SAVE_SRC/SAVE_DST/SAVE_COUNT and jumps to REAL_LOFAULT.
 */
2161 ENTRY(copyio_fault_nowindow)
2162 membar #Sync
2163 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
2164
2165 mov SAVE_SRC, %o0
2166 mov SAVE_DST, %o1
2167 jmp REAL_LOFAULT
2168 mov SAVE_COUNT, %o2 ! delay slot: third argument
2169 SET_SIZE(copyio_fault_nowindow)
2170
2171 ENTRY(copyout)
2172 sethi %hi(.copyout_err), REAL_LOFAULT
2173 or REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT
2174
2175 .do_copyout:
2176 !
2177 ! Check the length and bail if zero.
2178 !
2179 tst %o2
2180 bnz,pt %ncc, 1f
2181 nop
2182 retl
2183 clr %o0
2184 1:
2185 sethi %hi(copyio_fault), %o4
2186 or %o4, %lo(copyio_fault), %o4
2187 sethi %hi(copyio_fault_nowindow), %o3
2188 ldn [THREAD_REG + T_LOFAULT], SAVED_LOFAULT
2189 or %o3, %lo(copyio_fault_nowindow), %o3
2190 membar #Sync
2191 stn %o3, [THREAD_REG + T_LOFAULT]
2192
2193 mov %o0, SAVE_SRC
2194 mov %o1, SAVE_DST
2195 mov %o2, SAVE_COUNT
2196
2197 !
2198 ! Check to see if we're more than SMALL_LIMIT (7 bytes).
2199 ! Run in leaf mode, using the %o regs as our input regs.
2200 !
2201 subcc %o2, SMALL_LIMIT, %o3
2202 bgu,a,pt %ncc, .dco_ns
2203 or %o0, %o1, %o3
2204 !
2205 ! What was previously ".small_copyout"
2206 ! Do full differenced copy.
2207 !
2208 .dcobcp:
2209 sub %g0, %o2, %o3 ! negate count
2210 add %o0, %o2, %o0 ! make %o0 point at the end
2211 add %o1, %o2, %o1 ! make %o1 point at the end
2212 ba,pt %ncc, .dcocl
2213 ldub [%o0 + %o3], %o4 ! load first byte
2214 !
2215 ! %o0 and %o2 point at the end and remain pointing at the end
2216 ! of their buffers. We pull things out by adding %o3 (which is
2217 ! the negation of the length) to the buffer end which gives us
2218 ! the curent location in the buffers. By incrementing %o3 we walk
2219 ! through both buffers without having to bump each buffer's
2220 ! pointer. A very fast 4 instruction loop.
2221 !
2222 .align 16
2223 .dcocl:
2224 stba %o4, [%o1 + %o3]ASI_USER
2225 inccc %o3
2226 bl,a,pt %ncc, .dcocl
2227 ldub [%o0 + %o3], %o4
2228 !
2229 ! We're done. Go home.
2230 !
2231 membar #Sync
2232 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
2233 retl
2234 clr %o0
2235 !
2236 ! Try aligned copies from here.
2237 !
2238 .dco_ns:
2239 ! %o0 = kernel addr (to be copied from)
2240 ! %o1 = user addr (to be copied to)
2241 ! %o2 = length
2242 ! %o3 = %o1 | %o2 (used for alignment checking)
2243 ! %o4 is alternate lo_fault
2244 ! %o5 is original lo_fault
2245 !
2246 ! See if we're single byte aligned. If we are, check the
2247 ! limit for single byte copies. If we're smaller or equal,
2248 ! bounce to the byte for byte copy loop. Otherwise do it in
2249 ! HW (if enabled).
2250 !
2251 btst 1, %o3
2252 bz,pt %icc, .dcoh8
2253 btst 7, %o3
2254 !
2255 ! Single byte aligned. Do we do it via HW or via
2256 ! byte for byte? Do a quick no memory reference
2257 ! check to pick up small copies.
2258 !
2259 subcc %o2, VIS_COPY_THRESHOLD, %o3
2260 bleu,pt %ncc, .dcobcp
2261 sethi %hi(hw_copy_limit_1), %o3
2262 !
2263 ! Big enough that we need to check the HW limit for
2264 ! this size copy.
2265 !
2266 ld [%o3 + %lo(hw_copy_limit_1)], %o3
2267 !
2268 ! Is HW copy on? If not, do everything byte for byte.
2269 !
2270 tst %o3
2271 bz,pn %icc, .dcobcp
2272 subcc %o3, %o2, %o3
2273 !
2274 ! If we're less than or equal to the single byte copy limit,
2275 ! bop to the copy loop.
2276 !
2277 bge,pt %ncc, .dcobcp
2278 nop
2279 !
2280 ! We're big enough and copy is on. Do it with HW.
2281 !
2282 ba,pt %ncc, .big_copyout
2283 nop
2284 .dcoh8:
2285 !
2286 ! 8 byte aligned?
2287 !
2288 bnz,a %ncc, .dcoh4
2289 btst 3, %o3
2290 !
2291 ! See if we're in the "small range".
2292 ! If so, go off and do the copy.
2293 ! If not, load the hard limit. %o3 is
2294 ! available for reuse.
2295 !
2296 subcc %o2, VIS_COPY_THRESHOLD, %o3
2297 bleu,pt %ncc, .dcos8
2298 sethi %hi(hw_copy_limit_8), %o3
2299 ld [%o3 + %lo(hw_copy_limit_8)], %o3
2300 !
2301 ! If it's zero, there's no HW bcopy.
2302 ! Bop off to the aligned copy.
2303 !
2304 tst %o3
2305 bz,pn %icc, .dcos8
2306 subcc %o3, %o2, %o3
2307 !
2308 ! We're negative if our size is larger than hw_copy_limit_8.
2309 !
2310 bge,pt %ncc, .dcos8
2311 nop
2312 !
2313 ! HW assist is on and we're large enough. Do it.
2314 !
2315 ba,pt %ncc, .big_copyout
2316 nop
2317 .dcos8:
2318 !
2319 ! Housekeeping for copy loops. Uses same idea as in the byte for
2320 ! byte copy loop above.
2321 !
2322 add %o0, %o2, %o0
2323 add %o1, %o2, %o1
2324 sub %g0, %o2, %o3
2325 ba,pt %ncc, .dodebc
2326 srl %o2, 3, %o2 ! Number of 8 byte chunks to copy
2327 !
2328 ! 4 byte aligned?
2329 !
2330 .dcoh4:
2331 bnz,pn %ncc, .dcoh2
2332 !
2333 ! See if we're in the "small range".
2334 ! If so, go off and do the copy.
2335 ! If not, load the hard limit. %o3 is
2336 ! available for reuse.
2337 !
2338 subcc %o2, VIS_COPY_THRESHOLD, %o3
2339 bleu,pt %ncc, .dcos4
2340 sethi %hi(hw_copy_limit_4), %o3
2341 ld [%o3 + %lo(hw_copy_limit_4)], %o3
2342 !
2343 ! If it's zero, there's no HW bcopy.
2344 ! Bop off to the aligned copy.
2345 !
2346 tst %o3
2347 bz,pn %icc, .dcos4
2348 subcc %o3, %o2, %o3
2349 !
2350 ! We're negative if our size is larger than hw_copy_limit_4.
2351 !
2352 bge,pt %ncc, .dcos4
2353 nop
2354 !
2355 ! HW assist is on and we're large enough. Do it.
2356 !
2357 ba,pt %ncc, .big_copyout
2358 nop
2359 .dcos4:
2360 add %o0, %o2, %o0
2361 add %o1, %o2, %o1
2362 sub %g0, %o2, %o3
2363 ba,pt %ncc, .dodfbc
2364 srl %o2, 2, %o2 ! Number of 4 byte chunks to copy
2365 !
2366 ! We must be 2 byte aligned. Off we go.
2367 ! The check for small copies was done in the
2368 ! delay at .dcoh4
2369 !
2370 .dcoh2:
2371 ble %ncc, .dcos2
2372 sethi %hi(hw_copy_limit_2), %o3
2373 ld [%o3 + %lo(hw_copy_limit_2)], %o3
2374 tst %o3
2375 bz,pn %icc, .dcos2
2376 subcc %o3, %o2, %o3
2377 bge,pt %ncc, .dcos2
2378 nop
2379 !
2380 ! HW is on and we're big enough. Do it.
2381 !
2382 ba,pt %ncc, .big_copyout
2383 nop
2384 .dcos2:
2385 add %o0, %o2, %o0
2386 add %o1, %o2, %o1
2387 sub %g0, %o2, %o3
2388 ba,pt %ncc, .dodtbc
2389 srl %o2, 1, %o2 ! Number of 2 byte chunks to copy
2390 .small_copyout:
2391 !
2392 ! Why are we doing this AGAIN? There are certain conditions in
2393 ! big_copyout that will cause us to forego the HW assisted copies
2394 ! and bounce back to a non-HW assisted copy. This dispatches those
2395 ! copies. Note that we branch around this in the main line code.
2396 !
2397 ! We make no check for limits or HW enablement here. We've
2398 ! already been told that we're a poster child so just go off
2399 ! and do it.
2400 !
2401 or %o0, %o1, %o3
2402 btst 1, %o3
2403 bnz %icc, .dcobcp ! Most likely
2404 btst 7, %o3
2405 bz %icc, .dcos8
2406 btst 3, %o3
2407 bz %icc, .dcos4
2408 nop
2409 ba,pt %ncc, .dcos2
2410 nop
2411 .align 32
2412 .dodebc:
2413 ldx [%o0 + %o3], %o4
2414 deccc %o2
2415 stxa %o4, [%o1 + %o3]ASI_USER
2416 bg,pt %ncc, .dodebc
2417 addcc %o3, 8, %o3
2418 !
2419 ! End of copy loop. Check to see if we're done. Most
2420 ! eight byte aligned copies end here.
2421 !
2422 bz,pt %ncc, .dcofh
2423 nop
2424 !
2425 ! Something is left - do it byte for byte.
2426 !
2427 ba,pt %ncc, .dcocl
2428 ldub [%o0 + %o3], %o4 ! load next byte
2429 !
2430 ! Four byte copy loop. %o2 is the number of 4 byte chunks to copy.
2431 !
2432 .align 32
2433 .dodfbc:
2434 lduw [%o0 + %o3], %o4
2435 deccc %o2
2436 sta %o4, [%o1 + %o3]ASI_USER
2437 bg,pt %ncc, .dodfbc
2438 addcc %o3, 4, %o3
2439 !
2440 ! End of copy loop. Check to see if we're done. Most
2441 ! four byte aligned copies end here.
2442 !
2443 bz,pt %ncc, .dcofh
2444 nop
2445 !
2446 ! Something is left. Do it byte for byte.
2447 !
2448 ba,pt %ncc, .dcocl
2449 ldub [%o0 + %o3], %o4 ! load next byte
2450 !
2451 ! two byte aligned copy loop. %o2 is the number of 2 byte chunks to
2452 ! copy.
2453 !
2454 .align 32
2455 .dodtbc:
2456 lduh [%o0 + %o3], %o4
2457 deccc %o2
2458 stha %o4, [%o1 + %o3]ASI_USER
2459 bg,pt %ncc, .dodtbc
2460 addcc %o3, 2, %o3
2461 !
2462 ! End of copy loop. Anything left?
2463 !
2464 bz,pt %ncc, .dcofh
2465 nop
2466 !
2467 ! Deal with the last byte
2468 !
2469 ldub [%o0 + %o3], %o4
2470 stba %o4, [%o1 + %o3]ASI_USER
2471 .dcofh:
2472 membar #Sync
2473 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
2474 retl
2475 clr %o0
2476
2477 .big_copyout:
2478 !
2479 ! Are we using the FP registers?
2480 !
2481 rd %fprs, %o3 ! check for unused fp
2482 btst FPRS_FEF, %o3
2483 bnz %icc, .copyout_fpregs_inuse
2484 nop
2485 !
2486 ! We're going to go off and do a block copy.
2487 ! Switch fault handlers and grab a window. We
2488 ! don't do a membar #Sync since we've done only
2489 ! kernel data to this point.
2490 !
2491 stn %o4, [THREAD_REG + T_LOFAULT]
2492 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
2493 !
2494 ! %o3 is now %i3. Save original %fprs.
2495 !
2496 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]
2497 ba,pt %ncc, .do_block_copyout ! Not in use. Go off and do it.
2498 wr %g0, FPRS_FEF, %fprs ! clear %fprs
2499 !
2500 .copyout_fpregs_inuse:
2501 !
2502 ! We're here if the FP regs are in use. Need to see if the request
2503 ! exceeds our suddenly larger minimum.
2504 !
2505 cmp %i2, VIS_COPY_THRESHOLD+(64*4) ! for large counts (larger
2506 bl %ncc, .small_copyout
2507 nop
2508 !
2509 ! We're going to go off and do a block copy.
2510 ! Change to the heavy duty fault handler and grab a window first.
2511 !
2512 stn %o4, [THREAD_REG + T_LOFAULT]
2513 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
2514 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]
2515 !
2516 ! save in-use fpregs on stack
2517 !
2518 wr %g0, FPRS_FEF, %fprs
2519 membar #Sync
2520 add %fp, STACK_BIAS - 257, %o2
2521 and %o2, -64, %o2
2522 stda %d0, [%o2]ASI_BLK_P
2523 add %o2, 64, %o2
2524 stda %d16, [%o2]ASI_BLK_P
2525 add %o2, 64, %o2
2526 stda %d32, [%o2]ASI_BLK_P
2527 add %o2, 64, %o2
2528 stda %d48, [%o2]ASI_BLK_P
2529 membar #Sync
2530
2531 .do_block_copyout:
2532 membar #StoreStore|#StoreLoad|#LoadStore
2533
2534 rd %gsr, %o2
2535 st %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
2536
2537 ! Set the lower bit in the saved t_lofault to indicate
2538 ! that we need to clear the %fprs register on the way
2539 ! out
2540 or SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
2541
2542 ! Swap src/dst since the code below is memcpy code
2543 ! and memcpy/bcopy have different calling sequences
2544 mov %i1, %i5
2545 mov %i0, %i1
2546 mov %i5, %i0
2547
2548 !!! This code is nearly identical to the version in the sun4u
2549 !!! libc_psr. Most bugfixes made to that file should be
2550 !!! merged into this routine.
2551
2552 andcc %i0, 7, %o3
2553 bz %ncc, copyout_blkcpy
2554 sub %o3, 8, %o3
2555 neg %o3
2556 sub %i2, %o3, %i2
2557
2558 ! Align Destination on double-word boundary
2559
2560 2: ldub [%i1], %o4
2561 inc %i1
2562 stba %o4, [%i0]ASI_USER
2563 deccc %o3
2564 bgu %ncc, 2b
2565 inc %i0
2566 copyout_blkcpy:
2567 andcc %i0, 63, %i3
2568 bz,pn %ncc, copyout_blalign ! now block aligned
2569 sub %i3, 64, %i3
2570 neg %i3 ! bytes till block aligned
2571 sub %i2, %i3, %i2 ! update %i2 with new count
2572
2573 ! Copy %i3 bytes till dst is block (64 byte) aligned. use
2574 ! double word copies.
2575
2576 alignaddr %i1, %g0, %g1
2577 ldd [%g1], %d0
2578 add %g1, 8, %g1
2579 6:
2580 ldd [%g1], %d2
2581 add %g1, 8, %g1
2582 subcc %i3, 8, %i3
2583 faligndata %d0, %d2, %d8
2584 stda %d8, [%i0]ASI_USER
2585 add %i1, 8, %i1
2586 bz,pn %ncc, copyout_blalign
2587 add %i0, 8, %i0
2588 ldd [%g1], %d0
2589 add %g1, 8, %g1
2590 subcc %i3, 8, %i3
2591 faligndata %d2, %d0, %d8
2592 stda %d8, [%i0]ASI_USER
2593 add %i1, 8, %i1
2594 bgu,pn %ncc, 6b
2595 add %i0, 8, %i0
2596
2597 copyout_blalign:
2598 membar #StoreLoad
2599 ! %i2 = total length
2600 ! %i3 = blocks (length - 64) / 64
2601 ! %i4 = doubles remaining (length - blocks)
2602 sub %i2, 64, %i3
2603 andn %i3, 63, %i3
2604 sub %i2, %i3, %i4
2605 andn %i4, 7, %i4
2606 sub %i4, 16, %i4
2607 sub %i2, %i4, %i2
2608 sub %i2, %i3, %i2
2609
2610 andn %i1, 0x3f, %l7 ! blk aligned address
2611 alignaddr %i1, %g0, %g0 ! gen %gsr
2612
2613 srl %i1, 3, %l5 ! bits 3,4,5 are now least sig in %l5
2614 andcc %l5, 7, %i5 ! mask everything except bits 1,2 3
2615 add %i1, %i4, %i1
2616 add %i1, %i3, %i1
2617
2618 ldda [%l7]ASI_BLK_P, %d0
2619 add %l7, 64, %l7
2620 ldda [%l7]ASI_BLK_P, %d16
2621 add %l7, 64, %l7
2622 ldda [%l7]ASI_BLK_P, %d32
2623 add %l7, 64, %l7
2624 sub %i3, 128, %i3
2625
2626 ! switch statement to get us to the right 8 byte blk within a
2627 ! 64 byte block
2628
2629 cmp %i5, 4
2630 bgeu,a copyout_hlf
2631 cmp %i5, 6
2632 cmp %i5, 2
2633 bgeu,a copyout_sqtr
2634 nop
2635 cmp %i5, 1
2636 be,a copyout_seg1
2637 nop
2638 ba,pt %ncc, copyout_seg0
2639 nop
2640 copyout_sqtr:
2641 be,a copyout_seg2
2642 nop
2643 ba,pt %ncc, copyout_seg3
2644 nop
2645
2646 copyout_hlf:
2647 bgeu,a copyout_fqtr
2648 nop
2649 cmp %i5, 5
2650 be,a copyout_seg5
2651 nop
2652 ba,pt %ncc, copyout_seg4
2653 nop
2654 copyout_fqtr:
2655 be,a copyout_seg6
2656 nop
2657 ba,pt %ncc, copyout_seg7
2658 nop
2659
2660 copyout_seg0:
2661 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2662 FALIGN_D0
2663 ldda [%l7]ASI_BLK_P, %d0
2664 stda %d48, [%i0]ASI_BLK_AIUS
2665 add %l7, 64, %l7
2666 subcc %i3, 64, %i3
2667 bz,pn %ncc, 0f
2668 add %i0, 64, %i0
2669 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2670 FALIGN_D16
2671 ldda [%l7]ASI_BLK_P, %d16
2672 stda %d48, [%i0]ASI_BLK_AIUS
2673 add %l7, 64, %l7
2674 subcc %i3, 64, %i3
2675 bz,pn %ncc, 1f
2676 add %i0, 64, %i0
2677 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2678 FALIGN_D32
2679 ldda [%l7]ASI_BLK_P, %d32
2680 stda %d48, [%i0]ASI_BLK_AIUS
2681 add %l7, 64, %l7
2682 subcc %i3, 64, %i3
2683 bz,pn %ncc, 2f
2684 add %i0, 64, %i0
2685 ba,a,pt %ncc, copyout_seg0
2686
2687 0:
2688 FALIGN_D16
2689 stda %d48, [%i0]ASI_BLK_AIUS
2690 add %i0, 64, %i0
2691 membar #Sync
2692 FALIGN_D32
2693 stda %d48, [%i0]ASI_BLK_AIUS
2694 ba,pt %ncc, copyout_blkd0
2695 add %i0, 64, %i0
2696
2697 1:
2698 FALIGN_D32
2699 stda %d48, [%i0]ASI_BLK_AIUS
2700 add %i0, 64, %i0
2701 membar #Sync
2702 FALIGN_D0
2703 stda %d48, [%i0]ASI_BLK_AIUS
2704 ba,pt %ncc, copyout_blkd16
2705 add %i0, 64, %i0
2706
2707 2:
2708 FALIGN_D0
2709 stda %d48, [%i0]ASI_BLK_AIUS
2710 add %i0, 64, %i0
2711 membar #Sync
2712 FALIGN_D16
2713 stda %d48, [%i0]ASI_BLK_AIUS
2714 ba,pt %ncc, copyout_blkd32
2715 add %i0, 64, %i0
2716
2717 copyout_seg1:
2718 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2719 FALIGN_D2
2720 ldda [%l7]ASI_BLK_P, %d0
2721 stda %d48, [%i0]ASI_BLK_AIUS
2722 add %l7, 64, %l7
2723 subcc %i3, 64, %i3
2724 bz,pn %ncc, 0f
2725 add %i0, 64, %i0
2726 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2727 FALIGN_D18
2728 ldda [%l7]ASI_BLK_P, %d16
2729 stda %d48, [%i0]ASI_BLK_AIUS
2730 add %l7, 64, %l7
2731 subcc %i3, 64, %i3
2732 bz,pn %ncc, 1f
2733 add %i0, 64, %i0
2734 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2735 FALIGN_D34
2736 ldda [%l7]ASI_BLK_P, %d32
2737 stda %d48, [%i0]ASI_BLK_AIUS
2738 add %l7, 64, %l7
2739 subcc %i3, 64, %i3
2740 bz,pn %ncc, 2f
2741 add %i0, 64, %i0
2742 ba,a,pt %ncc, copyout_seg1
2743 0:
2744 FALIGN_D18
2745 stda %d48, [%i0]ASI_BLK_AIUS
2746 add %i0, 64, %i0
2747 membar #Sync
2748 FALIGN_D34
2749 stda %d48, [%i0]ASI_BLK_AIUS
2750 ba,pt %ncc, copyout_blkd2
2751 add %i0, 64, %i0
2752
2753 1:
2754 FALIGN_D34
2755 stda %d48, [%i0]ASI_BLK_AIUS
2756 add %i0, 64, %i0
2757 membar #Sync
2758 FALIGN_D2
2759 stda %d48, [%i0]ASI_BLK_AIUS
2760 ba,pt %ncc, copyout_blkd18
2761 add %i0, 64, %i0
2762
2763 2:
2764 FALIGN_D2
2765 stda %d48, [%i0]ASI_BLK_AIUS
2766 add %i0, 64, %i0
2767 membar #Sync
2768 FALIGN_D18
2769 stda %d48, [%i0]ASI_BLK_AIUS
2770 ba,pt %ncc, copyout_blkd34
2771 add %i0, 64, %i0
2772
2773 copyout_seg2:
2774 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2775 FALIGN_D4
2776 ldda [%l7]ASI_BLK_P, %d0
2777 stda %d48, [%i0]ASI_BLK_AIUS
2778 add %l7, 64, %l7
2779 subcc %i3, 64, %i3
2780 bz,pn %ncc, 0f
2781 add %i0, 64, %i0
2782 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2783 FALIGN_D20
2784 ldda [%l7]ASI_BLK_P, %d16
2785 stda %d48, [%i0]ASI_BLK_AIUS
2786 add %l7, 64, %l7
2787 subcc %i3, 64, %i3
2788 bz,pn %ncc, 1f
2789 add %i0, 64, %i0
2790 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2791 FALIGN_D36
2792 ldda [%l7]ASI_BLK_P, %d32
2793 stda %d48, [%i0]ASI_BLK_AIUS
2794 add %l7, 64, %l7
2795 subcc %i3, 64, %i3
2796 bz,pn %ncc, 2f
2797 add %i0, 64, %i0
2798 ba,a,pt %ncc, copyout_seg2
2799
2800 0:
2801 FALIGN_D20
2802 stda %d48, [%i0]ASI_BLK_AIUS
2803 add %i0, 64, %i0
2804 membar #Sync
2805 FALIGN_D36
2806 stda %d48, [%i0]ASI_BLK_AIUS
2807 ba,pt %ncc, copyout_blkd4
2808 add %i0, 64, %i0
2809
2810 1:
2811 FALIGN_D36
2812 stda %d48, [%i0]ASI_BLK_AIUS
2813 add %i0, 64, %i0
2814 membar #Sync
2815 FALIGN_D4
2816 stda %d48, [%i0]ASI_BLK_AIUS
2817 ba,pt %ncc, copyout_blkd20
2818 add %i0, 64, %i0
2819
2820 2:
2821 FALIGN_D4
2822 stda %d48, [%i0]ASI_BLK_AIUS
2823 add %i0, 64, %i0
2824 membar #Sync
2825 FALIGN_D20
2826 stda %d48, [%i0]ASI_BLK_AIUS
2827 ba,pt %ncc, copyout_blkd36
2828 add %i0, 64, %i0
2829
2830 copyout_seg3:
2831 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2832 FALIGN_D6
2833 ldda [%l7]ASI_BLK_P, %d0
2834 stda %d48, [%i0]ASI_BLK_AIUS
2835 add %l7, 64, %l7
2836 subcc %i3, 64, %i3
2837 bz,pn %ncc, 0f
2838 add %i0, 64, %i0
2839 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2840 FALIGN_D22
2841 ldda [%l7]ASI_BLK_P, %d16
2842 stda %d48, [%i0]ASI_BLK_AIUS
2843 add %l7, 64, %l7
2844 subcc %i3, 64, %i3
2845 bz,pn %ncc, 1f
2846 add %i0, 64, %i0
2847 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2848 FALIGN_D38
2849 ldda [%l7]ASI_BLK_P, %d32
2850 stda %d48, [%i0]ASI_BLK_AIUS
2851 add %l7, 64, %l7
2852 subcc %i3, 64, %i3
2853 bz,pn %ncc, 2f
2854 add %i0, 64, %i0
2855 ba,a,pt %ncc, copyout_seg3
2856
2857 0:
2858 FALIGN_D22
2859 stda %d48, [%i0]ASI_BLK_AIUS
2860 add %i0, 64, %i0
2861 membar #Sync
2862 FALIGN_D38
2863 stda %d48, [%i0]ASI_BLK_AIUS
2864 ba,pt %ncc, copyout_blkd6
2865 add %i0, 64, %i0
2866
2867 1:
2868 FALIGN_D38
2869 stda %d48, [%i0]ASI_BLK_AIUS
2870 add %i0, 64, %i0
2871 membar #Sync
2872 FALIGN_D6
2873 stda %d48, [%i0]ASI_BLK_AIUS
2874 ba,pt %ncc, copyout_blkd22
2875 add %i0, 64, %i0
2876
2877 2:
2878 FALIGN_D6
2879 stda %d48, [%i0]ASI_BLK_AIUS
2880 add %i0, 64, %i0
2881 membar #Sync
2882 FALIGN_D22
2883 stda %d48, [%i0]ASI_BLK_AIUS
2884 ba,pt %ncc, copyout_blkd38
2885 add %i0, 64, %i0
2886
2887 copyout_seg4:
2888 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2889 FALIGN_D8
2890 ldda [%l7]ASI_BLK_P, %d0
2891 stda %d48, [%i0]ASI_BLK_AIUS
2892 add %l7, 64, %l7
2893 subcc %i3, 64, %i3
2894 bz,pn %ncc, 0f
2895 add %i0, 64, %i0
2896 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2897 FALIGN_D24
2898 ldda [%l7]ASI_BLK_P, %d16
2899 stda %d48, [%i0]ASI_BLK_AIUS
2900 add %l7, 64, %l7
2901 subcc %i3, 64, %i3
2902 bz,pn %ncc, 1f
2903 add %i0, 64, %i0
2904 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2905 FALIGN_D40
2906 ldda [%l7]ASI_BLK_P, %d32
2907 stda %d48, [%i0]ASI_BLK_AIUS
2908 add %l7, 64, %l7
2909 subcc %i3, 64, %i3
2910 bz,pn %ncc, 2f
2911 add %i0, 64, %i0
2912 ba,a,pt %ncc, copyout_seg4
2913
2914 0:
2915 FALIGN_D24
2916 stda %d48, [%i0]ASI_BLK_AIUS
2917 add %i0, 64, %i0
2918 membar #Sync
2919 FALIGN_D40
2920 stda %d48, [%i0]ASI_BLK_AIUS
2921 ba,pt %ncc, copyout_blkd8
2922 add %i0, 64, %i0
2923
2924 1:
2925 FALIGN_D40
2926 stda %d48, [%i0]ASI_BLK_AIUS
2927 add %i0, 64, %i0
2928 membar #Sync
2929 FALIGN_D8
2930 stda %d48, [%i0]ASI_BLK_AIUS
2931 ba,pt %ncc, copyout_blkd24
2932 add %i0, 64, %i0
2933
2934 2:
2935 FALIGN_D8
2936 stda %d48, [%i0]ASI_BLK_AIUS
2937 add %i0, 64, %i0
2938 membar #Sync
2939 FALIGN_D24
2940 stda %d48, [%i0]ASI_BLK_AIUS
2941 ba,pt %ncc, copyout_blkd40
2942 add %i0, 64, %i0
2943
2944 copyout_seg5:
2945 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2946 FALIGN_D10
2947 ldda [%l7]ASI_BLK_P, %d0
2948 stda %d48, [%i0]ASI_BLK_AIUS
2949 add %l7, 64, %l7
2950 subcc %i3, 64, %i3
2951 bz,pn %ncc, 0f
2952 add %i0, 64, %i0
2953 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2954 FALIGN_D26
2955 ldda [%l7]ASI_BLK_P, %d16
2956 stda %d48, [%i0]ASI_BLK_AIUS
2957 add %l7, 64, %l7
2958 subcc %i3, 64, %i3
2959 bz,pn %ncc, 1f
2960 add %i0, 64, %i0
2961 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2962 FALIGN_D42
2963 ldda [%l7]ASI_BLK_P, %d32
2964 stda %d48, [%i0]ASI_BLK_AIUS
2965 add %l7, 64, %l7
2966 subcc %i3, 64, %i3
2967 bz,pn %ncc, 2f
2968 add %i0, 64, %i0
2969 ba,a,pt %ncc, copyout_seg5
2970
2971 0:
2972 FALIGN_D26
2973 stda %d48, [%i0]ASI_BLK_AIUS
2974 add %i0, 64, %i0
2975 membar #Sync
2976 FALIGN_D42
2977 stda %d48, [%i0]ASI_BLK_AIUS
2978 ba,pt %ncc, copyout_blkd10
2979 add %i0, 64, %i0
2980
2981 1:
2982 FALIGN_D42
2983 stda %d48, [%i0]ASI_BLK_AIUS
2984 add %i0, 64, %i0
2985 membar #Sync
2986 FALIGN_D10
2987 stda %d48, [%i0]ASI_BLK_AIUS
2988 ba,pt %ncc, copyout_blkd26
2989 add %i0, 64, %i0
2990
2991 2:
2992 FALIGN_D10
2993 stda %d48, [%i0]ASI_BLK_AIUS
2994 add %i0, 64, %i0
2995 membar #Sync
2996 FALIGN_D26
2997 stda %d48, [%i0]ASI_BLK_AIUS
2998 ba,pt %ncc, copyout_blkd42
2999 add %i0, 64, %i0
3000
3001 copyout_seg6:
3002 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
3003 FALIGN_D12
3004 ldda [%l7]ASI_BLK_P, %d0
3005 stda %d48, [%i0]ASI_BLK_AIUS
3006 add %l7, 64, %l7
3007 subcc %i3, 64, %i3
3008 bz,pn %ncc, 0f
3009 add %i0, 64, %i0
3010 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
3011 FALIGN_D28
3012 ldda [%l7]ASI_BLK_P, %d16
3013 stda %d48, [%i0]ASI_BLK_AIUS
3014 add %l7, 64, %l7
3015 subcc %i3, 64, %i3
3016 bz,pn %ncc, 1f
3017 add %i0, 64, %i0
3018 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
3019 FALIGN_D44
3020 ldda [%l7]ASI_BLK_P, %d32
3021 stda %d48, [%i0]ASI_BLK_AIUS
3022 add %l7, 64, %l7
3023 subcc %i3, 64, %i3
3024 bz,pn %ncc, 2f
3025 add %i0, 64, %i0
3026 ba,a,pt %ncc, copyout_seg6
3027
3028 0:
3029 FALIGN_D28
3030 stda %d48, [%i0]ASI_BLK_AIUS
3031 add %i0, 64, %i0
3032 membar #Sync
3033 FALIGN_D44
3034 stda %d48, [%i0]ASI_BLK_AIUS
3035 ba,pt %ncc, copyout_blkd12
3036 add %i0, 64, %i0
3037
3038 1:
3039 FALIGN_D44
3040 stda %d48, [%i0]ASI_BLK_AIUS
3041 add %i0, 64, %i0
3042 membar #Sync
3043 FALIGN_D12
3044 stda %d48, [%i0]ASI_BLK_AIUS
3045 ba,pt %ncc, copyout_blkd28
3046 add %i0, 64, %i0
3047
3048 2:
3049 FALIGN_D12
3050 stda %d48, [%i0]ASI_BLK_AIUS
3051 add %i0, 64, %i0
3052 membar #Sync
3053 FALIGN_D28
3054 stda %d48, [%i0]ASI_BLK_AIUS
3055 ba,pt %ncc, copyout_blkd44
3056 add %i0, 64, %i0
3057
3058 copyout_seg7:
3059 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
3060 FALIGN_D14
3061 ldda [%l7]ASI_BLK_P, %d0
3062 stda %d48, [%i0]ASI_BLK_AIUS
3063 add %l7, 64, %l7
3064 subcc %i3, 64, %i3
3065 bz,pn %ncc, 0f
3066 add %i0, 64, %i0
3067 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
3068 FALIGN_D30
3069 ldda [%l7]ASI_BLK_P, %d16
3070 stda %d48, [%i0]ASI_BLK_AIUS
3071 add %l7, 64, %l7
3072 subcc %i3, 64, %i3
3073 bz,pn %ncc, 1f
3074 add %i0, 64, %i0
3075 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
3076 FALIGN_D46
3077 ldda [%l7]ASI_BLK_P, %d32
3078 stda %d48, [%i0]ASI_BLK_AIUS
3079 add %l7, 64, %l7
3080 subcc %i3, 64, %i3
3081 bz,pn %ncc, 2f
3082 add %i0, 64, %i0
3083 ba,a,pt %ncc, copyout_seg7
3084
3085 0:
3086 FALIGN_D30
3087 stda %d48, [%i0]ASI_BLK_AIUS
3088 add %i0, 64, %i0
3089 membar #Sync
3090 FALIGN_D46
3091 stda %d48, [%i0]ASI_BLK_AIUS
3092 ba,pt %ncc, copyout_blkd14
3093 add %i0, 64, %i0
3094
3095 1:
3096 FALIGN_D46
3097 stda %d48, [%i0]ASI_BLK_AIUS
3098 add %i0, 64, %i0
3099 membar #Sync
3100 FALIGN_D14
3101 stda %d48, [%i0]ASI_BLK_AIUS
3102 ba,pt %ncc, copyout_blkd30
3103 add %i0, 64, %i0
3104
3105 2:
3106 FALIGN_D14
3107 stda %d48, [%i0]ASI_BLK_AIUS
3108 add %i0, 64, %i0
3109 membar #Sync
3110 FALIGN_D30
3111 stda %d48, [%i0]ASI_BLK_AIUS
3112 ba,pt %ncc, copyout_blkd46
3113 add %i0, 64, %i0
3114
3115
3116 !
3117 ! dribble out the last partial block
3118 !
3119 copyout_blkd0:
3120 subcc %i4, 8, %i4
3121 blu,pn %ncc, copyout_blkdone
3122 faligndata %d0, %d2, %d48
3123 stda %d48, [%i0]ASI_USER
3124 add %i0, 8, %i0
3125 copyout_blkd2:
3126 subcc %i4, 8, %i4
3127 blu,pn %ncc, copyout_blkdone
3128 faligndata %d2, %d4, %d48
3129 stda %d48, [%i0]ASI_USER
3130 add %i0, 8, %i0
3131 copyout_blkd4:
3132 subcc %i4, 8, %i4
3133 blu,pn %ncc, copyout_blkdone
3134 faligndata %d4, %d6, %d48
3135 stda %d48, [%i0]ASI_USER
3136 add %i0, 8, %i0
3137 copyout_blkd6:
3138 subcc %i4, 8, %i4
3139 blu,pn %ncc, copyout_blkdone
3140 faligndata %d6, %d8, %d48
3141 stda %d48, [%i0]ASI_USER
3142 add %i0, 8, %i0
3143 copyout_blkd8:
3144 subcc %i4, 8, %i4
3145 blu,pn %ncc, copyout_blkdone
3146 faligndata %d8, %d10, %d48
3147 stda %d48, [%i0]ASI_USER
3148 add %i0, 8, %i0
3149 copyout_blkd10:
3150 subcc %i4, 8, %i4
3151 blu,pn %ncc, copyout_blkdone
3152 faligndata %d10, %d12, %d48
3153 stda %d48, [%i0]ASI_USER
3154 add %i0, 8, %i0
3155 copyout_blkd12:
3156 subcc %i4, 8, %i4
3157 blu,pn %ncc, copyout_blkdone
3158 faligndata %d12, %d14, %d48
3159 stda %d48, [%i0]ASI_USER
3160 add %i0, 8, %i0
3161 copyout_blkd14:
3162 subcc %i4, 8, %i4
3163 blu,pn %ncc, copyout_blkdone
3164 fsrc1 %d14, %d0
3165 ba,a,pt %ncc, copyout_blkleft
3166
3167 copyout_blkd16:
3168 subcc %i4, 8, %i4
3169 blu,pn %ncc, copyout_blkdone
3170 faligndata %d16, %d18, %d48
3171 stda %d48, [%i0]ASI_USER
3172 add %i0, 8, %i0
3173 copyout_blkd18:
3174 subcc %i4, 8, %i4
3175 blu,pn %ncc, copyout_blkdone
3176 faligndata %d18, %d20, %d48
3177 stda %d48, [%i0]ASI_USER
3178 add %i0, 8, %i0
3179 copyout_blkd20:
3180 subcc %i4, 8, %i4
3181 blu,pn %ncc, copyout_blkdone
3182 faligndata %d20, %d22, %d48
3183 stda %d48, [%i0]ASI_USER
3184 add %i0, 8, %i0
3185 copyout_blkd22:
3186 subcc %i4, 8, %i4
3187 blu,pn %ncc, copyout_blkdone
3188 faligndata %d22, %d24, %d48
3189 stda %d48, [%i0]ASI_USER
3190 add %i0, 8, %i0
3191 copyout_blkd24:
3192 subcc %i4, 8, %i4
3193 blu,pn %ncc, copyout_blkdone
3194 faligndata %d24, %d26, %d48
3195 stda %d48, [%i0]ASI_USER
3196 add %i0, 8, %i0
3197 copyout_blkd26:
3198 subcc %i4, 8, %i4
3199 blu,pn %ncc, copyout_blkdone
3200 faligndata %d26, %d28, %d48
3201 stda %d48, [%i0]ASI_USER
3202 add %i0, 8, %i0
3203 copyout_blkd28:
3204 subcc %i4, 8, %i4
3205 blu,pn %ncc, copyout_blkdone
3206 faligndata %d28, %d30, %d48
3207 stda %d48, [%i0]ASI_USER
3208 add %i0, 8, %i0
3209 copyout_blkd30:
3210 subcc %i4, 8, %i4
3211 blu,pn %ncc, copyout_blkdone
3212 fsrc1 %d30, %d0
3213 ba,a,pt %ncc, copyout_blkleft
3214 copyout_blkd32:
3215 subcc %i4, 8, %i4
3216 blu,pn %ncc, copyout_blkdone
3217 faligndata %d32, %d34, %d48
3218 stda %d48, [%i0]ASI_USER
3219 add %i0, 8, %i0
3220 copyout_blkd34:
3221 subcc %i4, 8, %i4
3222 blu,pn %ncc, copyout_blkdone
3223 faligndata %d34, %d36, %d48
3224 stda %d48, [%i0]ASI_USER
3225 add %i0, 8, %i0
3226 copyout_blkd36:
3227 subcc %i4, 8, %i4
3228 blu,pn %ncc, copyout_blkdone
3229 faligndata %d36, %d38, %d48
3230 stda %d48, [%i0]ASI_USER
3231 add %i0, 8, %i0
3232 copyout_blkd38:
3233 subcc %i4, 8, %i4
3234 blu,pn %ncc, copyout_blkdone
3235 faligndata %d38, %d40, %d48
3236 stda %d48, [%i0]ASI_USER
3237 add %i0, 8, %i0
3238 copyout_blkd40:
3239 subcc %i4, 8, %i4
3240 blu,pn %ncc, copyout_blkdone
3241 faligndata %d40, %d42, %d48
3242 stda %d48, [%i0]ASI_USER
3243 add %i0, 8, %i0
3244 copyout_blkd42:
3245 subcc %i4, 8, %i4
3246 blu,pn %ncc, copyout_blkdone
3247 faligndata %d42, %d44, %d48
3248 stda %d48, [%i0]ASI_USER
3249 add %i0, 8, %i0
3250 copyout_blkd44:
3251 subcc %i4, 8, %i4
3252 blu,pn %ncc, copyout_blkdone
3253 faligndata %d44, %d46, %d48
3254 stda %d48, [%i0]ASI_USER
3255 add %i0, 8, %i0
3256 copyout_blkd46:
3257 subcc %i4, 8, %i4
3258 blu,pn %ncc, copyout_blkdone
3259 fsrc1 %d46, %d0
3260
3261 copyout_blkleft:
3262 1:
3263 ldd [%l7], %d2
3264 add %l7, 8, %l7
3265 subcc %i4, 8, %i4
3266 faligndata %d0, %d2, %d8
3267 stda %d8, [%i0]ASI_USER
3268 blu,pn %ncc, copyout_blkdone
3269 add %i0, 8, %i0
3270 ldd [%l7], %d0
3271 add %l7, 8, %l7
3272 subcc %i4, 8, %i4
3273 faligndata %d2, %d0, %d8
3274 stda %d8, [%i0]ASI_USER
3275 bgeu,pt %ncc, 1b
3276 add %i0, 8, %i0
3277
3278 copyout_blkdone:
3279 tst %i2
3280 bz,pt %ncc, .copyout_exit
3281 and %l3, 0x4, %l3 ! fprs.du = fprs.dl = 0
3282
3283 7: ldub [%i1], %i4
3284 inc %i1
3285 stba %i4, [%i0]ASI_USER
3286 inc %i0
3287 deccc %i2
3288 bgu %ncc, 7b
3289 nop
3290
3291 .copyout_exit:
3292 membar #StoreLoad|#StoreStore
3293 btst FPUSED_FLAG, SAVED_LOFAULT
3294 bz 1f
3295 nop
3296
3297 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
3298 wr %o2, 0, %gsr ! restore gsr
3299
3300 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
3301 btst FPRS_FEF, %o3
3302 bz 4f
3303 nop
3304
3305 ! restore fpregs from stack
3306 membar #Sync
3307 add %fp, STACK_BIAS - 257, %o2
3308 and %o2, -64, %o2
3309 ldda [%o2]ASI_BLK_P, %d0
3310 add %o2, 64, %o2
3311 ldda [%o2]ASI_BLK_P, %d16
3312 add %o2, 64, %o2
3313 ldda [%o2]ASI_BLK_P, %d32
3314 add %o2, 64, %o2
3315 ldda [%o2]ASI_BLK_P, %d48
3316 membar #Sync
3317
3318 ba,pt %ncc, 1f
3319 wr %o3, 0, %fprs ! restore fprs
3320
3321 4:
3322 FZERO ! zero all of the fpregs
3323 wr %o3, 0, %fprs ! restore fprs
3324
3325 1:
3326 andn SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
3327 membar #Sync ! sync error barrier
3328 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
3329 ret
3330 restore %g0, 0, %o0
3331
3332 .copyout_err:
3333 ldn [THREAD_REG + T_COPYOPS], %o4
3334 brz %o4, 2f
3335 nop
3336 ldn [%o4 + CP_COPYOUT], %g2
3337 jmp %g2
3338 nop
3339 2:
3340 retl
3341 mov -1, %o0
3342 SET_SIZE(copyout)
3343
3344
3345 ENTRY(xcopyout) ! xcopyout: enters the .do_copyout path with .xcopyout_err as the error stub
3346 sethi %hi(.xcopyout_err), REAL_LOFAULT ! upper bits of the .xcopyout_err address
3347 b .do_copyout ! label is not defined in this routine; presumably the common copyout body - confirm
3348 or REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT ! delay slot: REAL_LOFAULT = address of .xcopyout_err
3349 .xcopyout_err: ! error stub; reached only through the address placed in REAL_LOFAULT above
3350 ldn [THREAD_REG + T_COPYOPS], %o4 ! %o4 = T_COPYOPS pointer from the thread structure
3351 brz %o4, 2f ! NULL pointer: nothing to delegate to, return at 2:
3352 nop
3353 ldn [%o4 + CP_XCOPYOUT], %g2 ! %g2 = CP_XCOPYOUT slot of that structure
3354 jmp %g2 ! jump (not call) to the routine found in that slot
3355 nop
3356 2:
3357 retl
3358 mov %g1, %o0 ! delay slot: return value is taken from %g1 (presumably the error code left by the fault path - TODO confirm)
3359 SET_SIZE(xcopyout)
3360
3361 ENTRY(xcopyout_little) ! byte-reversing copy: %o0 = source, %o1 = destination (written via ASI_AIUSL), %o2 = length; returns 0
3362 sethi %hi(.little_err), %o4 ! upper bits of the .little_err address (label defined outside this routine)
3363 ldn [THREAD_REG + T_LOFAULT], %o5 ! %o5 = previous t_lofault, put back at 2: below
3364 or %o4, %lo(.little_err), %o4 ! %o4 = address of .little_err
3365 membar #Sync ! sync error barrier
3366 stn %o4, [THREAD_REG + T_LOFAULT] ! t_lofault = .little_err for the duration of the copy
3367
3368 subcc %g0, %o2, %o3 ! %o3 = -length (running index); sets Z when length is zero
3369 add %o0, %o2, %o0 ! %o0 = source + length
3370 bz,pn %ncc, 2f ! check for zero bytes
3371 sub %o2, 1, %o4 ! delay slot: %o4 = length - 1
3372 add %o0, %o4, %o0 ! %o0 = source + 2*length - 1, so %o0 + %o3 addresses the last source byte
3373 add %o1, %o2, %o1 ! %o1 = destination + length, so %o1 + %o3 addresses the first destination byte
3374 ldub [%o0+%o3], %o4 ! prime the loop with the last source byte
3375
3376 1: stba %o4, [%o1+%o3]ASI_AIUSL ! store the byte at the current destination position
3377 inccc %o3 ! advance the index by one; carry is set when it goes from -1 to 0
3378 sub %o0, 2, %o0 ! with the index increment, the source address moves back one byte
3379 bcc,a,pt %ncc, 1b ! loop while carry is clear, i.e. until the index reaches zero
3380 ldub [%o0+%o3], %o4 ! annulled delay slot: next source byte is loaded only when looping
3381
3382 2: membar #Sync ! sync error barrier
3383 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
3384 retl
3385 mov %g0, %o0 ! return (0)
3386 SET_SIZE(xcopyout_little)
3387
3388 /*
3389 * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
3390 */
3391
3392 ENTRY(copyin)
3393 sethi %hi(.copyin_err), REAL_LOFAULT
3394 or REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT
3395
3396 .do_copyin:
3397 !
3398 ! Check the length and bail if zero.
3399 !
3400 tst %o2
3401 bnz,pt %ncc, 1f
3402 nop
3403 retl
3404 clr %o0
3405 1:
3406 sethi %hi(copyio_fault), %o4
3407 or %o4, %lo(copyio_fault), %o4
3408 sethi %hi(copyio_fault_nowindow), %o3
3409 ldn [THREAD_REG + T_LOFAULT], SAVED_LOFAULT
3410 or %o3, %lo(copyio_fault_nowindow), %o3
3411 membar #Sync
3412 stn %o3, [THREAD_REG + T_LOFAULT]
3413
3414 mov %o0, SAVE_SRC
3415 mov %o1, SAVE_DST
3416 mov %o2, SAVE_COUNT
3417
3418 !
3419 ! Check to see if we're more than SMALL_LIMIT.
3420 !
3421 subcc %o2, SMALL_LIMIT, %o3
3422 bgu,a,pt %ncc, .dci_ns
3423 or %o0, %o1, %o3
3424 !
3425 ! What was previously ".small_copyin"
3426 !
3427 .dcibcp:
3428 sub %g0, %o2, %o3 ! setup for copy loop
3429 add %o0, %o2, %o0
3430 add %o1, %o2, %o1
3431 ba,pt %ncc, .dcicl
3432 lduba [%o0 + %o3]ASI_USER, %o4
3433 !
3434 ! %o0 and %o1 point at the end and remain pointing at the end
3435 ! of their buffers. We pull things out by adding %o3 (which is
3436 ! the negation of the length) to the buffer end which gives us
3437 ! the current location in the buffers. By incrementing %o3 we walk
3438 ! through both buffers without having to bump each buffer's
3439 ! pointer. A very fast 4 instruction loop.
3440 !
3441 .align 16
3442 .dcicl:
3443 stb %o4, [%o1 + %o3]
3444 inccc %o3
3445 bl,a,pt %ncc, .dcicl
3446 lduba [%o0 + %o3]ASI_USER, %o4
3447 !
3448 ! We're done. Go home.
3449 !
3450 membar #Sync
3451 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
3452 retl
3453 clr %o0
3454 !
3455 ! Try aligned copies from here.
3456 !
3457 .dci_ns:
3458 !
3459 ! See if we're single byte aligned. If we are, check the
3460 ! limit for single byte copies. If we're smaller, or equal,
3461 ! bounce to the byte for byte copy loop. Otherwise do it in
3462 ! HW (if enabled).
3463 !
3464 btst 1, %o3
3465 bz,a,pt %icc, .dcih8
3466 btst 7, %o3
3467 !
3468 ! We're single byte aligned.
3469 !
3470 subcc %o2, VIS_COPY_THRESHOLD, %o3
3471 bleu,pt %ncc, .dcibcp
3472 sethi %hi(hw_copy_limit_1), %o3
3473 ld [%o3 + %lo(hw_copy_limit_1)], %o3
3474 !
3475 ! Is HW copy on? If not do everything byte for byte.
3476 !
3477 tst %o3
3478 bz,pn %icc, .dcibcp
3479 subcc %o3, %o2, %o3
3480 !
3481 ! Are we bigger than the HW limit? If not
3482 ! go to byte for byte.
3483 !
3484 bge,pt %ncc, .dcibcp
3485 nop
3486 !
3487 ! We're big enough and copy is on. Do it with HW.
3488 !
3489 ba,pt %ncc, .big_copyin
3490 nop
3491 .dcih8:
3492 !
3493 ! 8 byte aligned?
3494 !
3495 bnz,a %ncc, .dcih4
3496 btst 3, %o3
3497 !
3498 ! We're eight byte aligned.
3499 !
3500 subcc %o2, VIS_COPY_THRESHOLD, %o3
3501 bleu,pt %ncc, .dcis8
3502 sethi %hi(hw_copy_limit_8), %o3
3503 ld [%o3 + %lo(hw_copy_limit_8)], %o3
3504 !
3505 ! Is HW assist on? If not, do it with the aligned copy.
3506 !
3507 tst %o3
3508 bz,pn %icc, .dcis8
3509 subcc %o3, %o2, %o3
3510 bge %ncc, .dcis8
3511 nop
3512 ba,pt %ncc, .big_copyin
3513 nop
3514 .dcis8:
3515 !
3516 ! Housekeeping for copy loops. Uses same idea as in the byte for
3517 ! byte copy loop above.
3518 !
3519 add %o0, %o2, %o0
3520 add %o1, %o2, %o1
3521 sub %g0, %o2, %o3
3522 ba,pt %ncc, .didebc
3523 srl %o2, 3, %o2 ! Number of 8 byte chunks to copy
3524 !
3525 ! 4 byte aligned?
3526 !
3527 .dcih4:
3528 bnz %ncc, .dcih2
3529 subcc %o2, VIS_COPY_THRESHOLD, %o3
3530 bleu,pt %ncc, .dcis4
3531 sethi %hi(hw_copy_limit_4), %o3
3532 ld [%o3 + %lo(hw_copy_limit_4)], %o3
3533 !
3534 ! Is HW assist on? If not, do it with the aligned copy.
3535 !
3536 tst %o3
3537 bz,pn %icc, .dcis4
3538 subcc %o3, %o2, %o3
3539 !
3540 ! We're negative if our size is larger than hw_copy_limit_4.
3541 !
3542 bge %ncc, .dcis4
3543 nop
3544 ba,pt %ncc, .big_copyin
3545 nop
3546 .dcis4:
3547 !
3548 ! Housekeeping for copy loops. Uses same idea as in the byte
3549 ! for byte copy loop above.
3550 !
3551 add %o0, %o2, %o0
3552 add %o1, %o2, %o1
3553 sub %g0, %o2, %o3
3554 ba,pt %ncc, .didfbc
3555 srl %o2, 2, %o2 ! Number of 4 byte chunks to copy
3556 .dcih2:
3557 !
3558 ! We're two byte aligned. Check for "smallness"
3559 ! done in delay at .dcih4
3560 !
3561 bleu,pt %ncc, .dcis2
3562 sethi %hi(hw_copy_limit_2), %o3
3563 ld [%o3 + %lo(hw_copy_limit_2)], %o3
3564 !
3565 ! Is HW assist on? If not, do it with the aligned copy.
3566 !
3567 tst %o3
3568 bz,pn %icc, .dcis2
3569 subcc %o3, %o2, %o3
3570 !
3571 ! Are we larger than the HW limit?
3572 !
3573 bge %ncc, .dcis2
3574 nop
3575 !
3576 ! HW assist is on and we're large enough to use it.
3577 !
3578 ba,pt %ncc, .big_copyin
3579 nop
3580 !
3581 ! Housekeeping for copy loops. Uses same idea as in the byte
3582 ! for byte copy loop above.
3583 !
3584 .dcis2:
3585 add %o0, %o2, %o0
3586 add %o1, %o2, %o1
3587 sub %g0, %o2, %o3
3588 ba,pt %ncc, .didtbc
3589 srl %o2, 1, %o2 ! Number of 2 byte chunks to copy
3590 !
3591 .small_copyin:
3592 !
3593 ! Why are we doing this AGAIN? There are certain conditions in
3594 ! big copyin that will cause us to forgo the HW assisted copies
3595 ! and bounce back to a non-hw assisted copy. This dispatches
3596 ! those copies. Note that we branch around this in the main line
3597 ! code.
3598 !
3599 ! We make no check for limits or HW enablement here. We've
3600 ! already been told that we're a poster child so just go off
3601 ! and do it.
3602 !
3603 or %o0, %o1, %o3
3604 btst 1, %o3
3605 bnz %icc, .dcibcp ! Most likely
3606 btst 7, %o3
3607 bz %icc, .dcis8
3608 btst 3, %o3
3609 bz %icc, .dcis4
3610 nop
3611 ba,pt %ncc, .dcis2
3612 nop
3613 !
3614 ! Eight byte aligned copies. A steal from the original .small_copyin
3615 ! with modifications. %o2 is number of 8 byte chunks to copy. When
3616 ! done, we examine %o3. If this is < 0, we have 1 - 7 bytes more
3617 ! to copy.
3618 !
3619 .align 32
3620 .didebc:
3621 ldxa [%o0 + %o3]ASI_USER, %o4
3622 deccc %o2
3623 stx %o4, [%o1 + %o3]
3624 bg,pt %ncc, .didebc
3625 addcc %o3, 8, %o3
3626 !
3627 ! End of copy loop. Most 8 byte aligned copies end here.
3628 !
3629 bz,pt %ncc, .dcifh
3630 nop
3631 !
3632 ! Something is left. Do it byte for byte.
3633 !
3634 ba,pt %ncc, .dcicl
3635 lduba [%o0 + %o3]ASI_USER, %o4
3636 !
3637 ! 4 byte copy loop. %o2 is number of 4 byte chunks to copy.
3638 !
3639 .align 32
3640 .didfbc:
3641 lduwa [%o0 + %o3]ASI_USER, %o4
3642 deccc %o2
3643 st %o4, [%o1 + %o3]
3644 bg,pt %ncc, .didfbc
3645 addcc %o3, 4, %o3
3646 !
3647 ! End of copy loop. Most 4 byte aligned copies end here.
3648 !
3649 bz,pt %ncc, .dcifh
3650 nop
3651 !
3652 ! Something is left. Do it byte for byte.
3653 !
3654 ba,pt %ncc, .dcicl
3655 lduba [%o0 + %o3]ASI_USER, %o4
3656 !
3657 ! 2 byte aligned copy loop. %o2 is number of 2 byte chunks to
3658 ! copy.
3659 !
3660 .align 32
3661 .didtbc:
3662 lduha [%o0 + %o3]ASI_USER, %o4
3663 deccc %o2
3664 sth %o4, [%o1 + %o3]
3665 bg,pt %ncc, .didtbc
3666 addcc %o3, 2, %o3
3667 !
3668 ! End of copy loop. Most 2 byte aligned copies end here.
3669 !
3670 bz,pt %ncc, .dcifh
3671 nop
3672 !
3673 ! Deal with the last byte
3674 !
3675 lduba [%o0 + %o3]ASI_USER, %o4
3676 stb %o4, [%o1 + %o3]
3677 .dcifh:
3678 membar #Sync
3679 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
3680 retl
3681 clr %o0
3682
3683 .big_copyin:
3684 !
3685 ! Are we using the FP registers?
3686 !
3687 rd %fprs, %o3 ! check for unused fp
3688 btst FPRS_FEF, %o3
3689 bnz %ncc, .copyin_fpregs_inuse
3690 nop
3691 !
3692 ! We're going off to do a block copy.
3693 ! Switch fault handlers and grab a window. We
3694 ! don't do a membar #Sync since we've done only
3695 ! kernel data to this point.
3696 !
3697 stn %o4, [THREAD_REG + T_LOFAULT]
3698 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
3699 !
3700 ! %o3 is %i3 after the save...
3701 !
3702 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]
3703 ba,pt %ncc, .do_blockcopyin
3704 wr %g0, FPRS_FEF, %fprs
3705 .copyin_fpregs_inuse:
3706 !
3707 ! We're here if the FP regs are in use. Need to see if the request
3708 ! exceeds our suddenly larger minimum.
3709 !
3710 cmp %i2, VIS_COPY_THRESHOLD+(64*4)
3711 bl %ncc, .small_copyin
3712 nop
3713 !
3714 ! We're going off and do a block copy.
3715 ! Change to the heavy duty fault handler and grab a window first.
3716 ! New handler is passed in
3717 !
3718 stn %o4, [THREAD_REG + T_LOFAULT]
3719 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
3720 !
3721 ! %o3 is now %i3
3722 !
3723 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]
3724
3725 ! save in-use fpregs on stack
3726 wr %g0, FPRS_FEF, %fprs
3727 membar #Sync
3728 add %fp, STACK_BIAS - 257, %o2
3729 and %o2, -64, %o2
3730 stda %d0, [%o2]ASI_BLK_P
3731 add %o2, 64, %o2
3732 stda %d16, [%o2]ASI_BLK_P
3733 add %o2, 64, %o2
3734 stda %d32, [%o2]ASI_BLK_P
3735 add %o2, 64, %o2
3736 stda %d48, [%o2]ASI_BLK_P
3737 membar #Sync
3738
3739 .do_blockcopyin:
3740 membar #StoreStore|#StoreLoad|#LoadStore
3741
3742 rd %gsr, %o2
3743 st %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
3744
3745 ! Set the lower bit in the saved t_lofault to indicate
3746 ! that we need to clear the %fprs register on the way
3747 ! out
3748 or SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
3749
3750 ! Swap src/dst since the code below is memcpy code
3751 ! and memcpy/bcopy have different calling sequences
3752 mov %i1, %i5
3753 mov %i0, %i1
3754 mov %i5, %i0
3755
3756 !!! This code is nearly identical to the version in the sun4u
3757 !!! libc_psr. Most bugfixes made to that file should be
3758 !!! merged into this routine.
3759
3760 andcc %i0, 7, %o3
3761 bz copyin_blkcpy
3762 sub %o3, 8, %o3
3763 neg %o3
3764 sub %i2, %o3, %i2
3765
3766 ! Align Destination on double-word boundary
3767
3768 2: lduba [%i1]ASI_USER, %o4
3769 inc %i1
3770 inc %i0
3771 deccc %o3
3772 bgu %ncc, 2b
3773 stb %o4, [%i0-1]
3774 copyin_blkcpy:
3775 andcc %i0, 63, %i3
3776 bz,pn %ncc, copyin_blalign ! now block aligned
3777 sub %i3, 64, %i3
3778 neg %i3 ! bytes till block aligned
3779 sub %i2, %i3, %i2 ! update %i2 with new count
3780
3781 ! Copy %i3 bytes till dst is block (64 byte) aligned. use
3782 ! double word copies.
3783
3784 alignaddr %i1, %g0, %g1
3785 ldda [%g1]ASI_USER, %d0
3786 add %g1, 8, %g1
3787 6:
3788 ldda [%g1]ASI_USER, %d2
3789 add %g1, 8, %g1
3790 subcc %i3, 8, %i3
3791 faligndata %d0, %d2, %d8
3792 std %d8, [%i0]
3793 add %i1, 8, %i1
3794 bz,pn %ncc, copyin_blalign
3795 add %i0, 8, %i0
3796 ldda [%g1]ASI_USER, %d0
3797 add %g1, 8, %g1
3798 subcc %i3, 8, %i3
3799 faligndata %d2, %d0, %d8
3800 std %d8, [%i0]
3801 add %i1, 8, %i1
3802 bgu,pn %ncc, 6b
3803 add %i0, 8, %i0
3804
3805 copyin_blalign:
3806 membar #StoreLoad
3807 ! %i2 = total length
3808 ! %i3 = blocks (length - 64) / 64
3809 ! %i4 = doubles remaining (length - blocks)
3810 sub %i2, 64, %i3
3811 andn %i3, 63, %i3
3812 sub %i2, %i3, %i4
3813 andn %i4, 7, %i4
3814 sub %i4, 16, %i4
3815 sub %i2, %i4, %i2
3816 sub %i2, %i3, %i2
3817
3818 andn %i1, 0x3f, %l7 ! blk aligned address
3819 alignaddr %i1, %g0, %g0 ! gen %gsr
3820
3821 srl %i1, 3, %l5 ! bits 3,4,5 are now least sig in %l5
3822 andcc %l5, 7, %i5 ! mask everything except bits 1,2 3
3823 add %i1, %i4, %i1
3824 add %i1, %i3, %i1
3825
3826 ldda [%l7]ASI_BLK_AIUS, %d0
3827 add %l7, 64, %l7
3828 ldda [%l7]ASI_BLK_AIUS, %d16
3829 add %l7, 64, %l7
3830 ldda [%l7]ASI_BLK_AIUS, %d32
3831 add %l7, 64, %l7
3832 sub %i3, 128, %i3
3833
3834 ! switch statement to get us to the right 8 byte blk within a
3835 ! 64 byte block
3836
3837 cmp %i5, 4
3838 bgeu,a copyin_hlf
3839 cmp %i5, 6
3840 cmp %i5, 2
3841 bgeu,a copyin_sqtr
3842 nop
3843 cmp %i5, 1
3844 be,a copyin_seg1
3845 nop
3846 ba,pt %ncc, copyin_seg0
3847 nop
3848 copyin_sqtr:
3849 be,a copyin_seg2
3850 nop
3851 ba,pt %ncc, copyin_seg3
3852 nop
3853
3854 copyin_hlf:
3855 bgeu,a copyin_fqtr
3856 nop
3857 cmp %i5, 5
3858 be,a copyin_seg5
3859 nop
3860 ba,pt %ncc, copyin_seg4
3861 nop
3862 copyin_fqtr:
3863 be,a copyin_seg6
3864 nop
3865 ba,pt %ncc, copyin_seg7
3866 nop
3867
3868 copyin_seg0:
3869 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
3870 FALIGN_D0
3871 ldda [%l7]ASI_BLK_AIUS, %d0
3872 stda %d48, [%i0]ASI_BLK_P
3873 add %l7, 64, %l7
3874 subcc %i3, 64, %i3
3875 bz,pn %ncc, 0f
3876 add %i0, 64, %i0
3877 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
3878 FALIGN_D16
3879 ldda [%l7]ASI_BLK_AIUS, %d16
3880 stda %d48, [%i0]ASI_BLK_P
3881 add %l7, 64, %l7
3882 subcc %i3, 64, %i3
3883 bz,pn %ncc, 1f
3884 add %i0, 64, %i0
3885 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
3886 FALIGN_D32
3887 ldda [%l7]ASI_BLK_AIUS, %d32
3888 stda %d48, [%i0]ASI_BLK_P
3889 add %l7, 64, %l7
3890 subcc %i3, 64, %i3
3891 bz,pn %ncc, 2f
3892 add %i0, 64, %i0
3893 ba,a,pt %ncc, copyin_seg0
3894
3895 0:
3896 FALIGN_D16
3897 stda %d48, [%i0]ASI_BLK_P
3898 add %i0, 64, %i0
3899 membar #Sync
3900 FALIGN_D32
3901 stda %d48, [%i0]ASI_BLK_P
3902 ba,pt %ncc, copyin_blkd0
3903 add %i0, 64, %i0
3904
3905 1:
3906 FALIGN_D32
3907 stda %d48, [%i0]ASI_BLK_P
3908 add %i0, 64, %i0
3909 membar #Sync
3910 FALIGN_D0
3911 stda %d48, [%i0]ASI_BLK_P
3912 ba,pt %ncc, copyin_blkd16
3913 add %i0, 64, %i0
3914
3915 2:
3916 FALIGN_D0
3917 stda %d48, [%i0]ASI_BLK_P
3918 add %i0, 64, %i0
3919 membar #Sync
3920 FALIGN_D16
3921 stda %d48, [%i0]ASI_BLK_P
3922 ba,pt %ncc, copyin_blkd32
3923 add %i0, 64, %i0
3924
3925 copyin_seg1:
3926 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
3927 FALIGN_D2
3928 ldda [%l7]ASI_BLK_AIUS, %d0
3929 stda %d48, [%i0]ASI_BLK_P
3930 add %l7, 64, %l7
3931 subcc %i3, 64, %i3
3932 bz,pn %ncc, 0f
3933 add %i0, 64, %i0
3934 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
3935 FALIGN_D18
3936 ldda [%l7]ASI_BLK_AIUS, %d16
3937 stda %d48, [%i0]ASI_BLK_P
3938 add %l7, 64, %l7
3939 subcc %i3, 64, %i3
3940 bz,pn %ncc, 1f
3941 add %i0, 64, %i0
3942 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
3943 FALIGN_D34
3944 ldda [%l7]ASI_BLK_AIUS, %d32
3945 stda %d48, [%i0]ASI_BLK_P
3946 add %l7, 64, %l7
3947 subcc %i3, 64, %i3
3948 bz,pn %ncc, 2f
3949 add %i0, 64, %i0
3950 ba,a,pt %ncc, copyin_seg1
3951 0:
3952 FALIGN_D18
3953 stda %d48, [%i0]ASI_BLK_P
3954 add %i0, 64, %i0
3955 membar #Sync
3956 FALIGN_D34
3957 stda %d48, [%i0]ASI_BLK_P
3958 ba,pt %ncc, copyin_blkd2
3959 add %i0, 64, %i0
3960
3961 1:
3962 FALIGN_D34
3963 stda %d48, [%i0]ASI_BLK_P
3964 add %i0, 64, %i0
3965 membar #Sync
3966 FALIGN_D2
3967 stda %d48, [%i0]ASI_BLK_P
3968 ba,pt %ncc, copyin_blkd18
3969 add %i0, 64, %i0
3970
3971 2:
3972 FALIGN_D2
3973 stda %d48, [%i0]ASI_BLK_P
3974 add %i0, 64, %i0
3975 membar #Sync
3976 FALIGN_D18
3977 stda %d48, [%i0]ASI_BLK_P
3978 ba,pt %ncc, copyin_blkd34
3979 add %i0, 64, %i0
3980 copyin_seg2:
3981 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
3982 FALIGN_D4
3983 ldda [%l7]ASI_BLK_AIUS, %d0
3984 stda %d48, [%i0]ASI_BLK_P
3985 add %l7, 64, %l7
3986 subcc %i3, 64, %i3
3987 bz,pn %ncc, 0f
3988 add %i0, 64, %i0
3989 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
3990 FALIGN_D20
3991 ldda [%l7]ASI_BLK_AIUS, %d16
3992 stda %d48, [%i0]ASI_BLK_P
3993 add %l7, 64, %l7
3994 subcc %i3, 64, %i3
3995 bz,pn %ncc, 1f
3996 add %i0, 64, %i0
3997 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
3998 FALIGN_D36
3999 ldda [%l7]ASI_BLK_AIUS, %d32
4000 stda %d48, [%i0]ASI_BLK_P
4001 add %l7, 64, %l7
4002 subcc %i3, 64, %i3
4003 bz,pn %ncc, 2f
4004 add %i0, 64, %i0
4005 ba,a,pt %ncc, copyin_seg2
4006
4007 0:
4008 FALIGN_D20
4009 stda %d48, [%i0]ASI_BLK_P
4010 add %i0, 64, %i0
4011 membar #Sync
4012 FALIGN_D36
4013 stda %d48, [%i0]ASI_BLK_P
4014 ba,pt %ncc, copyin_blkd4
4015 add %i0, 64, %i0
4016
4017 1:
4018 FALIGN_D36
4019 stda %d48, [%i0]ASI_BLK_P
4020 add %i0, 64, %i0
4021 membar #Sync
4022 FALIGN_D4
4023 stda %d48, [%i0]ASI_BLK_P
4024 ba,pt %ncc, copyin_blkd20
4025 add %i0, 64, %i0
4026
4027 2:
4028 FALIGN_D4
4029 stda %d48, [%i0]ASI_BLK_P
4030 add %i0, 64, %i0
4031 membar #Sync
4032 FALIGN_D20
4033 stda %d48, [%i0]ASI_BLK_P
4034 ba,pt %ncc, copyin_blkd36
4035 add %i0, 64, %i0
4036
4037 copyin_seg3:
4038 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4039 FALIGN_D6
4040 ldda [%l7]ASI_BLK_AIUS, %d0
4041 stda %d48, [%i0]ASI_BLK_P
4042 add %l7, 64, %l7
4043 subcc %i3, 64, %i3
4044 bz,pn %ncc, 0f
4045 add %i0, 64, %i0
4046 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4047 FALIGN_D22
4048 ldda [%l7]ASI_BLK_AIUS, %d16
4049 stda %d48, [%i0]ASI_BLK_P
4050 add %l7, 64, %l7
4051 subcc %i3, 64, %i3
4052 bz,pn %ncc, 1f
4053 add %i0, 64, %i0
4054 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4055 FALIGN_D38
4056 ldda [%l7]ASI_BLK_AIUS, %d32
4057 stda %d48, [%i0]ASI_BLK_P
4058 add %l7, 64, %l7
4059 subcc %i3, 64, %i3
4060 bz,pn %ncc, 2f
4061 add %i0, 64, %i0
4062 ba,a,pt %ncc, copyin_seg3
4063
4064 0:
4065 FALIGN_D22
4066 stda %d48, [%i0]ASI_BLK_P
4067 add %i0, 64, %i0
4068 membar #Sync
4069 FALIGN_D38
4070 stda %d48, [%i0]ASI_BLK_P
4071 ba,pt %ncc, copyin_blkd6
4072 add %i0, 64, %i0
4073
4074 1:
4075 FALIGN_D38
4076 stda %d48, [%i0]ASI_BLK_P
4077 add %i0, 64, %i0
4078 membar #Sync
4079 FALIGN_D6
4080 stda %d48, [%i0]ASI_BLK_P
4081 ba,pt %ncc, copyin_blkd22
4082 add %i0, 64, %i0
4083
4084 2:
4085 FALIGN_D6
4086 stda %d48, [%i0]ASI_BLK_P
4087 add %i0, 64, %i0
4088 membar #Sync
4089 FALIGN_D22
4090 stda %d48, [%i0]ASI_BLK_P
4091 ba,pt %ncc, copyin_blkd38
4092 add %i0, 64, %i0
4093
4094 copyin_seg4:
4095 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4096 FALIGN_D8
4097 ldda [%l7]ASI_BLK_AIUS, %d0
4098 stda %d48, [%i0]ASI_BLK_P
4099 add %l7, 64, %l7
4100 subcc %i3, 64, %i3
4101 bz,pn %ncc, 0f
4102 add %i0, 64, %i0
4103 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4104 FALIGN_D24
4105 ldda [%l7]ASI_BLK_AIUS, %d16
4106 stda %d48, [%i0]ASI_BLK_P
4107 add %l7, 64, %l7
4108 subcc %i3, 64, %i3
4109 bz,pn %ncc, 1f
4110 add %i0, 64, %i0
4111 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4112 FALIGN_D40
4113 ldda [%l7]ASI_BLK_AIUS, %d32
4114 stda %d48, [%i0]ASI_BLK_P
4115 add %l7, 64, %l7
4116 subcc %i3, 64, %i3
4117 bz,pn %ncc, 2f
4118 add %i0, 64, %i0
4119 ba,a,pt %ncc, copyin_seg4
4120
4121 0:
4122 FALIGN_D24
4123 stda %d48, [%i0]ASI_BLK_P
4124 add %i0, 64, %i0
4125 membar #Sync
4126 FALIGN_D40
4127 stda %d48, [%i0]ASI_BLK_P
4128 ba,pt %ncc, copyin_blkd8
4129 add %i0, 64, %i0
4130
4131 1:
4132 FALIGN_D40
4133 stda %d48, [%i0]ASI_BLK_P
4134 add %i0, 64, %i0
4135 membar #Sync
4136 FALIGN_D8
4137 stda %d48, [%i0]ASI_BLK_P
4138 ba,pt %ncc, copyin_blkd24
4139 add %i0, 64, %i0
4140
4141 2:
4142 FALIGN_D8
4143 stda %d48, [%i0]ASI_BLK_P
4144 add %i0, 64, %i0
4145 membar #Sync
4146 FALIGN_D24
4147 stda %d48, [%i0]ASI_BLK_P
4148 ba,pt %ncc, copyin_blkd40
4149 add %i0, 64, %i0
4150
4151 copyin_seg5:
4152 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4153 FALIGN_D10
4154 ldda [%l7]ASI_BLK_AIUS, %d0
4155 stda %d48, [%i0]ASI_BLK_P
4156 add %l7, 64, %l7
4157 subcc %i3, 64, %i3
4158 bz,pn %ncc, 0f
4159 add %i0, 64, %i0
4160 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4161 FALIGN_D26
4162 ldda [%l7]ASI_BLK_AIUS, %d16
4163 stda %d48, [%i0]ASI_BLK_P
4164 add %l7, 64, %l7
4165 subcc %i3, 64, %i3
4166 bz,pn %ncc, 1f
4167 add %i0, 64, %i0
4168 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4169 FALIGN_D42
4170 ldda [%l7]ASI_BLK_AIUS, %d32
4171 stda %d48, [%i0]ASI_BLK_P
4172 add %l7, 64, %l7
4173 subcc %i3, 64, %i3
4174 bz,pn %ncc, 2f
4175 add %i0, 64, %i0
4176 ba,a,pt %ncc, copyin_seg5
4177
4178 0:
4179 FALIGN_D26
4180 stda %d48, [%i0]ASI_BLK_P
4181 add %i0, 64, %i0
4182 membar #Sync
4183 FALIGN_D42
4184 stda %d48, [%i0]ASI_BLK_P
4185 ba,pt %ncc, copyin_blkd10
4186 add %i0, 64, %i0
4187
4188 1:
4189 FALIGN_D42
4190 stda %d48, [%i0]ASI_BLK_P
4191 add %i0, 64, %i0
4192 membar #Sync
4193 FALIGN_D10
4194 stda %d48, [%i0]ASI_BLK_P
4195 ba,pt %ncc, copyin_blkd26
4196 add %i0, 64, %i0
4197
4198 2:
4199 FALIGN_D10
4200 stda %d48, [%i0]ASI_BLK_P
4201 add %i0, 64, %i0
4202 membar #Sync
4203 FALIGN_D26
4204 stda %d48, [%i0]ASI_BLK_P
4205 ba,pt %ncc, copyin_blkd42
4206 add %i0, 64, %i0
4207
4208 copyin_seg6:
4209 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4210 FALIGN_D12
4211 ldda [%l7]ASI_BLK_AIUS, %d0
4212 stda %d48, [%i0]ASI_BLK_P
4213 add %l7, 64, %l7
4214 subcc %i3, 64, %i3
4215 bz,pn %ncc, 0f
4216 add %i0, 64, %i0
4217 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4218 FALIGN_D28
4219 ldda [%l7]ASI_BLK_AIUS, %d16
4220 stda %d48, [%i0]ASI_BLK_P
4221 add %l7, 64, %l7
4222 subcc %i3, 64, %i3
4223 bz,pn %ncc, 1f
4224 add %i0, 64, %i0
4225 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4226 FALIGN_D44
4227 ldda [%l7]ASI_BLK_AIUS, %d32
4228 stda %d48, [%i0]ASI_BLK_P
4229 add %l7, 64, %l7
4230 subcc %i3, 64, %i3
4231 bz,pn %ncc, 2f
4232 add %i0, 64, %i0
4233 ba,a,pt %ncc, copyin_seg6
4234
4235 0:
4236 FALIGN_D28
4237 stda %d48, [%i0]ASI_BLK_P
4238 add %i0, 64, %i0
4239 membar #Sync
4240 FALIGN_D44
4241 stda %d48, [%i0]ASI_BLK_P
4242 ba,pt %ncc, copyin_blkd12
4243 add %i0, 64, %i0
4244
4245 1:
4246 FALIGN_D44
4247 stda %d48, [%i0]ASI_BLK_P
4248 add %i0, 64, %i0
4249 membar #Sync
4250 FALIGN_D12
4251 stda %d48, [%i0]ASI_BLK_P
4252 ba,pt %ncc, copyin_blkd28
4253 add %i0, 64, %i0
4254
4255 2:
4256 FALIGN_D12
4257 stda %d48, [%i0]ASI_BLK_P
4258 add %i0, 64, %i0
4259 membar #Sync
4260 FALIGN_D28
4261 stda %d48, [%i0]ASI_BLK_P
4262 ba,pt %ncc, copyin_blkd44
4263 add %i0, 64, %i0
4264
4265 copyin_seg7:
4266 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4267 FALIGN_D14
4268 ldda [%l7]ASI_BLK_AIUS, %d0
4269 stda %d48, [%i0]ASI_BLK_P
4270 add %l7, 64, %l7
4271 subcc %i3, 64, %i3
4272 bz,pn %ncc, 0f
4273 add %i0, 64, %i0
4274 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4275 FALIGN_D30
4276 ldda [%l7]ASI_BLK_AIUS, %d16
4277 stda %d48, [%i0]ASI_BLK_P
4278 add %l7, 64, %l7
4279 subcc %i3, 64, %i3
4280 bz,pn %ncc, 1f
4281 add %i0, 64, %i0
4282 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4283 FALIGN_D46
4284 ldda [%l7]ASI_BLK_AIUS, %d32
4285 stda %d48, [%i0]ASI_BLK_P
4286 add %l7, 64, %l7
4287 subcc %i3, 64, %i3
4288 bz,pn %ncc, 2f
4289 add %i0, 64, %i0
4290 ba,a,pt %ncc, copyin_seg7
4291
4292 0:
4293 FALIGN_D30
4294 stda %d48, [%i0]ASI_BLK_P
4295 add %i0, 64, %i0
4296 membar #Sync
4297 FALIGN_D46
4298 stda %d48, [%i0]ASI_BLK_P
4299 ba,pt %ncc, copyin_blkd14
4300 add %i0, 64, %i0
4301
4302 1:
4303 FALIGN_D46
4304 stda %d48, [%i0]ASI_BLK_P
4305 add %i0, 64, %i0
4306 membar #Sync
4307 FALIGN_D14
4308 stda %d48, [%i0]ASI_BLK_P
4309 ba,pt %ncc, copyin_blkd30
4310 add %i0, 64, %i0
4311
4312 2:
4313 FALIGN_D14
4314 stda %d48, [%i0]ASI_BLK_P
4315 add %i0, 64, %i0
4316 membar #Sync
4317 FALIGN_D30
4318 stda %d48, [%i0]ASI_BLK_P
4319 ba,pt %ncc, copyin_blkd46
4320 add %i0, 64, %i0
4321
4322
4323 !
4324 ! dribble out the last partial block
4325 !
4326 copyin_blkd0:
4327 subcc %i4, 8, %i4
4328 blu,pn %ncc, copyin_blkdone
4329 faligndata %d0, %d2, %d48
4330 std %d48, [%i0]
4331 add %i0, 8, %i0
4332 copyin_blkd2:
4333 subcc %i4, 8, %i4
4334 blu,pn %ncc, copyin_blkdone
4335 faligndata %d2, %d4, %d48
4336 std %d48, [%i0]
4337 add %i0, 8, %i0
4338 copyin_blkd4:
4339 subcc %i4, 8, %i4
4340 blu,pn %ncc, copyin_blkdone
4341 faligndata %d4, %d6, %d48
4342 std %d48, [%i0]
4343 add %i0, 8, %i0
4344 copyin_blkd6:
4345 subcc %i4, 8, %i4
4346 blu,pn %ncc, copyin_blkdone
4347 faligndata %d6, %d8, %d48
4348 std %d48, [%i0]
4349 add %i0, 8, %i0
4350 copyin_blkd8:
4351 subcc %i4, 8, %i4
4352 blu,pn %ncc, copyin_blkdone
4353 faligndata %d8, %d10, %d48
4354 std %d48, [%i0]
4355 add %i0, 8, %i0
4356 copyin_blkd10:
4357 subcc %i4, 8, %i4
4358 blu,pn %ncc, copyin_blkdone
4359 faligndata %d10, %d12, %d48
4360 std %d48, [%i0]
4361 add %i0, 8, %i0
4362 copyin_blkd12:
4363 subcc %i4, 8, %i4
4364 blu,pn %ncc, copyin_blkdone
4365 faligndata %d12, %d14, %d48
4366 std %d48, [%i0]
4367 add %i0, 8, %i0
4368 copyin_blkd14:
4369 subcc %i4, 8, %i4
4370 blu,pn %ncc, copyin_blkdone
4371 fsrc1 %d14, %d0
4372 ba,a,pt %ncc, copyin_blkleft
4373
4374 copyin_blkd16:
4375 subcc %i4, 8, %i4
4376 blu,pn %ncc, copyin_blkdone
4377 faligndata %d16, %d18, %d48
4378 std %d48, [%i0]
4379 add %i0, 8, %i0
4380 copyin_blkd18:
4381 subcc %i4, 8, %i4
4382 blu,pn %ncc, copyin_blkdone
4383 faligndata %d18, %d20, %d48
4384 std %d48, [%i0]
4385 add %i0, 8, %i0
4386 copyin_blkd20:
4387 subcc %i4, 8, %i4
4388 blu,pn %ncc, copyin_blkdone
4389 faligndata %d20, %d22, %d48
4390 std %d48, [%i0]
4391 add %i0, 8, %i0
4392 copyin_blkd22:
4393 subcc %i4, 8, %i4
4394 blu,pn %ncc, copyin_blkdone
4395 faligndata %d22, %d24, %d48
4396 std %d48, [%i0]
4397 add %i0, 8, %i0
4398 copyin_blkd24:
4399 subcc %i4, 8, %i4
4400 blu,pn %ncc, copyin_blkdone
4401 faligndata %d24, %d26, %d48
4402 std %d48, [%i0]
4403 add %i0, 8, %i0
4404 copyin_blkd26:
4405 subcc %i4, 8, %i4
4406 blu,pn %ncc, copyin_blkdone
4407 faligndata %d26, %d28, %d48
4408 std %d48, [%i0]
4409 add %i0, 8, %i0
4410 copyin_blkd28:
4411 subcc %i4, 8, %i4
4412 blu,pn %ncc, copyin_blkdone
4413 faligndata %d28, %d30, %d48
4414 std %d48, [%i0]
4415 add %i0, 8, %i0
4416 copyin_blkd30:
4417 subcc %i4, 8, %i4
4418 blu,pn %ncc, copyin_blkdone
4419 fsrc1 %d30, %d0
4420 ba,a,pt %ncc, copyin_blkleft
4421 copyin_blkd32:
4422 subcc %i4, 8, %i4
4423 blu,pn %ncc, copyin_blkdone
4424 faligndata %d32, %d34, %d48
4425 std %d48, [%i0]
4426 add %i0, 8, %i0
4427 copyin_blkd34:
4428 subcc %i4, 8, %i4
4429 blu,pn %ncc, copyin_blkdone
4430 faligndata %d34, %d36, %d48
4431 std %d48, [%i0]
4432 add %i0, 8, %i0
4433 copyin_blkd36:
4434 subcc %i4, 8, %i4
4435 blu,pn %ncc, copyin_blkdone
4436 faligndata %d36, %d38, %d48
4437 std %d48, [%i0]
4438 add %i0, 8, %i0
4439 copyin_blkd38:
4440 subcc %i4, 8, %i4
4441 blu,pn %ncc, copyin_blkdone
4442 faligndata %d38, %d40, %d48
4443 std %d48, [%i0]
4444 add %i0, 8, %i0
4445 copyin_blkd40:
4446 subcc %i4, 8, %i4
4447 blu,pn %ncc, copyin_blkdone
4448 faligndata %d40, %d42, %d48
4449 std %d48, [%i0]
4450 add %i0, 8, %i0
4451 copyin_blkd42:
4452 subcc %i4, 8, %i4
4453 blu,pn %ncc, copyin_blkdone
4454 faligndata %d42, %d44, %d48
4455 std %d48, [%i0]
4456 add %i0, 8, %i0
4457 copyin_blkd44:
4458 subcc %i4, 8, %i4
4459 blu,pn %ncc, copyin_blkdone
4460 faligndata %d44, %d46, %d48
4461 std %d48, [%i0]
4462 add %i0, 8, %i0
4463 copyin_blkd46:
4464 subcc %i4, 8, %i4
4465 blu,pn %ncc, copyin_blkdone
4466 fsrc1 %d46, %d0
4467
4468 copyin_blkleft:
4469 1:
4470 ldda [%l7]ASI_USER, %d2
4471 add %l7, 8, %l7
4472 subcc %i4, 8, %i4
4473 faligndata %d0, %d2, %d8
4474 std %d8, [%i0]
4475 blu,pn %ncc, copyin_blkdone
4476 add %i0, 8, %i0
4477 ldda [%l7]ASI_USER, %d0
4478 add %l7, 8, %l7
4479 subcc %i4, 8, %i4
4480 faligndata %d2, %d0, %d8
4481 std %d8, [%i0]
4482 bgeu,pt %ncc, 1b
4483 add %i0, 8, %i0
4484
4485 copyin_blkdone:
4486 tst %i2
4487 bz,pt %ncc, .copyin_exit
4488 and %l3, 0x4, %l3 ! fprs.du = fprs.dl = 0
4489
4490 7: lduba [%i1]ASI_USER, %i4
4491 inc %i1
4492 inc %i0
4493 deccc %i2
4494 bgu %ncc, 7b
4495 stb %i4, [%i0 - 1]
4496
4497 .copyin_exit:
4498 membar #StoreLoad|#StoreStore
4499 btst FPUSED_FLAG, SAVED_LOFAULT
4500 bz %icc, 1f
4501 nop
4502
4503 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr
4504 wr %o2, 0, %gsr
4505
4506 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
4507 btst FPRS_FEF, %o3
4508 bz %icc, 4f
4509 nop
4510
4511 ! restore fpregs from stack
4512 membar #Sync
4513 add %fp, STACK_BIAS - 257, %o2
4514 and %o2, -64, %o2
4515 ldda [%o2]ASI_BLK_P, %d0
4516 add %o2, 64, %o2
4517 ldda [%o2]ASI_BLK_P, %d16
4518 add %o2, 64, %o2
4519 ldda [%o2]ASI_BLK_P, %d32
4520 add %o2, 64, %o2
4521 ldda [%o2]ASI_BLK_P, %d48
4522 membar #Sync
4523
4524 ba,pt %ncc, 1f
4525 wr %o3, 0, %fprs ! restore fprs
4526
4527 4:
4528 FZERO ! zero all of the fpregs
4529 wr %o3, 0, %fprs ! restore fprs
4530
4531 1:
4532 andn SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
4533 membar #Sync ! sync error barrier
4534 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
4535 ret
4536 restore %g0, 0, %o0
4537 .copyin_err:
4538 ldn [THREAD_REG + T_COPYOPS], %o4
4539 brz %o4, 2f
4540 nop
4541 ldn [%o4 + CP_COPYIN], %g2
4542 jmp %g2
4543 nop
4544 2:
4545 retl
4546 mov -1, %o0
4547 SET_SIZE(copyin)
4548
/*
 * xcopyin - shares the .do_copyin body with copyin_noerr, but installs
 * .xcopyin_err in REAL_LOFAULT as the fault handler first.
 *
 * On a fault, .xcopyin_err defers to the CP_XCOPYIN entry of the
 * thread's t_copyops vector when one is set; otherwise it returns the
 * value found in %g1 (presumably the errno left by the fault path --
 * confirm against the lofault trap code).
 */
4549 ENTRY(xcopyin)
4550 sethi %hi(.xcopyin_err), REAL_LOFAULT
4551 b .do_copyin
4552 or REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT /* delay slot: finish building the handler address */
4553 .xcopyin_err:
4554 ldn [THREAD_REG + T_COPYOPS], %o4 /* %o4 = curthread's t_copyops vector */
4555 brz %o4, 2f /* no vector installed: return the error ourselves */
4556 nop
4557 ldn [%o4 + CP_XCOPYIN], %g2 /* %g2 = vector's xcopyin entry */
4558 jmp %g2 /* jump (not call): control does not come back here */
4559 nop
4560 2:
4561 retl
4562 mov %g1, %o0 /* delay slot: return value is whatever is in %g1 */
4563 SET_SIZE(xcopyin)
4564
/*
 * xcopyin_little - byte-at-a-time copy with the byte order reversed.
 *
 *	%o0 - source address, read with lduba through ASI_AIUSL
 *	%o1 - destination address, written with plain stb
 *	%o2 - number of bytes
 *
 * The source is walked from its last byte backwards while the
 * destination is filled from its first byte forwards, so
 * dst[i] = src[count - 1 - i].
 *
 * Installs .little_err as t_lofault for the duration of the copy and
 * restores the previous handler on every exit.  Returns 0 on success;
 * on a fault returns the value found in %g1 (presumably an errno set
 * by the fault path -- confirm).
 */
4565 ENTRY(xcopyin_little)
4566 sethi %hi(.little_err), %o4
4567 ldn [THREAD_REG + T_LOFAULT], %o5 /* %o5 = previous t_lofault, restored at exit */
4568 or %o4, %lo(.little_err), %o4
4569 membar #Sync ! sync error barrier
4570 stn %o4, [THREAD_REG + T_LOFAULT] /* faults now vector to .little_err */
4571
4572 subcc %g0, %o2, %o3 /* %o3 = -count; Z is set when count == 0 */
4573 add %o0, %o2, %o0 /* %o0 = src + count */
4574 bz,pn %ncc, 2f ! check for zero bytes
4575 sub %o2, 1, %o4 /* delay slot: %o4 = count - 1 */
4576 add %o0, %o4, %o0 ! start w/last byte
4577 add %o1, %o2, %o1 /* %o1 = dst + count, so %o1 + %o3 starts at dst */
4578 lduba [%o0+%o3]ASI_AIUSL, %o4 /* first byte fetched is src[count - 1] */
4579
4580 1: stb %o4, [%o1+%o3] /* dst[i] = src[count - 1 - i] */
4581 inccc %o3 /* carry is set when %o3 wraps from -1 to 0 */
4582 sub %o0, 2, %o0 ! get next byte
4583 bcc,a,pt %ncc, 1b /* loop while no carry, i.e. bytes remain */
4584 lduba [%o0+%o3]ASI_AIUSL, %o4 /* annulled delay slot: executed only when looping */
4585
4586 2: membar #Sync ! sync error barrier
4587 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
4588 retl
4589 mov %g0, %o0 ! return (0)
4590
4591 .little_err:
4592 membar #Sync ! sync error barrier
4593 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
4594 retl
4595 mov %g1, %o0 /* delay slot: return value is whatever is in %g1 */
4596 SET_SIZE(xcopyin_little)
4597
4598
4599 /*
4600 * Copy a block of storage - must not overlap (from + len <= to).
4601 * No fault handler installed (to be called under on_fault())
4602 */
4603
4604 ENTRY(copyin_noerr)
4605 sethi %hi(.copyio_noerr), REAL_LOFAULT
4606 b .do_copyin /* join the common copyin body */
4607 or REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT /* delay slot: finish building the handler address */
/*
 * Fault handler used by both copyin_noerr and copyout_noerr: it does
 * no recovery of its own and simply jumps to the address held in
 * SAVED_LOFAULT (the register the copy paths write back to t_lofault
 * on exit).
 */
4608 .copyio_noerr:
4609 jmp SAVED_LOFAULT
4610 nop
4611 SET_SIZE(copyin_noerr)
4612
4613 /*
4614 * Copy a block of storage - must not overlap (from + len <= to).
4615 * No fault handler installed (to be called under on_fault())
4616 */
4617
4618 ENTRY(copyout_noerr)
4619 sethi %hi(.copyio_noerr), REAL_LOFAULT /* same handler label as copyin_noerr */
4620 b .do_copyout /* join the common copyout body */
4621 or REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT /* delay slot: finish building the handler address */
4622 SET_SIZE(copyout_noerr)
4623
/*
 * Tunable words consulted by the copy routines.
 *
 * use_hw_* are initialised to 1 and hw_copy_limit_* to 0.  In copyin,
 * a zero hw_copy_limit_2 means "HW assist off" for two-byte-aligned
 * requests (the tst/bz ahead of .dcis2); a non-zero value is the length
 * a request must exceed before .big_copyin is used.  hw_copy_limit_4
 * is tested the same way ahead of .dcis4.  hw_copy_limit_1 and
 * hw_copy_limit_8 presumably play the same role for their alignments
 * -- confirm against the code that loads them.
 */
4624 .align 4
4625 DGDEF(use_hw_bcopy)
4626 .word 1
4627 DGDEF(use_hw_copyio)
4628 .word 1
4629 DGDEF(use_hw_bzero)
4630 .word 1
4631 DGDEF(hw_copy_limit_1)
4632 .word 0
4633 DGDEF(hw_copy_limit_2)
4634 .word 0
4635 DGDEF(hw_copy_limit_4)
4636 .word 0
4637 DGDEF(hw_copy_limit_8)
4638 .word 0
4639
/* Back to code: what follows is assembled into .text. */
4640 .align 64
4641 .section ".text"
4642
4643
4644 /*
4645 * hwblkclr - clears block-aligned, block-multiple-sized regions that are
4646 * longer than 256 bytes in length using spitfire's block stores. If
4647 * the criteria for using this routine are not met then it calls bzero
4648 * and returns 1. Otherwise 0 is returned indicating success.
4649 * Caller is responsible for ensuring use_hw_bzero is true and that
4650 * kpreempt_disable() has been called.
4651 */
4652 ! %i0 - start address
4653 ! %i1 - length of region (multiple of 64)
4654 ! %l0 - saved fprs
4655 ! %l1 - pointer to saved %d0 block
4656 ! %l2 - saved curthread->t_lwp (not referenced by the code below)
4657
4658 ENTRY(hwblkclr)
4659 ! get another window w/space for one aligned block of saved fpregs
4660 save %sp, -SA(MINFRAME + 2*64), %sp
4661
4662 ! Must be block-aligned
4663 andcc %i0, (64-1), %g0
4664 bnz,pn %ncc, 1f
4665 nop
4666
4667 ! ... and must be 256 bytes or more
4668 cmp %i1, 256
4669 blu,pn %ncc, 1f
4670 nop
4671
4672 ! ... and length must be a multiple of 64
4673 andcc %i1, (64-1), %g0
4674 bz,pn %ncc, 2f
4675 nop
4676
4677 1: ! punt, call bzero but notify the caller that bzero was used
4678 mov %i0, %o0 /* first bzero argument: start address */
4679 call bzero
4680 mov %i1, %o1 /* delay slot: second bzero argument, the length */
4681 ret
4682 restore %g0, 1, %o0 ! return (1) - did not use block operations
4683
4684 2: rd %fprs, %l0 ! check for unused fp
4685 btst FPRS_FEF, %l0
4686 bz 1f /* FEF clear: nothing to preserve, skip the save */
4687 nop
4688
4689 ! save in-use fpregs on stack
4690 membar #Sync
4691 add %fp, STACK_BIAS - 65, %l1
4692 and %l1, -64, %l1 /* %l1 = 64-byte aligned save slot below %fp */
4693 stda %d0, [%l1]ASI_BLK_P /* block store starting at %d0 */
4694
4695 1: membar #StoreStore|#StoreLoad|#LoadStore
4696 wr %g0, FPRS_FEF, %fprs
4697 wr %g0, ASI_BLK_P, %asi /* the "stda ... %asi" stores below use ASI_BLK_P */
4698
4699 ! Clear block
4700 fzero %d0
4701 fzero %d2
4702 fzero %d4
4703 fzero %d6
4704 fzero %d8
4705 fzero %d10
4706 fzero %d12
4707 fzero %d14
4708
4709 mov 256, %i3 /* bytes cleared by one full pass of the four stores */
4710 ba .pz_doblock
4711 nop
4712
4713 .pz_blkstart:
4714 ! stda %d0, [%i0+192]%asi ! in dly slot of branch that got us here
4715 stda %d0, [%i0+128]%asi
4716 stda %d0, [%i0+64]%asi
4717 stda %d0, [%i0]%asi
4718 .pz_zinst:
4719 add %i0, %i3, %i0 /* advance the address by the bytes just cleared */
4720 sub %i1, %i3, %i1 /* and reduce the remaining length to match */
4721 .pz_doblock:
4722 cmp %i1, 256
4723 bgeu,a %ncc, .pz_blkstart
4724 stda %d0, [%i0+192]%asi /* annulled delay slot: executed only when the branch is taken */
4725
/*
 * Fewer than 256 bytes remain.  Each stda above is one 4-byte
 * instruction that clears 64 bytes, so (bytes >> 4) is the number of
 * instruction bytes to back up from .pz_zinst in order to execute only
 * the last %i3/64 stores; .pz_zinst then advances by %i3.
 */
4726 cmp %i1, 64
4727 blu %ncc, .pz_finish
4728
4729 andn %i1, (64-1), %i3 /* delay slot of the blu above: %i3 = length rounded down to whole blocks */
4730 srl %i3, 4, %i2 ! using blocks, 1 instr / 16 words
4731 set .pz_zinst, %i4
4732 sub %i4, %i2, %i4 /* %i4 = address of the first store still needed */
4733 jmp %i4
4734 nop
4735
4736 .pz_finish:
4737 membar #Sync
4738 btst FPRS_FEF, %l0
4739 bz,a .pz_finished /* FP was not in use on entry: only %fprs needs restoring */
4740 wr %l0, 0, %fprs ! restore fprs
4741
4742 ! restore fpregs from stack
4743 ldda [%l1]ASI_BLK_P, %d0
4744 membar #Sync
4745 wr %l0, 0, %fprs ! restore fprs
4746
4747 .pz_finished:
4748 ret
4749 restore %g0, 0, %o0 ! return (bzero or not)
4750 SET_SIZE(hwblkclr)
4751
4752 /*
4753 * Copy 32 bytes of data from src (%o0) to dst (%o1)
4754 * using physical addresses.
4755 */
4756 ENTRY_NP(hw_pa_bcopy32)
4757 rdpr %pstate, %g1 /* %g1 = caller's pstate, written back on exit */
4758 andn %g1, PSTATE_IE, %g2
4759 wrpr %g0, %g2, %pstate /* run the copy with PSTATE_IE cleared */
4760
4761 ldxa [%o0]ASI_MEM, %o2 /* read all four 8-byte words before storing any */
4762 add %o0, 8, %o0
4763 ldxa [%o0]ASI_MEM, %o3
4764 add %o0, 8, %o0
4765 ldxa [%o0]ASI_MEM, %o4
4766 add %o0, 8, %o0
4767 ldxa [%o0]ASI_MEM, %o5
4768 stxa %o2, [%o1]ASI_MEM /* write them out in the same order */
4769 add %o1, 8, %o1
4770 stxa %o3, [%o1]ASI_MEM
4771 add %o1, 8, %o1
4772 stxa %o4, [%o1]ASI_MEM
4773 add %o1, 8, %o1
4774 stxa %o5, [%o1]ASI_MEM
4775
4776 membar #Sync
4777 retl
4778 wrpr %g0, %g1, %pstate /* delay slot: restore the saved pstate */
4779 SET_SIZE(hw_pa_bcopy32)
--- EOF ---