1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #pragma ident "%Z%%M% %I% %E% SMI"
28
29 #include <sys/param.h>
30 #include <sys/errno.h>
31 #include <sys/asm_linkage.h>
32 #include <sys/vtrace.h>
33 #include <sys/machthread.h>
34 #include <sys/clock.h>
35 #include <sys/asi.h>
36 #include <sys/fsr.h>
37 #include <sys/privregs.h>
38
39 #if !defined(lint)
40 #include "assym.h"
41 #endif /* lint */
42
43
44 /*
45 * Pseudo-code to aid in understanding the control flow of the
46 * bcopy routine.
47 *
48 * On entry to bcopy:
49 *
50 * %l6 = curthread->t_lofault;
51 * used_block_copy = FALSE; ! FPUSED_FLAG (%l6 & 1) starts out clear
52 * if (%l6 != NULL) {
53 * curthread->t_lofault = .copyerr;
54 * caller_error_handler = TRUE ! %l6 |= 2
55 * }
56 *
57 * if (length < VIS_COPY)
58 * goto regular_copy;
59 *
60 * if (!use_vis)
61 * goto regular_copy;
62 *
63 * if (curthread->t_lwp == NULL) {
64 * ! Kernel threads do not have pcb's in which to store
65 * ! the floating point state, disallow preemption during
66 * ! the copy.
67 * kpreempt_disable(curthread);
68 * }
69 *
70 * old_fprs = %fprs;
71 * old_gsr = %gsr;
72 * if (%fprs.fef) {
73 * ! If we need to save 4 blocks of fpregs then make sure
74 * ! the length is still appropriate for that extra overhead.
75 * if (length < (large_length + (64 * 4))) {
76 * if (curthread->t_lwp == NULL)
77 * kpreempt_enable(curthread);
78 * goto regular_copy;
79 * }
80 * %fprs.fef = 1;
81 * save current fpregs on stack using blockstore
82 * } else {
83 * %fprs.fef = 1;
84 * }
85 *
86 * used_block_copy = 1; ! %l6 |= 1
87 * do_blockcopy_here;
88 *
89 * In lofault handler:
90 * curthread->t_lofault = .copyerr2;
91 * Continue on with the normal exit handler
92 *
93 * On exit:
94 * call_kpreempt = 0;
95 * if (used_block_copy) { ! %l6 & 1
96 * %gsr = old_gsr;
97 * if (old_fprs & FPRS_FEF)
98 * restore fpregs from stack using blockload
99 * else
100 * zero fpregs
101 * %fprs = old_fprs;
102 * if (curthread->t_lwp == NULL) {
103 * kpreempt_enable(curthread);
104 * call_kpreempt = 1;
105 * }
106 * }
107 * curthread->t_lofault = (%l6 & ~3);
108 * if (call_kpreempt)
109 * kpreempt(%pil);
110 * return (0)
111 *
112 * In second lofault handler (.copyerr2):
113 * We've tried to restore fp state from the stack and failed. To
114 * avoid returning with corrupted fp state, we panic.
115 */
116
117 /*
118 * Notes on preserving existing fp state:
119 *
120 * When a copyOP decides to use fp we may have to preserve existing
121 * floating point state. It is not the caller's state that we need to
122 * preserve - the rest of the kernel does not use fp and, anyway, fp
123 * registers are volatile across a call. Some examples:
124 *
125 * - userland has fp state and is interrupted (device interrupt
126 * or trap) and within the interrupt/trap handling we use
127 * bcopy()
128 * - another (higher level) interrupt or trap handler uses bcopy
129 * while a bcopy from an earlier interrupt is still active
130 * - an asynchronous error trap occurs while fp state exists (in
131 * userland or in kernel copy) and the tl0 component of the handling
132 * uses bcopy
133 * - a user process with fp state incurs a copy-on-write fault and
134 * hwblkpagecopy always uses fp
135 *
136 * We therefore need a per-call place in which to preserve fp state -
137 * using our stack is ideal (and since fp copy cannot be leaf optimized
138 * because of calls it makes, this is no hardship).
139 *
140 * To make sure that floating point state is always saved and restored
141 * correctly, the following "big rules" must be followed when the floating
142 * point registers will be used:
143 *
144 * 1. %l6 always holds the caller's lofault handler. Also in this register,
145 * the low-order bit (FPUSED_FLAG, value 1) indicates that the floating point registers are in
146 * use. The next bit (BCOPY_FLAG, value 2) indicates that the call was to bcopy.
147 *
148 * 2. The FPUSED flag indicates that all FP state has been successfully stored
149 * on the stack. It should not be set until this save has been completed.
150 *
151 * 3. The FPUSED flag should not be cleared on exit until all FP state has
152 * been restored from the stack. If an error occurs while restoring
153 * data from the stack, the error handler can check this flag to see if
154 * a restore is necessary.
155 *
156 * 4. Code run under the new lofault handler must be kept to a minimum. In
157 * particular, any calls to kpreempt() should not be made until after the
158 * lofault handler has been restored.
159 */
160
161 /*
162 * This shadows sys/machsystm.h which can't be included due to the lack of
163 * _ASM guards in include files it references. Change it here, change it there.
164 */
165 #define VIS_COPY_THRESHOLD 900 /* copies shorter than this skip the VIS block-copy path */
166
167 /*
168 * Copies of this many bytes or fewer are always done byte-for-byte.
169 */
170 #define SMALL_LIMIT 7 /* NOTE(review): the visible .do_copy check compares against 12, not this */
171
172 /*
173 * Flags set in the lower bits of the t_lofault address:
174 * FPUSED_FLAG: The FP registers were in use and must be restored
175 * BCOPY_FLAG: Set for bcopy calls, cleared for kcopy calls
176 * COPY_FLAGS: Both of the above
177 *
178 * Other flags:
179 * KPREEMPT_FLAG: kpreempt needs to be called
180 */
181 #define FPUSED_FLAG 1
182 #define BCOPY_FLAG 2
183 #define COPY_FLAGS (FPUSED_FLAG | BCOPY_FLAG)
184 #define KPREEMPT_FLAG 4 /* .copyerr keeps this one in %l1, not in the saved lofault word */
185
186 /*
187 * Size of stack frame in order to accommodate a 64-byte aligned
188 * floating-point register save area and 2 32-bit temp locations.
 * The save area is four 64-byte blocks; the fifth 64 bytes is slack so
 * that the start can be rounded down to a 64-byte boundary.
189 */
190 #define HWCOPYFRAMESIZE ((64 * 5) + (2 * 4))
191
192 #define SAVED_FPREGS_OFFSET (64 * 5)
193 #define SAVED_FPRS_OFFSET (SAVED_FPREGS_OFFSET + 4) /* saved %fprs, below %fp + STACK_BIAS */
194 #define SAVED_GSR_OFFSET (SAVED_FPRS_OFFSET + 4) /* saved %gsr, below %fp + STACK_BIAS */
195
196 /*
197 * Common macros used by the various versions of the block copy
198 * routines in this file.
199 */
200
/*
 * FZERO: clear all 32 double-precision fp registers (%f0 - %f62).
 * %f0 and %f2 are zeroed with fzero; every remaining register is then
 * written with %f0 + %f2 or %f0 * %f2, both of which are zero.
 * The alternation of faddd and fmuld presumably keeps both the fp add and
 * fp multiply units busy (TODO confirm).
 * .copyerr uses this when the saved %fprs has FPRS_FEF clear, i.e. there
 * was no fp state to reload from the stack.
 */
201 #define FZERO \
202 fzero %f0 ;\
203 fzero %f2 ;\
204 faddd %f0, %f2, %f4 ;\
205 fmuld %f0, %f2, %f6 ;\
206 faddd %f0, %f2, %f8 ;\
207 fmuld %f0, %f2, %f10 ;\
208 faddd %f0, %f2, %f12 ;\
209 fmuld %f0, %f2, %f14 ;\
210 faddd %f0, %f2, %f16 ;\
211 fmuld %f0, %f2, %f18 ;\
212 faddd %f0, %f2, %f20 ;\
213 fmuld %f0, %f2, %f22 ;\
214 faddd %f0, %f2, %f24 ;\
215 fmuld %f0, %f2, %f26 ;\
216 faddd %f0, %f2, %f28 ;\
217 fmuld %f0, %f2, %f30 ;\
218 faddd %f0, %f2, %f32 ;\
219 fmuld %f0, %f2, %f34 ;\
220 faddd %f0, %f2, %f36 ;\
221 fmuld %f0, %f2, %f38 ;\
222 faddd %f0, %f2, %f40 ;\
223 fmuld %f0, %f2, %f42 ;\
224 faddd %f0, %f2, %f44 ;\
225 fmuld %f0, %f2, %f46 ;\
226 faddd %f0, %f2, %f48 ;\
227 fmuld %f0, %f2, %f50 ;\
228 faddd %f0, %f2, %f52 ;\
229 fmuld %f0, %f2, %f54 ;\
230 faddd %f0, %f2, %f56 ;\
231 fmuld %f0, %f2, %f58 ;\
232 faddd %f0, %f2, %f60 ;\
233 fmuld %f0, %f2, %f62
234
235
/*
 * FALIGN_Dn family (n = 0, 2, ..., 46).
 *
 * Each macro issues eight faligndata instructions. Instruction k
 * (k = 0..7) combines the adjacent double registers %d(n + 2k) and
 * %d(n + 2k + 2) and writes the result to %d(48 + 2k), so one expansion
 * fills the output block %d48 - %d62. Source registers come from
 * %d0 - %d46 and wrap from %d46 back to %d0.
 *
 * The macros are listed in groups of three (n, n + 16, n + 32). The copy
 * loops load 64-byte input blocks into %d0, %d16 and %d32 in rotation and
 * use one group per 8-byte source offset within a block (seg0 uses
 * D0/D16/D32, seg1 uses D2/D18/D34, and so on).
 */
236 #define FALIGN_D0 \
237 faligndata %d0, %d2, %d48 ;\
238 faligndata %d2, %d4, %d50 ;\
239 faligndata %d4, %d6, %d52 ;\
240 faligndata %d6, %d8, %d54 ;\
241 faligndata %d8, %d10, %d56 ;\
242 faligndata %d10, %d12, %d58 ;\
243 faligndata %d12, %d14, %d60 ;\
244 faligndata %d14, %d16, %d62
245
246 #define FALIGN_D16 \
247 faligndata %d16, %d18, %d48 ;\
248 faligndata %d18, %d20, %d50 ;\
249 faligndata %d20, %d22, %d52 ;\
250 faligndata %d22, %d24, %d54 ;\
251 faligndata %d24, %d26, %d56 ;\
252 faligndata %d26, %d28, %d58 ;\
253 faligndata %d28, %d30, %d60 ;\
254 faligndata %d30, %d32, %d62
255
256 #define FALIGN_D32 \
257 faligndata %d32, %d34, %d48 ;\
258 faligndata %d34, %d36, %d50 ;\
259 faligndata %d36, %d38, %d52 ;\
260 faligndata %d38, %d40, %d54 ;\
261 faligndata %d40, %d42, %d56 ;\
262 faligndata %d42, %d44, %d58 ;\
263 faligndata %d44, %d46, %d60 ;\
264 faligndata %d46, %d0, %d62
265
266 #define FALIGN_D2 \
267 faligndata %d2, %d4, %d48 ;\
268 faligndata %d4, %d6, %d50 ;\
269 faligndata %d6, %d8, %d52 ;\
270 faligndata %d8, %d10, %d54 ;\
271 faligndata %d10, %d12, %d56 ;\
272 faligndata %d12, %d14, %d58 ;\
273 faligndata %d14, %d16, %d60 ;\
274 faligndata %d16, %d18, %d62
275
276 #define FALIGN_D18 \
277 faligndata %d18, %d20, %d48 ;\
278 faligndata %d20, %d22, %d50 ;\
279 faligndata %d22, %d24, %d52 ;\
280 faligndata %d24, %d26, %d54 ;\
281 faligndata %d26, %d28, %d56 ;\
282 faligndata %d28, %d30, %d58 ;\
283 faligndata %d30, %d32, %d60 ;\
284 faligndata %d32, %d34, %d62
285
286 #define FALIGN_D34 \
287 faligndata %d34, %d36, %d48 ;\
288 faligndata %d36, %d38, %d50 ;\
289 faligndata %d38, %d40, %d52 ;\
290 faligndata %d40, %d42, %d54 ;\
291 faligndata %d42, %d44, %d56 ;\
292 faligndata %d44, %d46, %d58 ;\
293 faligndata %d46, %d0, %d60 ;\
294 faligndata %d0, %d2, %d62
295
296 #define FALIGN_D4 \
297 faligndata %d4, %d6, %d48 ;\
298 faligndata %d6, %d8, %d50 ;\
299 faligndata %d8, %d10, %d52 ;\
300 faligndata %d10, %d12, %d54 ;\
301 faligndata %d12, %d14, %d56 ;\
302 faligndata %d14, %d16, %d58 ;\
303 faligndata %d16, %d18, %d60 ;\
304 faligndata %d18, %d20, %d62
305
306 #define FALIGN_D20 \
307 faligndata %d20, %d22, %d48 ;\
308 faligndata %d22, %d24, %d50 ;\
309 faligndata %d24, %d26, %d52 ;\
310 faligndata %d26, %d28, %d54 ;\
311 faligndata %d28, %d30, %d56 ;\
312 faligndata %d30, %d32, %d58 ;\
313 faligndata %d32, %d34, %d60 ;\
314 faligndata %d34, %d36, %d62
315
316 #define FALIGN_D36 \
317 faligndata %d36, %d38, %d48 ;\
318 faligndata %d38, %d40, %d50 ;\
319 faligndata %d40, %d42, %d52 ;\
320 faligndata %d42, %d44, %d54 ;\
321 faligndata %d44, %d46, %d56 ;\
322 faligndata %d46, %d0, %d58 ;\
323 faligndata %d0, %d2, %d60 ;\
324 faligndata %d2, %d4, %d62
325
326 #define FALIGN_D6 \
327 faligndata %d6, %d8, %d48 ;\
328 faligndata %d8, %d10, %d50 ;\
329 faligndata %d10, %d12, %d52 ;\
330 faligndata %d12, %d14, %d54 ;\
331 faligndata %d14, %d16, %d56 ;\
332 faligndata %d16, %d18, %d58 ;\
333 faligndata %d18, %d20, %d60 ;\
334 faligndata %d20, %d22, %d62
335
336 #define FALIGN_D22 \
337 faligndata %d22, %d24, %d48 ;\
338 faligndata %d24, %d26, %d50 ;\
339 faligndata %d26, %d28, %d52 ;\
340 faligndata %d28, %d30, %d54 ;\
341 faligndata %d30, %d32, %d56 ;\
342 faligndata %d32, %d34, %d58 ;\
343 faligndata %d34, %d36, %d60 ;\
344 faligndata %d36, %d38, %d62
345
346 #define FALIGN_D38 \
347 faligndata %d38, %d40, %d48 ;\
348 faligndata %d40, %d42, %d50 ;\
349 faligndata %d42, %d44, %d52 ;\
350 faligndata %d44, %d46, %d54 ;\
351 faligndata %d46, %d0, %d56 ;\
352 faligndata %d0, %d2, %d58 ;\
353 faligndata %d2, %d4, %d60 ;\
354 faligndata %d4, %d6, %d62
355
356 #define FALIGN_D8 \
357 faligndata %d8, %d10, %d48 ;\
358 faligndata %d10, %d12, %d50 ;\
359 faligndata %d12, %d14, %d52 ;\
360 faligndata %d14, %d16, %d54 ;\
361 faligndata %d16, %d18, %d56 ;\
362 faligndata %d18, %d20, %d58 ;\
363 faligndata %d20, %d22, %d60 ;\
364 faligndata %d22, %d24, %d62
365
366 #define FALIGN_D24 \
367 faligndata %d24, %d26, %d48 ;\
368 faligndata %d26, %d28, %d50 ;\
369 faligndata %d28, %d30, %d52 ;\
370 faligndata %d30, %d32, %d54 ;\
371 faligndata %d32, %d34, %d56 ;\
372 faligndata %d34, %d36, %d58 ;\
373 faligndata %d36, %d38, %d60 ;\
374 faligndata %d38, %d40, %d62
375
376 #define FALIGN_D40 \
377 faligndata %d40, %d42, %d48 ;\
378 faligndata %d42, %d44, %d50 ;\
379 faligndata %d44, %d46, %d52 ;\
380 faligndata %d46, %d0, %d54 ;\
381 faligndata %d0, %d2, %d56 ;\
382 faligndata %d2, %d4, %d58 ;\
383 faligndata %d4, %d6, %d60 ;\
384 faligndata %d6, %d8, %d62
385
386 #define FALIGN_D10 \
387 faligndata %d10, %d12, %d48 ;\
388 faligndata %d12, %d14, %d50 ;\
389 faligndata %d14, %d16, %d52 ;\
390 faligndata %d16, %d18, %d54 ;\
391 faligndata %d18, %d20, %d56 ;\
392 faligndata %d20, %d22, %d58 ;\
393 faligndata %d22, %d24, %d60 ;\
394 faligndata %d24, %d26, %d62
395
396 #define FALIGN_D26 \
397 faligndata %d26, %d28, %d48 ;\
398 faligndata %d28, %d30, %d50 ;\
399 faligndata %d30, %d32, %d52 ;\
400 faligndata %d32, %d34, %d54 ;\
401 faligndata %d34, %d36, %d56 ;\
402 faligndata %d36, %d38, %d58 ;\
403 faligndata %d38, %d40, %d60 ;\
404 faligndata %d40, %d42, %d62
405
406 #define FALIGN_D42 \
407 faligndata %d42, %d44, %d48 ;\
408 faligndata %d44, %d46, %d50 ;\
409 faligndata %d46, %d0, %d52 ;\
410 faligndata %d0, %d2, %d54 ;\
411 faligndata %d2, %d4, %d56 ;\
412 faligndata %d4, %d6, %d58 ;\
413 faligndata %d6, %d8, %d60 ;\
414 faligndata %d8, %d10, %d62
415
416 #define FALIGN_D12 \
417 faligndata %d12, %d14, %d48 ;\
418 faligndata %d14, %d16, %d50 ;\
419 faligndata %d16, %d18, %d52 ;\
420 faligndata %d18, %d20, %d54 ;\
421 faligndata %d20, %d22, %d56 ;\
422 faligndata %d22, %d24, %d58 ;\
423 faligndata %d24, %d26, %d60 ;\
424 faligndata %d26, %d28, %d62
425
426 #define FALIGN_D28 \
427 faligndata %d28, %d30, %d48 ;\
428 faligndata %d30, %d32, %d50 ;\
429 faligndata %d32, %d34, %d52 ;\
430 faligndata %d34, %d36, %d54 ;\
431 faligndata %d36, %d38, %d56 ;\
432 faligndata %d38, %d40, %d58 ;\
433 faligndata %d40, %d42, %d60 ;\
434 faligndata %d42, %d44, %d62
435
436 #define FALIGN_D44 \
437 faligndata %d44, %d46, %d48 ;\
438 faligndata %d46, %d0, %d50 ;\
439 faligndata %d0, %d2, %d52 ;\
440 faligndata %d2, %d4, %d54 ;\
441 faligndata %d4, %d6, %d56 ;\
442 faligndata %d6, %d8, %d58 ;\
443 faligndata %d8, %d10, %d60 ;\
444 faligndata %d10, %d12, %d62
445
446 #define FALIGN_D14 \
447 faligndata %d14, %d16, %d48 ;\
448 faligndata %d16, %d18, %d50 ;\
449 faligndata %d18, %d20, %d52 ;\
450 faligndata %d20, %d22, %d54 ;\
451 faligndata %d22, %d24, %d56 ;\
452 faligndata %d24, %d26, %d58 ;\
453 faligndata %d26, %d28, %d60 ;\
454 faligndata %d28, %d30, %d62
455
456 #define FALIGN_D30 \
457 faligndata %d30, %d32, %d48 ;\
458 faligndata %d32, %d34, %d50 ;\
459 faligndata %d34, %d36, %d52 ;\
460 faligndata %d36, %d38, %d54 ;\
461 faligndata %d38, %d40, %d56 ;\
462 faligndata %d40, %d42, %d58 ;\
463 faligndata %d42, %d44, %d60 ;\
464 faligndata %d44, %d46, %d62
465
466 #define FALIGN_D46 \
467 faligndata %d46, %d0, %d48 ;\
468 faligndata %d0, %d2, %d50 ;\
469 faligndata %d2, %d4, %d52 ;\
470 faligndata %d4, %d6, %d54 ;\
471 faligndata %d6, %d8, %d56 ;\
472 faligndata %d8, %d10, %d58 ;\
473 faligndata %d10, %d12, %d60 ;\
474 faligndata %d12, %d14, %d62
475
476
477 /*
478 * Copy a block of storage, returning an error code if `from' or
479 * `to' takes a kernel pagefault which cannot be resolved.
480 * Returns errno value on pagefault error, 0 if all ok
481 */
482
483
484
485 #if defined(lint) /* C stub seen only by lint; the real kcopy is the assembly below */
486
487 /* ARGSUSED */
488 int
489 kcopy(const void *from, void *to, size_t count)
490 { return(0); }
491
492 #else /* lint */
493
494 .seg ".text"
495 .align 4
496
497 ENTRY(kcopy)
498
499 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp /* frame includes the fp save area */
500 set .copyerr, %l6 ! copyerr is lofault value
501 ldn [THREAD_REG + T_LOFAULT], %l7 ! save existing handler
502 membar #Sync ! sync error barrier (see copy.s)
503 stn %l6, [THREAD_REG + T_LOFAULT] ! set t_lofault
504 !
505 ! Note that we carefully do *not* flag the setting of
506 ! t_lofault.
507 !
508 ba,pt %ncc, .do_copy ! common code
509 mov %l7, %l6 /* delay slot: %l6 = previous t_lofault, BCOPY_FLAG not set */
510
511 /*
512 * We got here because of a fault during kcopy or bcopy if a fault
513 * handler existed when bcopy was called.
514 * Errno value is in %g1.
515 */
516 .copyerr:
517 set .copyerr2, %l1 /* a fault from here on lands in .copyerr2, which panics */
518 membar #Sync ! sync error barrier
519 stn %l1, [THREAD_REG + T_LOFAULT] ! set t_lofault
520 btst FPUSED_FLAG, %l6
521 bz %icc, 1f /* fp registers were never taken over: nothing to restore */
522 and %l6, BCOPY_FLAG, %l1 ! copy flag to %l1
523
524 membar #Sync
525
526 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr
527 wr %o2, 0, %gsr
528
529 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
530 btst FPRS_FEF, %o3
531 bz %icc, 4f /* saved %fprs had FEF clear: zero the registers instead */
532 nop
533
534 ! restore fpregs from stack
535 membar #Sync
536 add %fp, STACK_BIAS - 257, %o2
537 and %o2, -64, %o2 /* round down to the 64-byte aligned save area */
538 ldda [%o2]ASI_BLK_P, %d0
539 add %o2, 64, %o2
540 ldda [%o2]ASI_BLK_P, %d16
541 add %o2, 64, %o2
542 ldda [%o2]ASI_BLK_P, %d32
543 add %o2, 64, %o2
544 ldda [%o2]ASI_BLK_P, %d48
545 membar #Sync
546
547 ba,pt %ncc, 2f
548 wr %o3, 0, %fprs ! restore fprs
549
550 4:
551 FZERO ! zero all of the fpregs
552 wr %o3, 0, %fprs ! restore fprs
553
554 2: ldn [THREAD_REG + T_LWP], %o2
555 tst %o2
556 bnz,pt %ncc, 1f /* thread has an lwp: t_preempt was not bumped on entry */
557 nop
558
559 ldsb [THREAD_REG + T_PREEMPT], %l0 /* kpreempt_enable(): drop t_preempt */
560 deccc %l0
561 bnz,pn %ncc, 1f /* still non-zero: skip the kprunrun check */
562 stb %l0, [THREAD_REG + T_PREEMPT] /* delay slot: store the decremented count */
563
564 ! Check for a kernel preemption request
565 ldn [THREAD_REG + T_CPU], %l0
566 ldub [%l0 + CPU_KPRUNRUN], %l0
567 tst %l0
568 bnz,a,pt %ncc, 1f ! Need to call kpreempt?
569 or %l1, KPREEMPT_FLAG, %l1 ! If so, set the flag
570
571 !
572 ! Need to cater for the different expectations of kcopy
573 ! and bcopy. kcopy will *always* set a t_lofault handler
574 ! If it fires, we're expected to just return the error code
575 ! and *not* to invoke any existing error handler. As far as
576 ! bcopy is concerned, we only set t_lofault if there was an
577 ! existing lofault handler. In that case we're expected to
578 ! invoke the previously existing handler after resetting the
579 ! t_lofault value.
580 !
581 1:
582 andn %l6, COPY_FLAGS, %l6 ! remove flags from lofault address
583 membar #Sync ! sync error barrier
584 stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
585
586 ! call kpreempt if necessary
587 btst KPREEMPT_FLAG, %l1
588 bz,pt %icc, 2f
589 nop
590 call kpreempt
591 rdpr %pil, %o0 ! pass %pil
592 2:
593 btst BCOPY_FLAG, %l1 /* did we get here from bcopy? */
594 bnz,pn %ncc, 3f
595 nop
596 ret
597 restore %g1, 0, %o0 /* kcopy: hand the errno in %g1 back as the return value */
598
599 3:
600 !
601 ! We're here via bcopy. There *must* have been an error handler
602 ! in place otherwise we would have died a nasty death already.
603 !
604 jmp %l6 ! goto real handler
605 restore %g0, 0, %o0 ! dispose of copy window
606
607 /*
608 * We got here because of a fault in .copyerr. We can't safely restore fp
609 * state, so we panic.
610 */
611 fp_panic_msg:
612 .asciz "Unable to restore fp state after copy operation"
613
614 .align 4
615 .copyerr2:
616 set fp_panic_msg, %o0 /* panic message is the first argument */
617 call panic
618 nop
619 SET_SIZE(kcopy)
620 #endif /* lint */
621
622
623 /*
624 * Copy a block of storage - must not overlap (from + len <= to).
625 * Registers: l6 - saved t_lofault
626 *
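627 * Copy a page of memory.
628 * Assumes double word alignment and a count >= 256.
 * NOTE(review): the two lines above read like a leftover from a page-copy
 * routine. The bcopy code below also handles short counts (.bytecp) and
 * destinations that are not double-word aligned - confirm and prune.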
629 */
630 #if defined(lint)
631
632 /* ARGSUSED */
633 void
634 bcopy(const void *from, void *to, size_t count)
635 {} /* intentionally empty: compiled only under lint; the assembly version follows the #else */
636
637 #else /* lint */
638
639 ENTRY(bcopy)
640
641 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
642 ldn [THREAD_REG + T_LOFAULT], %l6 ! save t_lofault
643 tst %l6
644 !
645 ! We've already captured whether t_lofault was zero on entry.
646 ! We need to mark ourselves as being from bcopy since both
647 ! kcopy and bcopy use the same code path. If BCOPY_FLAG is set
648 ! and the saved lofault was zero, we won't reset lofault on
649 ! returning.
650 !
651 or %l6, BCOPY_FLAG, %l6
652 bz,pt %ncc, .do_copy
653 sethi %hi(.copyerr), %o2
654 or %o2, %lo(.copyerr), %o2
655 membar #Sync ! sync error barrier
656 stn %o2, [THREAD_REG + T_LOFAULT] ! install new vector
657
658 .do_copy:
659 cmp %i2, 12 ! for small counts
660 blu %ncc, .bytecp ! just copy bytes
661 .empty
662
663 cmp %i2, VIS_COPY_THRESHOLD ! for large counts
664 blu,pt %ncc, .bcb_punt
665 .empty
666
667 !
668 ! Check to see if VIS acceleration is enabled
669 !
670 sethi %hi(use_hw_bcopy), %o2
671 ld [%o2 + %lo(use_hw_bcopy)], %o2
672 tst %o2
673 bz,pn %icc, .bcb_punt
674 nop
675
676 subcc %i1, %i0, %i3
677 bneg,a,pn %ncc, 1f
678 neg %i3
679 1:
680 /*
681 * Compare against 256 since we should be checking block addresses
682 * and (dest & ~63) - (src & ~63) can be 3 blocks even if
683 * src = dest + (64 * 3) + 63.
684 */
685 cmp %i3, 256
686 blu,pn %ncc, .bcb_punt
687 nop
688
689 ldn [THREAD_REG + T_LWP], %o3
690 tst %o3
691 bnz,pt %ncc, 1f
692 nop
693
694 ! kpreempt_disable();
695 ldsb [THREAD_REG + T_PREEMPT], %o2
696 inc %o2
697 stb %o2, [THREAD_REG + T_PREEMPT]
698
699 1:
700 rd %fprs, %o2 ! check for unused fp
701 st %o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
702 btst FPRS_FEF, %o2
703 bz,a %icc, .do_blockcopy
704 wr %g0, FPRS_FEF, %fprs
705
706 .bcb_fpregs_inuse:
707 cmp %i2, VIS_COPY_THRESHOLD+(64*4) ! for large counts (larger
708 bgeu %ncc, 1f ! if we have to save the fpregs)
709 nop
710
711 tst %o3
712 bnz,pt %ncc, .bcb_punt
713 nop
714
715 ldsb [THREAD_REG + T_PREEMPT], %l0
716 deccc %l0
717 bnz,pn %icc, .bcb_punt
718 stb %l0, [THREAD_REG + T_PREEMPT]
719
720 ! Check for a kernel preemption request
721 ldn [THREAD_REG + T_CPU], %l0
722 ldub [%l0 + CPU_KPRUNRUN], %l0
723 tst %l0
724 bz,pt %icc, .bcb_punt
725 nop
726
727 ! Attempt to preempt
728 call kpreempt
729 rdpr %pil, %o0 ! pass %pil
730
731 ba,pt %ncc, .bcb_punt
732 nop
733
734 1:
735 wr %g0, FPRS_FEF, %fprs
736
737 ! save in-use fpregs on stack
738 membar #Sync
739 add %fp, STACK_BIAS - 257, %o2
740 and %o2, -64, %o2
741 stda %d0, [%o2]ASI_BLK_P
742 add %o2, 64, %o2
743 stda %d16, [%o2]ASI_BLK_P
744 add %o2, 64, %o2
745 stda %d32, [%o2]ASI_BLK_P
746 add %o2, 64, %o2
747 stda %d48, [%o2]ASI_BLK_P
748 membar #Sync
749
750 .do_blockcopy:
751 membar #StoreStore|#StoreLoad|#LoadStore
752
753 rd %gsr, %o2
754 st %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
755
756 ! Set the lower bit in the saved t_lofault to indicate
757 ! that we need to clear the %fprs register on the way
758 ! out
759 or %l6, FPUSED_FLAG, %l6
760
761 ! Swap src/dst since the code below is memcpy code
762 ! and memcpy/bcopy have different calling sequences
763 mov %i1, %i5
764 mov %i0, %i1
765 mov %i5, %i0
766
767 !!! This code is nearly identical to the version in the sun4u
768 !!! libc_psr. Most bugfixes made to that file should be
769 !!! merged into this routine.
770
771 andcc %i0, 7, %o3
772 bz,pt %ncc, blkcpy
773 sub %o3, 8, %o3
774 neg %o3
775 sub %i2, %o3, %i2
776
777 ! Align Destination on double-word boundary
778
779 2: ldub [%i1], %o4
780 inc %i1
781 inc %i0
782 deccc %o3
783 bgu %ncc, 2b
784 stb %o4, [%i0 - 1]
785 blkcpy:
786 andcc %i0, 63, %i3
787 bz,pn %ncc, blalign ! now block aligned
788 sub %i3, 64, %i3
789 neg %i3 ! bytes till block aligned
790 sub %i2, %i3, %i2 ! update %i2 with new count
791
792 ! Copy %i3 bytes till dst is block (64 byte) aligned. use
793 ! double word copies.
794
795 alignaddr %i1, %g0, %g1
796 ldd [%g1], %d0
797 add %g1, 8, %g1
798 6:
799 ldd [%g1], %d2
800 add %g1, 8, %g1
801 subcc %i3, 8, %i3
802 faligndata %d0, %d2, %d8
803 std %d8, [%i0]
804 add %i1, 8, %i1
805 bz,pn %ncc, blalign
806 add %i0, 8, %i0
807 ldd [%g1], %d0
808 add %g1, 8, %g1
809 subcc %i3, 8, %i3
810 faligndata %d2, %d0, %d8
811 std %d8, [%i0]
812 add %i1, 8, %i1
813 bgu,pn %ncc, 6b
814 add %i0, 8, %i0
815
816 blalign:
817 membar #StoreLoad
818 ! %i2 = total length
819 ! %i3 = blocks (length - 64) / 64
820 ! %i4 = doubles remaining (length - blocks)
821 sub %i2, 64, %i3
822 andn %i3, 63, %i3
823 sub %i2, %i3, %i4
824 andn %i4, 7, %i4
825 sub %i4, 16, %i4
826 sub %i2, %i4, %i2
827 sub %i2, %i3, %i2
828
829 andn %i1, 0x3f, %l7 ! blk aligned address
830 alignaddr %i1, %g0, %g0 ! gen %gsr
831
832 srl %i1, 3, %l5 ! bits 3,4,5 are now least sig in %l5
833 andcc %l5, 7, %i5 ! mask everything except bits 1,2 3
834 add %i1, %i4, %i1
835 add %i1, %i3, %i1
836
837 ldda [%l7]ASI_BLK_P, %d0
838 add %l7, 64, %l7
839 ldda [%l7]ASI_BLK_P, %d16
840 add %l7, 64, %l7
841 ldda [%l7]ASI_BLK_P, %d32
842 add %l7, 64, %l7
843 sub %i3, 128, %i3
844
845 ! switch statement to get us to the right 8 byte blk within a
846 ! 64 byte block
847 cmp %i5, 4
848 bgeu,a hlf
849 cmp %i5, 6
850 cmp %i5, 2
851 bgeu,a sqtr
852 nop
853 cmp %i5, 1
854 be,a seg1
855 nop
856 ba,pt %ncc, seg0
857 nop
858 sqtr:
859 be,a seg2
860 nop
861 ba,pt %ncc, seg3
862 nop
863
864 hlf:
865 bgeu,a fqtr
866 nop
867 cmp %i5, 5
868 be,a seg5
869 nop
870 ba,pt %ncc, seg4
871 nop
872 fqtr:
873 be,a seg6
874 nop
875 ba,pt %ncc, seg7
876 nop
877
878
879 seg0:
880 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
881 FALIGN_D0
882 ldda [%l7]ASI_BLK_P, %d0
883 stda %d48, [%i0]ASI_BLK_P
884 add %l7, 64, %l7
885 subcc %i3, 64, %i3
886 bz,pn %ncc, 0f
887 add %i0, 64, %i0
888 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
889 FALIGN_D16
890 ldda [%l7]ASI_BLK_P, %d16
891 stda %d48, [%i0]ASI_BLK_P
892 add %l7, 64, %l7
893 subcc %i3, 64, %i3
894 bz,pn %ncc, 1f
895 add %i0, 64, %i0
896 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
897 FALIGN_D32
898 ldda [%l7]ASI_BLK_P, %d32
899 stda %d48, [%i0]ASI_BLK_P
900 add %l7, 64, %l7
901 subcc %i3, 64, %i3
902 bz,pn %ncc, 2f
903 add %i0, 64, %i0
904 ba,a,pt %ncc, seg0
905
906 0:
907 FALIGN_D16
908 stda %d48, [%i0]ASI_BLK_P
909 add %i0, 64, %i0
910 membar #Sync
911 FALIGN_D32
912 stda %d48, [%i0]ASI_BLK_P
913 ba,pt %ncc, blkd0
914 add %i0, 64, %i0
915
916 1:
917 FALIGN_D32
918 stda %d48, [%i0]ASI_BLK_P
919 add %i0, 64, %i0
920 membar #Sync
921 FALIGN_D0
922 stda %d48, [%i0]ASI_BLK_P
923 ba,pt %ncc, blkd16
924 add %i0, 64, %i0
925
926 2:
927 FALIGN_D0
928 stda %d48, [%i0]ASI_BLK_P
929 add %i0, 64, %i0
930 membar #Sync
931 FALIGN_D16
932 stda %d48, [%i0]ASI_BLK_P
933 ba,pt %ncc, blkd32
934 add %i0, 64, %i0
935
936 seg1:
937 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
938 FALIGN_D2
939 ldda [%l7]ASI_BLK_P, %d0
940 stda %d48, [%i0]ASI_BLK_P
941 add %l7, 64, %l7
942 subcc %i3, 64, %i3
943 bz,pn %ncc, 0f
944 add %i0, 64, %i0
945 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
946 FALIGN_D18
947 ldda [%l7]ASI_BLK_P, %d16
948 stda %d48, [%i0]ASI_BLK_P
949 add %l7, 64, %l7
950 subcc %i3, 64, %i3
951 bz,pn %ncc, 1f
952 add %i0, 64, %i0
953 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
954 FALIGN_D34
955 ldda [%l7]ASI_BLK_P, %d32
956 stda %d48, [%i0]ASI_BLK_P
957 add %l7, 64, %l7
958 subcc %i3, 64, %i3
959 bz,pn %ncc, 2f
960 add %i0, 64, %i0
961 ba,a,pt %ncc, seg1
962 0:
963 FALIGN_D18
964 stda %d48, [%i0]ASI_BLK_P
965 add %i0, 64, %i0
966 membar #Sync
967 FALIGN_D34
968 stda %d48, [%i0]ASI_BLK_P
969 ba,pt %ncc, blkd2
970 add %i0, 64, %i0
971
972 1:
973 FALIGN_D34
974 stda %d48, [%i0]ASI_BLK_P
975 add %i0, 64, %i0
976 membar #Sync
977 FALIGN_D2
978 stda %d48, [%i0]ASI_BLK_P
979 ba,pt %ncc, blkd18
980 add %i0, 64, %i0
981
982 2:
983 FALIGN_D2
984 stda %d48, [%i0]ASI_BLK_P
985 add %i0, 64, %i0
986 membar #Sync
987 FALIGN_D18
988 stda %d48, [%i0]ASI_BLK_P
989 ba,pt %ncc, blkd34
990 add %i0, 64, %i0
991
992 seg2:
993 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
994 FALIGN_D4
995 ldda [%l7]ASI_BLK_P, %d0
996 stda %d48, [%i0]ASI_BLK_P
997 add %l7, 64, %l7
998 subcc %i3, 64, %i3
999 bz,pn %ncc, 0f
1000 add %i0, 64, %i0
1001 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1002 FALIGN_D20
1003 ldda [%l7]ASI_BLK_P, %d16
1004 stda %d48, [%i0]ASI_BLK_P
1005 add %l7, 64, %l7
1006 subcc %i3, 64, %i3
1007 bz,pn %ncc, 1f
1008 add %i0, 64, %i0
1009 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1010 FALIGN_D36
1011 ldda [%l7]ASI_BLK_P, %d32
1012 stda %d48, [%i0]ASI_BLK_P
1013 add %l7, 64, %l7
1014 subcc %i3, 64, %i3
1015 bz,pn %ncc, 2f
1016 add %i0, 64, %i0
1017 ba,a,pt %ncc, seg2
1018
1019 0:
1020 FALIGN_D20
1021 stda %d48, [%i0]ASI_BLK_P
1022 add %i0, 64, %i0
1023 membar #Sync
1024 FALIGN_D36
1025 stda %d48, [%i0]ASI_BLK_P
1026 ba,pt %ncc, blkd4
1027 add %i0, 64, %i0
1028
1029 1:
1030 FALIGN_D36
1031 stda %d48, [%i0]ASI_BLK_P
1032 add %i0, 64, %i0
1033 membar #Sync
1034 FALIGN_D4
1035 stda %d48, [%i0]ASI_BLK_P
1036 ba,pt %ncc, blkd20
1037 add %i0, 64, %i0
1038
1039 2:
1040 FALIGN_D4
1041 stda %d48, [%i0]ASI_BLK_P
1042 add %i0, 64, %i0
1043 membar #Sync
1044 FALIGN_D20
1045 stda %d48, [%i0]ASI_BLK_P
1046 ba,pt %ncc, blkd36
1047 add %i0, 64, %i0
1048
1049 seg3:
1050 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1051 FALIGN_D6
1052 ldda [%l7]ASI_BLK_P, %d0
1053 stda %d48, [%i0]ASI_BLK_P
1054 add %l7, 64, %l7
1055 subcc %i3, 64, %i3
1056 bz,pn %ncc, 0f
1057 add %i0, 64, %i0
1058 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1059 FALIGN_D22
1060 ldda [%l7]ASI_BLK_P, %d16
1061 stda %d48, [%i0]ASI_BLK_P
1062 add %l7, 64, %l7
1063 subcc %i3, 64, %i3
1064 bz,pn %ncc, 1f
1065 add %i0, 64, %i0
1066 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1067 FALIGN_D38
1068 ldda [%l7]ASI_BLK_P, %d32
1069 stda %d48, [%i0]ASI_BLK_P
1070 add %l7, 64, %l7
1071 subcc %i3, 64, %i3
1072 bz,pn %ncc, 2f
1073 add %i0, 64, %i0
1074 ba,a,pt %ncc, seg3
1075
1076 0:
1077 FALIGN_D22
1078 stda %d48, [%i0]ASI_BLK_P
1079 add %i0, 64, %i0
1080 membar #Sync
1081 FALIGN_D38
1082 stda %d48, [%i0]ASI_BLK_P
1083 ba,pt %ncc, blkd6
1084 add %i0, 64, %i0
1085
1086 1:
1087 FALIGN_D38
1088 stda %d48, [%i0]ASI_BLK_P
1089 add %i0, 64, %i0
1090 membar #Sync
1091 FALIGN_D6
1092 stda %d48, [%i0]ASI_BLK_P
1093 ba,pt %ncc, blkd22
1094 add %i0, 64, %i0
1095
1096 2:
1097 FALIGN_D6
1098 stda %d48, [%i0]ASI_BLK_P
1099 add %i0, 64, %i0
1100 membar #Sync
1101 FALIGN_D22
1102 stda %d48, [%i0]ASI_BLK_P
1103 ba,pt %ncc, blkd38
1104 add %i0, 64, %i0
1105
1106 seg4:
1107 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1108 FALIGN_D8
1109 ldda [%l7]ASI_BLK_P, %d0
1110 stda %d48, [%i0]ASI_BLK_P
1111 add %l7, 64, %l7
1112 subcc %i3, 64, %i3
1113 bz,pn %ncc, 0f
1114 add %i0, 64, %i0
1115 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1116 FALIGN_D24
1117 ldda [%l7]ASI_BLK_P, %d16
1118 stda %d48, [%i0]ASI_BLK_P
1119 add %l7, 64, %l7
1120 subcc %i3, 64, %i3
1121 bz,pn %ncc, 1f
1122 add %i0, 64, %i0
1123 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1124 FALIGN_D40
1125 ldda [%l7]ASI_BLK_P, %d32
1126 stda %d48, [%i0]ASI_BLK_P
1127 add %l7, 64, %l7
1128 subcc %i3, 64, %i3
1129 bz,pn %ncc, 2f
1130 add %i0, 64, %i0
1131 ba,a,pt %ncc, seg4
1132
1133 0:
1134 FALIGN_D24
1135 stda %d48, [%i0]ASI_BLK_P
1136 add %i0, 64, %i0
1137 membar #Sync
1138 FALIGN_D40
1139 stda %d48, [%i0]ASI_BLK_P
1140 ba,pt %ncc, blkd8
1141 add %i0, 64, %i0
1142
1143 1:
1144 FALIGN_D40
1145 stda %d48, [%i0]ASI_BLK_P
1146 add %i0, 64, %i0
1147 membar #Sync
1148 FALIGN_D8
1149 stda %d48, [%i0]ASI_BLK_P
1150 ba,pt %ncc, blkd24
1151 add %i0, 64, %i0
1152
1153 2:
1154 FALIGN_D8
1155 stda %d48, [%i0]ASI_BLK_P
1156 add %i0, 64, %i0
1157 membar #Sync
1158 FALIGN_D24
1159 stda %d48, [%i0]ASI_BLK_P
1160 ba,pt %ncc, blkd40
1161 add %i0, 64, %i0
1162
1163 seg5:
1164 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1165 FALIGN_D10
1166 ldda [%l7]ASI_BLK_P, %d0
1167 stda %d48, [%i0]ASI_BLK_P
1168 add %l7, 64, %l7
1169 subcc %i3, 64, %i3
1170 bz,pn %ncc, 0f
1171 add %i0, 64, %i0
1172 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1173 FALIGN_D26
1174 ldda [%l7]ASI_BLK_P, %d16
1175 stda %d48, [%i0]ASI_BLK_P
1176 add %l7, 64, %l7
1177 subcc %i3, 64, %i3
1178 bz,pn %ncc, 1f
1179 add %i0, 64, %i0
1180 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1181 FALIGN_D42
1182 ldda [%l7]ASI_BLK_P, %d32
1183 stda %d48, [%i0]ASI_BLK_P
1184 add %l7, 64, %l7
1185 subcc %i3, 64, %i3
1186 bz,pn %ncc, 2f
1187 add %i0, 64, %i0
1188 ba,a,pt %ncc, seg5
1189
1190 0:
1191 FALIGN_D26
1192 stda %d48, [%i0]ASI_BLK_P
1193 add %i0, 64, %i0
1194 membar #Sync
1195 FALIGN_D42
1196 stda %d48, [%i0]ASI_BLK_P
1197 ba,pt %ncc, blkd10
1198 add %i0, 64, %i0
1199
1200 1:
1201 FALIGN_D42
1202 stda %d48, [%i0]ASI_BLK_P
1203 add %i0, 64, %i0
1204 membar #Sync
1205 FALIGN_D10
1206 stda %d48, [%i0]ASI_BLK_P
1207 ba,pt %ncc, blkd26
1208 add %i0, 64, %i0
1209
1210 2:
1211 FALIGN_D10
1212 stda %d48, [%i0]ASI_BLK_P
1213 add %i0, 64, %i0
1214 membar #Sync
1215 FALIGN_D26
1216 stda %d48, [%i0]ASI_BLK_P
1217 ba,pt %ncc, blkd42
1218 add %i0, 64, %i0
1219
1220 seg6:
1221 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1222 FALIGN_D12
1223 ldda [%l7]ASI_BLK_P, %d0
1224 stda %d48, [%i0]ASI_BLK_P
1225 add %l7, 64, %l7
1226 subcc %i3, 64, %i3
1227 bz,pn %ncc, 0f
1228 add %i0, 64, %i0
1229 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1230 FALIGN_D28
1231 ldda [%l7]ASI_BLK_P, %d16
1232 stda %d48, [%i0]ASI_BLK_P
1233 add %l7, 64, %l7
1234 subcc %i3, 64, %i3
1235 bz,pn %ncc, 1f
1236 add %i0, 64, %i0
1237 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1238 FALIGN_D44
1239 ldda [%l7]ASI_BLK_P, %d32
1240 stda %d48, [%i0]ASI_BLK_P
1241 add %l7, 64, %l7
1242 subcc %i3, 64, %i3
1243 bz,pn %ncc, 2f
1244 add %i0, 64, %i0
1245 ba,a,pt %ncc, seg6
1246
1247 0:
1248 FALIGN_D28
1249 stda %d48, [%i0]ASI_BLK_P
1250 add %i0, 64, %i0
1251 membar #Sync
1252 FALIGN_D44
1253 stda %d48, [%i0]ASI_BLK_P
1254 ba,pt %ncc, blkd12
1255 add %i0, 64, %i0
1256
1257 1:
1258 FALIGN_D44
1259 stda %d48, [%i0]ASI_BLK_P
1260 add %i0, 64, %i0
1261 membar #Sync
1262 FALIGN_D12
1263 stda %d48, [%i0]ASI_BLK_P
1264 ba,pt %ncc, blkd28
1265 add %i0, 64, %i0
1266
1267 2:
1268 FALIGN_D12
1269 stda %d48, [%i0]ASI_BLK_P
1270 add %i0, 64, %i0
1271 membar #Sync
1272 FALIGN_D28
1273 stda %d48, [%i0]ASI_BLK_P
1274 ba,pt %ncc, blkd44
1275 add %i0, 64, %i0
1276
1277 seg7:
1278 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1279 FALIGN_D14
1280 ldda [%l7]ASI_BLK_P, %d0
1281 stda %d48, [%i0]ASI_BLK_P
1282 add %l7, 64, %l7
1283 subcc %i3, 64, %i3
1284 bz,pn %ncc, 0f
1285 add %i0, 64, %i0
1286 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1287 FALIGN_D30
1288 ldda [%l7]ASI_BLK_P, %d16
1289 stda %d48, [%i0]ASI_BLK_P
1290 add %l7, 64, %l7
1291 subcc %i3, 64, %i3
1292 bz,pn %ncc, 1f
1293 add %i0, 64, %i0
1294 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1295 FALIGN_D46
1296 ldda [%l7]ASI_BLK_P, %d32
1297 stda %d48, [%i0]ASI_BLK_P
1298 add %l7, 64, %l7
1299 subcc %i3, 64, %i3
1300 bz,pn %ncc, 2f
1301 add %i0, 64, %i0
1302 ba,a,pt %ncc, seg7
1303
1304 0:
1305 FALIGN_D30
1306 stda %d48, [%i0]ASI_BLK_P
1307 add %i0, 64, %i0
1308 membar #Sync
1309 FALIGN_D46
1310 stda %d48, [%i0]ASI_BLK_P
1311 ba,pt %ncc, blkd14
1312 add %i0, 64, %i0
1313
1314 1:
1315 FALIGN_D46
1316 stda %d48, [%i0]ASI_BLK_P
1317 add %i0, 64, %i0
1318 membar #Sync
1319 FALIGN_D14
1320 stda %d48, [%i0]ASI_BLK_P
1321 ba,pt %ncc, blkd30
1322 add %i0, 64, %i0
1323
1324 2:
1325 FALIGN_D14
1326 stda %d48, [%i0]ASI_BLK_P
1327 add %i0, 64, %i0
1328 membar #Sync
1329 FALIGN_D30
1330 stda %d48, [%i0]ASI_BLK_P
1331 ba,pt %ncc, blkd46
1332 add %i0, 64, %i0
1333
1334
1335 !
1336 ! dribble out the last partial block
1337 !
1338 blkd0:
1339 subcc %i4, 8, %i4
1340 blu,pn %ncc, blkdone
1341 faligndata %d0, %d2, %d48
1342 std %d48, [%i0]
1343 add %i0, 8, %i0
1344 blkd2:
1345 subcc %i4, 8, %i4
1346 blu,pn %ncc, blkdone
1347 faligndata %d2, %d4, %d48
1348 std %d48, [%i0]
1349 add %i0, 8, %i0
1350 blkd4:
1351 subcc %i4, 8, %i4
1352 blu,pn %ncc, blkdone
1353 faligndata %d4, %d6, %d48
1354 std %d48, [%i0]
1355 add %i0, 8, %i0
1356 blkd6:
1357 subcc %i4, 8, %i4
1358 blu,pn %ncc, blkdone
1359 faligndata %d6, %d8, %d48
1360 std %d48, [%i0]
1361 add %i0, 8, %i0
1362 blkd8:
1363 subcc %i4, 8, %i4
1364 blu,pn %ncc, blkdone
1365 faligndata %d8, %d10, %d48
1366 std %d48, [%i0]
1367 add %i0, 8, %i0
1368 blkd10:
1369 subcc %i4, 8, %i4
1370 blu,pn %ncc, blkdone
1371 faligndata %d10, %d12, %d48
1372 std %d48, [%i0]
1373 add %i0, 8, %i0
1374 blkd12:
1375 subcc %i4, 8, %i4
1376 blu,pn %ncc, blkdone
1377 faligndata %d12, %d14, %d48
1378 std %d48, [%i0]
1379 add %i0, 8, %i0
1380 blkd14:
1381 subcc %i4, 8, %i4
1382 blu,pn %ncc, blkdone
1383 fsrc1 %d14, %d0
1384 ba,a,pt %ncc, blkleft
1385
1386 blkd16:
1387 subcc %i4, 8, %i4
1388 blu,pn %ncc, blkdone
1389 faligndata %d16, %d18, %d48
1390 std %d48, [%i0]
1391 add %i0, 8, %i0
1392 blkd18:
1393 subcc %i4, 8, %i4
1394 blu,pn %ncc, blkdone
1395 faligndata %d18, %d20, %d48
1396 std %d48, [%i0]
1397 add %i0, 8, %i0
1398 blkd20:
1399 subcc %i4, 8, %i4
1400 blu,pn %ncc, blkdone
1401 faligndata %d20, %d22, %d48
1402 std %d48, [%i0]
1403 add %i0, 8, %i0
1404 blkd22:
1405 subcc %i4, 8, %i4
1406 blu,pn %ncc, blkdone
1407 faligndata %d22, %d24, %d48
1408 std %d48, [%i0]
1409 add %i0, 8, %i0
1410 blkd24:
1411 subcc %i4, 8, %i4
1412 blu,pn %ncc, blkdone
1413 faligndata %d24, %d26, %d48
1414 std %d48, [%i0]
1415 add %i0, 8, %i0
1416 blkd26:
1417 subcc %i4, 8, %i4
1418 blu,pn %ncc, blkdone
1419 faligndata %d26, %d28, %d48
1420 std %d48, [%i0]
1421 add %i0, 8, %i0
1422 blkd28:
1423 subcc %i4, 8, %i4
1424 blu,pn %ncc, blkdone
1425 faligndata %d28, %d30, %d48
1426 std %d48, [%i0]
1427 add %i0, 8, %i0
1428 blkd30:
1429 subcc %i4, 8, %i4
1430 blu,pn %ncc, blkdone
1431 fsrc1 %d30, %d0
1432 ba,a,pt %ncc, blkleft
1433 blkd32:
1434 subcc %i4, 8, %i4
1435 blu,pn %ncc, blkdone
1436 faligndata %d32, %d34, %d48
1437 std %d48, [%i0]
1438 add %i0, 8, %i0
1439 blkd34:
1440 subcc %i4, 8, %i4
1441 blu,pn %ncc, blkdone
1442 faligndata %d34, %d36, %d48
1443 std %d48, [%i0]
1444 add %i0, 8, %i0
1445 blkd36:
1446 subcc %i4, 8, %i4
1447 blu,pn %ncc, blkdone
1448 faligndata %d36, %d38, %d48
1449 std %d48, [%i0]
1450 add %i0, 8, %i0
1451 blkd38:
1452 subcc %i4, 8, %i4
1453 blu,pn %ncc, blkdone
1454 faligndata %d38, %d40, %d48
1455 std %d48, [%i0]
1456 add %i0, 8, %i0
1457 blkd40:
1458 subcc %i4, 8, %i4
1459 blu,pn %ncc, blkdone
1460 faligndata %d40, %d42, %d48
1461 std %d48, [%i0]
1462 add %i0, 8, %i0
1463 blkd42:
1464 subcc %i4, 8, %i4
1465 blu,pn %ncc, blkdone
1466 faligndata %d42, %d44, %d48
1467 std %d48, [%i0]
1468 add %i0, 8, %i0
1469 blkd44:
1470 subcc %i4, 8, %i4
1471 blu,pn %ncc, blkdone
1472 faligndata %d44, %d46, %d48
1473 std %d48, [%i0]
1474 add %i0, 8, %i0
1475 blkd46:
1476 subcc %i4, 8, %i4
1477 blu,pn %ncc, blkdone
1478 fsrc1 %d46, %d0
1479
1480 blkleft:
1481 1:
1482 ldd [%l7], %d2
1483 add %l7, 8, %l7
1484 subcc %i4, 8, %i4
1485 faligndata %d0, %d2, %d8
1486 std %d8, [%i0]
1487 blu,pn %ncc, blkdone
1488 add %i0, 8, %i0
1489 ldd [%l7], %d0
1490 add %l7, 8, %l7
1491 subcc %i4, 8, %i4
1492 faligndata %d2, %d0, %d8
1493 std %d8, [%i0]
1494 bgeu,pt %ncc, 1b
1495 add %i0, 8, %i0
1496
1497 blkdone:
1498 tst %i2
1499 bz,pt %ncc, .bcb_exit
1500 and %l3, 0x4, %l3 ! fprs.du = fprs.dl = 0
1501
1502 7: ldub [%i1], %i4
1503 inc %i1
1504 inc %i0
1505 deccc %i2
1506 bgu,pt %ncc, 7b
1507 stb %i4, [%i0 - 1]
1508
1509 .bcb_exit:
1510 membar #StoreLoad|#StoreStore
1511 btst FPUSED_FLAG, %l6
1512 bz %icc, 1f
1513 and %l6, COPY_FLAGS, %l1 ! Store flags in %l1
1514 ! We can't clear the flags from %l6 yet.
1515 ! If there's an error, .copyerr will
1516 ! need them
1517
1518 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr
1519 wr %o2, 0, %gsr
1520
1521 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
1522 btst FPRS_FEF, %o3
1523 bz %icc, 4f
1524 nop
1525
1526 ! restore fpregs from stack
1527 membar #Sync
1528 add %fp, STACK_BIAS - 257, %o2
1529 and %o2, -64, %o2
1530 ldda [%o2]ASI_BLK_P, %d0
1531 add %o2, 64, %o2
1532 ldda [%o2]ASI_BLK_P, %d16
1533 add %o2, 64, %o2
1534 ldda [%o2]ASI_BLK_P, %d32
1535 add %o2, 64, %o2
1536 ldda [%o2]ASI_BLK_P, %d48
1537 membar #Sync
1538
1539 ba,pt %ncc, 2f
1540 wr %o3, 0, %fprs ! restore fprs
1541
1542 4:
1543 FZERO ! zero all of the fpregs
1544 wr %o3, 0, %fprs ! restore fprs
1545
1546 2: ldn [THREAD_REG + T_LWP], %o2
1547 tst %o2
1548 bnz,pt %ncc, 1f
1549 nop
1550
1551 ldsb [THREAD_REG + T_PREEMPT], %l0
1552 deccc %l0
1553 bnz,pn %ncc, 1f
1554 stb %l0, [THREAD_REG + T_PREEMPT]
1555
1556 ! Check for a kernel preemption request
1557 ldn [THREAD_REG + T_CPU], %l0
1558 ldub [%l0 + CPU_KPRUNRUN], %l0
1559 tst %l0
1560 bnz,a,pt %ncc, 1f ! Need to call kpreempt?
1561 or %l1, KPREEMPT_FLAG, %l1 ! If so, set the flag
1562
1563 1:
1564 btst BCOPY_FLAG, %l1
1565 bz,pn %icc, 3f
1566 andncc %l6, COPY_FLAGS, %l6
1567
1568 !
1569 ! Here via bcopy. Check to see if the handler was NULL.
1570 ! If so, just return quietly. Otherwise, reset the
1571 ! handler and go home.
1572 !
1573 bnz,pn %ncc, 3f
1574 nop
1575
1576 !
1577 ! Null handler. Check for kpreempt flag, call if necessary,
1578 ! then return.
1579 !
1580 btst KPREEMPT_FLAG, %l1
1581 bz,pt %icc, 2f
1582 nop
1583 call kpreempt
1584 rdpr %pil, %o0 ! pass %pil
1585 2:
1586 ret
1587 restore %g0, 0, %o0
1588
1589 !
1590 ! Here via kcopy or bcopy with a handler. Reset the
1591 ! fault handler.
1592 !
1593 3:
1594 membar #Sync
1595 stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
1596
1597 ! call kpreempt if necessary
1598 btst KPREEMPT_FLAG, %l1
1599 bz,pt %icc, 4f
1600 nop
1601 call kpreempt
1602 rdpr %pil, %o0
1603 4:
1604 ret
1605 restore %g0, 0, %o0
1606
1607 .bcb_punt:
1608 !
1609 ! use aligned transfers where possible
1610 !
1611 xor %i0, %i1, %o4 ! xor from and to address
1612 btst 7, %o4 ! if lower three bits zero
1613 bz %icc, .aldoubcp ! can align on double boundary
1614 .empty ! assembler complaints about label
1615
1616 xor %i0, %i1, %o4 ! xor from and to address
1617 btst 3, %o4 ! if lower two bits zero
1618 bz %icc, .alwordcp ! can align on word boundary
1619 btst 3, %i0 ! delay slot, from address unaligned?
1620 !
1621 ! use aligned reads and writes where possible
1622 ! this differs from wordcp in that it copes
1623 ! with odd alignment between source and destination
1624 ! using word reads and writes with the proper shifts
1625 ! in between to align transfers to and from memory
1626 ! i0 - src address, i1 - dest address, i2 - count
1627 ! i3, i4 - tmps used for generating complete word
1628 ! i5 (word to write)
1629 ! l0 size in bits of upper part of source word (US)
1630 ! l1 size in bits of lower part of source word (LS = 32 - US)
1631 ! l2 size in bits of upper part of destination word (UD)
1632 ! l3 size in bits of lower part of destination word (LD = 32 - UD)
1633 ! l4 number of bytes leftover after aligned transfers complete
1634 ! l5 the number 32
1635 !
1636 mov 32, %l5 ! load an oft-needed constant
1637 bz .align_dst_only
1638 btst 3, %i1 ! is destnation address aligned?
1639 clr %i4 ! clear registers used in either case
1640 bz %icc, .align_src_only
1641 clr %l0
1642 !
1643 ! both source and destination addresses are unaligned
1644 !
1645 1: ! align source
1646 ldub [%i0], %i3 ! read a byte from source address
1647 add %i0, 1, %i0 ! increment source address
1648 or %i4, %i3, %i4 ! or in with previous bytes (if any)
1649 btst 3, %i0 ! is source aligned?
1650 add %l0, 8, %l0 ! increment size of upper source (US)
1651 bnz,a 1b
1652 sll %i4, 8, %i4 ! make room for next byte
1653
1654 sub %l5, %l0, %l1 ! generate shift left count (LS)
1655 sll %i4, %l1, %i4 ! prepare to get rest
1656 ld [%i0], %i3 ! read a word
1657 add %i0, 4, %i0 ! increment source address
1658 srl %i3, %l0, %i5 ! upper src bits into lower dst bits
1659 or %i4, %i5, %i5 ! merge
1660 mov 24, %l3 ! align destination
1661 1:
1662 srl %i5, %l3, %i4 ! prepare to write a single byte
1663 stb %i4, [%i1] ! write a byte
1664 add %i1, 1, %i1 ! increment destination address
1665 sub %i2, 1, %i2 ! decrement count
1666 btst 3, %i1 ! is destination aligned?
1667 bnz,a 1b
1668 sub %l3, 8, %l3 ! delay slot, decrement shift count (LD)
1669 sub %l5, %l3, %l2 ! generate shift left count (UD)
1670 sll %i5, %l2, %i5 ! move leftover into upper bytes
1671 cmp %l2, %l0 ! cmp # reqd to fill dst w old src left
1672 bgu %ncc, .more_needed ! need more to fill than we have
1673 nop
1674
1675 sll %i3, %l1, %i3 ! clear upper used byte(s)
1676 srl %i3, %l1, %i3
1677 ! get the odd bytes between alignments
1678 sub %l0, %l2, %l0 ! regenerate shift count
1679 sub %l5, %l0, %l1 ! generate new shift left count (LS)
1680 and %i2, 3, %l4 ! must do remaining bytes if count%4 > 0
1681 andn %i2, 3, %i2 ! # of aligned bytes that can be moved
1682 srl %i3, %l0, %i4
1683 or %i5, %i4, %i5
1684 st %i5, [%i1] ! write a word
1685 subcc %i2, 4, %i2 ! decrement count
1686 bz %ncc, .unalign_out
1687 add %i1, 4, %i1 ! increment destination address
1688
1689 b 2f
1690 sll %i3, %l1, %i5 ! get leftover into upper bits
1691 .more_needed:
1692 sll %i3, %l0, %i3 ! save remaining byte(s)
1693 srl %i3, %l0, %i3
1694 sub %l2, %l0, %l1 ! regenerate shift count
1695 sub %l5, %l1, %l0 ! generate new shift left count
1696 sll %i3, %l1, %i4 ! move to fill empty space
1697 b 3f
1698 or %i5, %i4, %i5 ! merge to complete word
1699 !
1700 ! the source address is aligned and destination is not
1701 !
1702 .align_dst_only:
1703 ld [%i0], %i4 ! read a word
1704 add %i0, 4, %i0 ! increment source address
1705 mov 24, %l0 ! initial shift alignment count
1706 1:
1707 srl %i4, %l0, %i3 ! prepare to write a single byte
1708 stb %i3, [%i1] ! write a byte
1709 add %i1, 1, %i1 ! increment destination address
1710 sub %i2, 1, %i2 ! decrement count
1711 btst 3, %i1 ! is destination aligned?
1712 bnz,a 1b
1713 sub %l0, 8, %l0 ! delay slot, decrement shift count
1714 .xfer:
1715 sub %l5, %l0, %l1 ! generate shift left count
1716 sll %i4, %l1, %i5 ! get leftover
1717 3:
1718 and %i2, 3, %l4 ! must do remaining bytes if count%4 > 0
1719 andn %i2, 3, %i2 ! # of aligned bytes that can be moved
1720 2:
1721 ld [%i0], %i3 ! read a source word
1722 add %i0, 4, %i0 ! increment source address
1723 srl %i3, %l0, %i4 ! upper src bits into lower dst bits
1724 or %i5, %i4, %i5 ! merge with upper dest bits (leftover)
1725 st %i5, [%i1] ! write a destination word
1726 subcc %i2, 4, %i2 ! decrement count
1727 bz %ncc, .unalign_out ! check if done
1728 add %i1, 4, %i1 ! increment destination address
1729 b 2b ! loop
1730 sll %i3, %l1, %i5 ! get leftover
1731 .unalign_out:
1732 tst %l4 ! any bytes leftover?
1733 bz %ncc, .cpdone
1734 .empty ! allow next instruction in delay slot
1735 1:
1736 sub %l0, 8, %l0 ! decrement shift
1737 srl %i3, %l0, %i4 ! upper src byte into lower dst byte
1738 stb %i4, [%i1] ! write a byte
1739 subcc %l4, 1, %l4 ! decrement count
1740 bz %ncc, .cpdone ! done?
1741 add %i1, 1, %i1 ! increment destination
1742 tst %l0 ! any more previously read bytes
1743 bnz %ncc, 1b ! we have leftover bytes
1744 mov %l4, %i2 ! delay slot, mv cnt where dbytecp wants
1745 b .dbytecp ! let dbytecp do the rest
1746 sub %i0, %i1, %i0 ! i0 gets the difference of src and dst
1747 !
1748 ! the destination address is aligned and the source is not
1749 !
1750 .align_src_only:
1751 ldub [%i0], %i3 ! read a byte from source address
1752 add %i0, 1, %i0 ! increment source address
1753 or %i4, %i3, %i4 ! or in with previous bytes (if any)
1754 btst 3, %i0 ! is source aligned?
1755 add %l0, 8, %l0 ! increment shift count (US)
1756 bnz,a .align_src_only
1757 sll %i4, 8, %i4 ! make room for next byte
1758 b,a .xfer
1759 !
1760 ! if from address unaligned for double-word moves,
1761 ! move bytes till it is, if count is < 56 it could take
1762 ! longer to align the thing than to do the transfer
1763 ! in word size chunks right away
1764 !
1765 .aldoubcp:
1766 cmp %i2, 56 ! if count < 56, use wordcp, it takes
1767 blu,a %ncc, .alwordcp ! longer to align doubles than words
1768 mov 3, %o0 ! mask for word alignment
1769 call .alignit ! copy bytes until aligned
1770 mov 7, %o0 ! mask for double alignment
1771 !
1772 ! source and destination are now double-word aligned
1773 ! i3 has aligned count returned by alignit
1774 !
1775 and %i2, 7, %i2 ! unaligned leftover count
1776 sub %i0, %i1, %i0 ! i0 gets the difference of src and dst
1777 5:
1778 ldx [%i0+%i1], %o4 ! read from address
1779 stx %o4, [%i1] ! write at destination address
1780 subcc %i3, 8, %i3 ! dec count
1781 bgu %ncc, 5b
1782 add %i1, 8, %i1 ! delay slot, inc to address
1783 cmp %i2, 4 ! see if we can copy a word
1784 blu %ncc, .dbytecp ! if 3 or less bytes use bytecp
1785 .empty
1786 !
1787 ! for leftover bytes we fall into wordcp, if needed
1788 !
1789 .wordcp:
1790 and %i2, 3, %i2 ! unaligned leftover count
1791 5:
1792 ld [%i0+%i1], %o4 ! read from address
1793 st %o4, [%i1] ! write at destination address
1794 subcc %i3, 4, %i3 ! dec count
1795 bgu %ncc, 5b
1796 add %i1, 4, %i1 ! delay slot, inc to address
1797 b,a .dbytecp
1798
1799 ! we come here to align copies on word boundaries
1800 .alwordcp:
1801 call .alignit ! go word-align it
1802 mov 3, %o0 ! bits that must be zero to be aligned
1803 b .wordcp
1804 sub %i0, %i1, %i0 ! i0 gets the difference of src and dst
1805
1806 !
1807 ! byte copy, works with any alignment
1808 !
1809 .bytecp:
1810 b .dbytecp
1811 sub %i0, %i1, %i0 ! i0 gets difference of src and dst
1812
1813 !
1814 ! differenced byte copy, works with any alignment
1815 ! assumes dest in %i1 and (source - dest) in %i0
1816 !
1817 1:
1818 stb %o4, [%i1] ! write to address
1819 inc %i1 ! inc to address
1820 .dbytecp:
1821 deccc %i2 ! dec count
1822 bgeu,a %ncc, 1b ! loop till done
1823 ldub [%i0+%i1], %o4 ! read from address
1824 !
1825 ! FPUSED_FLAG will not have been set in any path leading to
1826 ! this point. No need to deal with it.
1827 !
1828 .cpdone:
1829 btst BCOPY_FLAG, %l6
1830 bz,pn %icc, 2f
1831 andncc %l6, BCOPY_FLAG, %l6
1832 !
1833 ! Here via bcopy. Check to see if the handler was NULL.
1834 ! If so, just return quietly. Otherwise, reset the
1835 ! handler and go home.
1836 !
1837 bnz,pn %ncc, 2f
1838 nop
1839 !
1840 ! Null handler.
1841 !
1842 ret
1843 restore %g0, 0, %o0
1844 !
1845 ! Here via kcopy or bcopy with a handler. Reset the
1846 ! fault handler.
1847 !
1848 2:
1849 membar #Sync
1850 stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
1851 ret
1852 restore %g0, 0, %o0 ! return (0)
1853
1854 /*
1855 * Common code used to align transfers on word and doubleword
1856 * boundaries. Aligns source and destination and returns a count
1857 * of aligned bytes to transfer in %i3
1858 */
1859 1:
1860 inc %i0 ! inc from
1861 stb %o4, [%i1] ! write a byte
1862 inc %i1 ! inc to
1863 dec %i2 ! dec count
1864 .alignit:
1865 btst %o0, %i0 ! %o0 is bit mask to check for alignment
1866 bnz,a 1b
1867 ldub [%i0], %o4 ! read next byte
1868
1869 retl
1870 andn %i2, %o0, %i3 ! return size of aligned bytes
1871 SET_SIZE(bcopy)
1872
1873 #endif /* lint */
1874
1875 /*
1876 * Block copy with possibly overlapped operands.
 *
 * Register usage, following the lint prototype ovbcopy(from, to, count):
 * %o0 = from, %o1 = to, %o2 = count, %o3 = scratch.
 *
 * A zero count returns at once. If count <= |from - to| the two regions
 * do not overlap and the routine branches to bcopy. Otherwise the bytes
 * are moved one at a time: forwards when from >= to, and backwards
 * (highest offset first) when from < to, so that each source byte is
 * read before the copy can overwrite it.
1877 */
1878
1879 #if defined(lint)
1880
1881 /*ARGSUSED*/
1882 void
1883 ovbcopy(const void *from, void *to, size_t count)
1884 {}
1885
1886 #else /* lint */
1887
1888 ENTRY(ovbcopy)
1889 tst %o2 ! check count
1890 bgu,a %ncc, 1f ! count != 0: go copy; else fall through and return
1891 subcc %o0, %o1, %o3 ! difference of from and to address
1892
1893 retl ! return
1894 nop ! delay slot
1895 1:
1896 bneg,a %ncc, 2f ! difference negative? (delay slot annulled if not)
1897 neg %o3 ! if < 0, make it positive
1898 2: cmp %o2, %o3 ! cmp size and abs(from - to)
1899 bleu %ncc, bcopy ! if size <= abs(diff): use bcopy,
1900 .empty ! no overlap
1901 cmp %o0, %o1 ! compare from and to addresses
1902 blu %ncc, .ov_bkwd ! if from < to, copy backwards
1903 nop ! delay slot
1904 !
1905 ! Copy forwards.
1906 !
1907 .ov_fwd:
1908 ldub [%o0], %o3 ! read from address
1909 inc %o0 ! inc from address
1910 stb %o3, [%o1] ! write to address
1911 deccc %o2 ! dec count
1912 bgu %ncc, .ov_fwd ! loop till done
1913 inc %o1 ! inc to address
1914
1915 retl ! return
1916 nop ! delay slot
1917 !
1918 ! Copy backwards.
1919 !
1920 .ov_bkwd:
1921 deccc %o2 ! dec count
1922 ldub [%o0 + %o2], %o3 ! get byte at end of src
1923 bgu %ncc, .ov_bkwd ! loop till done
1924 stb %o3, [%o1 + %o2] ! delay slot, store at end of dst
1925
1926 retl ! return
1927 nop ! delay slot
1928 SET_SIZE(ovbcopy)
1929
1930 #endif /* lint */
1931
1932 /*
1933 * hwblkpagecopy()
1934 *
1935 * Copies exactly one page. This routine assumes the caller (ppcopy)
1936 * has already disabled kernel preemption and has checked
1937 * use_hw_bcopy.
 *
 * The copy moves PAGESIZE bytes in 64-byte units using block loads and
 * stores (ASI_BLK_P). Loads alternate between the blocks starting at %d0
 * and %d16; each loaded block is copied into %d32-%d46 with fsrc1 and
 * stored from there, except the last block, which is stored directly
 * from %d16.
 *
 * If FPRS_FEF is set in %fprs on entry, the blocks at %d0, %d16 and %d32
 * are first saved in a 64-byte aligned area of the new stack frame and
 * reloaded before returning. %fprs is restored to its entry value in
 * either case, and %o0 is set to 0 on return.
 *
 * NOTE(review): the block accesses presumably require src and dst to be
 * 64-byte aligned - confirm against the caller.
1938 */
1939 #ifdef lint
1940 /*ARGSUSED*/
1941 void
1942 hwblkpagecopy(const void *src, void *dst)
1943 { }
1944 #else /* lint */
1945 ENTRY(hwblkpagecopy)
1946 ! get another window w/space for three aligned blocks of saved fpregs
1947 save %sp, -SA(MINFRAME + 4*64), %sp ! 4th 64 bytes: presumably alignment slack
1948
1949 ! %i0 - source address (arg)
1950 ! %i1 - destination address (arg)
1951 ! %i2 - length of region (not arg)
1952 ! %l0 - saved fprs
1953 ! %l1 - pointer to saved fpregs
1954
1955 rd %fprs, %l0 ! check for unused fp
1956 btst FPRS_FEF, %l0
1957 bz 1f ! FEF clear: skip saving the fpregs
1958 membar #Sync ! delay slot, executed on both paths
1959
1960 ! save in-use fpregs on stack
1961 add %fp, STACK_BIAS - 193, %l1
1962 and %l1, -64, %l1 ! round down to a 64-byte boundary
1963 stda %d0, [%l1]ASI_BLK_P
1964 add %l1, 64, %l3 ! %l3 walks the save area, %l1 keeps its base
1965 stda %d16, [%l3]ASI_BLK_P
1966 add %l3, 64, %l3
1967 stda %d32, [%l3]ASI_BLK_P
1968 membar #Sync
1969
1970 1: wr %g0, FPRS_FEF, %fprs ! enable the FPU for the copy
1971 ldda [%i0]ASI_BLK_P, %d0 ! prime the loop with the first block
1972 add %i0, 64, %i0
1973 set PAGESIZE - 64, %i2 ! bytes left after the block just loaded
1974
1975 2: ldda [%i0]ASI_BLK_P, %d16 ! load next block while %d0 block is staged
1976 fsrc1 %d0, %d32
1977 fsrc1 %d2, %d34
1978 fsrc1 %d4, %d36
1979 fsrc1 %d6, %d38
1980 fsrc1 %d8, %d40
1981 fsrc1 %d10, %d42
1982 fsrc1 %d12, %d44
1983 fsrc1 %d14, %d46
1984 stda %d32, [%i1]ASI_BLK_P ! store the copy of the %d0 block
1985 add %i0, 64, %i0
1986 subcc %i2, 64, %i2
1987 bz,pn %ncc, 3f ! only the block now in %d16 is left
1988 add %i1, 64, %i1 ! delay slot, advance destination
1989 ldda [%i0]ASI_BLK_P, %d0 ! load next block while %d16 block is staged
1990 fsrc1 %d16, %d32
1991 fsrc1 %d18, %d34
1992 fsrc1 %d20, %d36
1993 fsrc1 %d22, %d38
1994 fsrc1 %d24, %d40
1995 fsrc1 %d26, %d42
1996 fsrc1 %d28, %d44
1997 fsrc1 %d30, %d46
1998 stda %d32, [%i1]ASI_BLK_P ! store the copy of the %d16 block
1999 add %i0, 64, %i0
2000 sub %i2, 64, %i2 ! no exit test here; count is tested at the bz above
2001 ba,pt %ncc, 2b
2002 add %i1, 64, %i1 ! delay slot, advance destination
2003
2004 3: membar #Sync
2005 btst FPRS_FEF, %l0 ! were the fpregs in use on entry?
2006 bz 4f
2007 stda %d16, [%i1]ASI_BLK_P ! delay slot (both paths): store the last block
2008
2009 ! restore fpregs from stack
2010 membar #Sync
2011 ldda [%l1]ASI_BLK_P, %d0
2012 add %l1, 64, %l3
2013 ldda [%l3]ASI_BLK_P, %d16
2014 add %l3, 64, %l3
2015 ldda [%l3]ASI_BLK_P, %d32
2016
2017 4: wr %l0, 0, %fprs ! restore fprs
2018 membar #Sync
2019 ret
2020 restore %g0, 0, %o0 ! delay slot, pop window and set %o0 to 0
2021 SET_SIZE(hwblkpagecopy)
2022 #endif /* lint */
2023
2024
2025 /*
2026 * Transfer data to and from user space -
2027 * Note that these routines can cause faults
2028 * It is assumed that the kernel has nothing at
2029 * less than KERNELBASE in the virtual address space.
2030 *
2031 * Note that copyin(9F) and copyout(9F) are part of the
2032 * DDI/DKI which specifies that they return '-1' on "errors."
2033 *
2034 * Sigh.
2035 *
2036 * So there's two extremely similar routines - xcopyin() and xcopyout()
2037 * which return the errno that we've faithfully computed. This
2038 * allows other callers (e.g. uiomove(9F)) to work correctly.
2039 * Given that these are used pretty heavily, we expand the calling
2040 * sequences inline for all flavours (rather than making wrappers).
2041 *
2042 * There are also stub routines for xcopyout_little and xcopyin_little,
2043 * which currently are intended to handle requests of <= 16 bytes from
2044 * do_unaligned. Future enhancement to make them handle 8k pages efficiently
2045 * is left as an exercise...
2046 */
2047
2048 /*
2049 * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr)
2050 *
2051 * General theory of operation:
2052 *
2053 * The only difference between default_copy{in,out} and
2054 * default_xcopy{in,out} is in the error handling routine they invoke
2055 * when a memory access error is seen. default_xcopyOP returns the errno
2056 * while default_copyOP returns -1 (see above). copy{in,out}_noerr set
2057 * a special flag (by oring the value 2 into the fault handler address)
2058 * if they are called with a fault handler already in place. That flag
2059 * causes the default handlers to trampoline to the previous handler
2060 * upon an error.
2061 *
2062 * None of the copyops routines grab a window until it's decided that
2063 * we need to do a HW block copy operation. This saves a window
2064 * spill/fill when we're called during socket ops. The typical IO
2065 * path won't cause spill/fill traps.
2066 *
2067 * This code uses a set of 4 limits for the maximum size that will
2068 * be copied given a particular input/output address alignment.
2069 * the default limits are:
2070 *
2071 * single byte aligned - 900 (hw_copy_limit_1)
2072 * two byte aligned - 1800 (hw_copy_limit_2)
2073 * four byte aligned - 3600 (hw_copy_limit_4)
2074 * eight byte aligned - 7200 (hw_copy_limit_8)
2075 *
2076 * If the value for a particular limit is zero, the copy will be done
2077 * via the copy loops rather than VIS.
2078 *
2079 * Flow:
2080 *
2081 * If count == zero return zero.
2082 *
2083 * Store the previous lo_fault handler into %g6.
2084 * Place our secondary lofault handler into %g5.
2085 * Place the address of our nowindow fault handler into %o3.
2086 * Place the address of the windowed fault handler into %o4.
2087 * --> We'll use this handler if we end up grabbing a window
2088 * --> before we use VIS instructions.
2089 *
2090 * If count is less than or equal to SMALL_LIMIT (7) we
2091 * always do a byte for byte copy.
2092 *
2093 * If count is > SMALL_LIMIT, we check the alignment of the input
2094 * and output pointers. Based on the alignment we check count
2095 * against a soft limit of VIS_COPY_THRESHOLD (900 on spitfire). If
2096 * we're larger than VIS_COPY_THRESHOLD, we check against a limit based
2097 * on detected alignment. If we exceed the alignment value we copy
2098 * via VIS instructions.
2099 *
2100 * If we don't exceed one of the limits, we store -count in %o3,
2101 * we store the number of chunks (8, 4, 2 or 1 byte) operated
2102 * on in our basic copy loop in %o2. Following this we branch
2103 * to the appropriate copy loop and copy that many chunks.
2104 * Since we've been adding the chunk size to %o3 each time through
2105 * as well as decrementing %o2, we can tell if any data
2106 * is left to be copied by examining %o3. If that is zero, we're
2107 * done and can go home. If not, we figure out what the largest
2108 * chunk size left to be copied is and branch to that copy loop
2109 * unless there's only one byte left. We load that as we're
2110 * branching to code that stores it just before we return.
2111 *
2112 * There is one potential situation in which we start to do a VIS
2113 * copy but decide to punt and return to the copy loops. There is
2114 * (in the default configuration) a window of 256 bytes between
2115 * the single byte aligned copy limit and what VIS treats as its
2116 * minimum if floating point is in use in the calling app. We need
2117 * to be prepared to handle this. See the .small_copyOP label for
2118 * details.
2119 *
2120 * Fault handlers are invoked if we reference memory that has no
2121 * current mapping. All forms share the same copyio_fault handler.
2122 * This routine handles fixing up the stack and general housecleaning.
2123 * Each copy operation has a simple fault handler that is then called
2124 * to do the work specific to the individual operation. The handlers
2125 * for default_copyOP and copyOP_noerr are found at the end of
2126 * default_copyout. The handlers for default_xcopyOP are found at the
2127 * end of xdefault_copyin.
2128 */
2129
2130 /*
2131 * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
2132 */
2133
2134 #if defined(lint)
2135
2136 /*ARGSUSED*/
2137 int
2138 copyout(const void *kaddr, void *uaddr, size_t count)
2139 { return (0); }
2140
2141 #else /* lint */
2142
2143 /*
2144 * We save the arguments in the following registers in case of a fault:
2145 * kaddr - %g2
2146 * uaddr - %g3
2147 * count - %g4
2148 */
2149 #define SAVE_SRC %g2
2150 #define SAVE_DST %g3
2151 #define SAVE_COUNT %g4
2152
2153 #define REAL_LOFAULT %g5
2154 #define SAVED_LOFAULT %g6
2155
2156 /*
2157 * Generic copyio fault handler. This is the first line of defense when a
2158 * fault occurs in (x)copyin/(x)copyout. In order for this to function
2159 * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
2160 * This allows us to share common code for all the flavors of the copy
2161 * operations, including the _noerr versions.
2162 *
2163 * Note that this function will restore the original input parameters before
2164 * calling REAL_LOFAULT. So the real handler can vector to the appropriate
2165 * member of the t_copyop structure, if needed.
 *
 * If FPUSED_FLAG is set in SAVED_LOFAULT, the flag is cleared and %gsr and
 * %fprs are put back from the values saved in the current stack frame. The
 * floating point registers are reloaded from the frame when the saved %fprs
 * has FPRS_FEF set; otherwise FZERO is applied to them. In all cases the
 * register window is then popped with restore, and %o0-%o2 are loaded from
 * SAVE_SRC, SAVE_DST and SAVE_COUNT before jumping to REAL_LOFAULT.
2166 */
2167 ENTRY(copyio_fault)
2168 btst FPUSED_FLAG, SAVED_LOFAULT
2169 bz 1f ! FP state was not touched: nothing to put back
2170 andn SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT ! delay slot, clear the flag
2171
2172 membar #Sync
2173
2174 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
2175 wr %o2, 0, %gsr ! restore gsr
2176
2177 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
2178 btst FPRS_FEF, %o3
2179 bz 4f ! saved fprs had FEF clear: no fpregs were saved
2180 nop
2181
2182 ! restore fpregs from stack
2183 membar #Sync
2184 add %fp, STACK_BIAS - 257, %o2
2185 and %o2, -64, %o2 ! round down to a 64-byte boundary
2186 ldda [%o2]ASI_BLK_P, %d0
2187 add %o2, 64, %o2
2188 ldda [%o2]ASI_BLK_P, %d16
2189 add %o2, 64, %o2
2190 ldda [%o2]ASI_BLK_P, %d32
2191 add %o2, 64, %o2
2192 ldda [%o2]ASI_BLK_P, %d48
2193 membar #Sync
2194
2195 ba,pt %ncc, 1f
2196 wr %o3, 0, %fprs ! restore fprs
2197
2198 4:
2199 FZERO ! zero all of the fpregs
2200 wr %o3, 0, %fprs ! restore fprs
2201
2202 1:
2203
2204 restore ! pop the register window
2205
2206 mov SAVE_SRC, %o0
2207 mov SAVE_DST, %o1
2208 jmp REAL_LOFAULT
2209 mov SAVE_COUNT, %o2 ! delay slot, third original argument
2210 SET_SIZE(copyio_fault)
2211
/*
 * Fault handler installed in t_lofault by .do_copyout before it starts
 * copying; judging by its name it serves the paths that run without a
 * register window of their own. It stores SAVED_LOFAULT back into
 * t_lofault, reloads the original arguments from SAVE_SRC, SAVE_DST and
 * SAVE_COUNT into %o0-%o2 and jumps to REAL_LOFAULT.
 */
2212 ENTRY(copyio_fault_nowindow)
2213 membar #Sync
2214 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
2215
2216 mov SAVE_SRC, %o0
2217 mov SAVE_DST, %o1
2218 jmp REAL_LOFAULT
2219 mov SAVE_COUNT, %o2 ! delay slot, third original argument
2220 SET_SIZE(copyio_fault_nowindow)
2221
2222 ENTRY(copyout)
2223 sethi %hi(.copyout_err), REAL_LOFAULT
2224 or REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT
2225
2226 .do_copyout:
2227 !
2228 ! Check the length and bail if zero.
2229 !
2230 tst %o2
2231 bnz,pt %ncc, 1f
2232 nop
2233 retl
2234 clr %o0
2235 1:
2236 sethi %hi(copyio_fault), %o4
2237 or %o4, %lo(copyio_fault), %o4
2238 sethi %hi(copyio_fault_nowindow), %o3
2239 ldn [THREAD_REG + T_LOFAULT], SAVED_LOFAULT
2240 or %o3, %lo(copyio_fault_nowindow), %o3
2241 membar #Sync
2242 stn %o3, [THREAD_REG + T_LOFAULT]
2243
2244 mov %o0, SAVE_SRC
2245 mov %o1, SAVE_DST
2246 mov %o2, SAVE_COUNT
2247
2248 !
2249 ! Check to see if we're more than SMALL_LIMIT (7 bytes).
2250 ! Run in leaf mode, using the %o regs as our input regs.
2251 !
2252 subcc %o2, SMALL_LIMIT, %o3
2253 bgu,a,pt %ncc, .dco_ns
2254 or %o0, %o1, %o3
2255 !
2256 ! What was previously ".small_copyout"
2257 ! Do full differenced copy.
2258 !
2259 .dcobcp:
2260 sub %g0, %o2, %o3 ! negate count
2261 add %o0, %o2, %o0 ! make %o0 point at the end
2262 add %o1, %o2, %o1 ! make %o1 point at the end
2263 ba,pt %ncc, .dcocl
2264 ldub [%o0 + %o3], %o4 ! load first byte
2265 !
2266 ! %o0 and %o1 point at the end and remain pointing at the end
2267 ! of their buffers. We pull things out by adding %o3 (which is
2268 ! the negation of the length) to the buffer end which gives us
2269 ! the current location in the buffers. By incrementing %o3 we walk
2270 ! through both buffers without having to bump each buffer's
2271 ! pointer. A very fast 4 instruction loop.
2272 !
2273 .align 16
2274 .dcocl:
2275 stba %o4, [%o1 + %o3]ASI_USER
2276 inccc %o3
2277 bl,a,pt %ncc, .dcocl
2278 ldub [%o0 + %o3], %o4
2279 !
2280 ! We're done. Go home.
2281 !
2282 membar #Sync
2283 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
2284 retl
2285 clr %o0
2286 !
2287 ! Try aligned copies from here.
2288 !
2289 .dco_ns:
2290 ! %o0 = kernel addr (to be copied from)
2291 ! %o1 = user addr (to be copied to)
2292 ! %o2 = length
2293 ! %o3 = %o0 | %o1 (used for alignment checking)
2294 ! %o4 is alternate lo_fault
2295 ! %o5 is original lo_fault
2296 !
2297 ! See if we're single byte aligned. If we are, check the
2298 ! limit for single byte copies. If we're smaller or equal,
2299 ! bounce to the byte for byte copy loop. Otherwise do it in
2300 ! HW (if enabled).
2301 !
2302 btst 1, %o3
2303 bz,pt %icc, .dcoh8
2304 btst 7, %o3
2305 !
2306 ! Single byte aligned. Do we do it via HW or via
2307 ! byte for byte? Do a quick no memory reference
2308 ! check to pick up small copies.
2309 !
2310 subcc %o2, VIS_COPY_THRESHOLD, %o3
2311 bleu,pt %ncc, .dcobcp
2312 sethi %hi(hw_copy_limit_1), %o3
2313 !
2314 ! Big enough that we need to check the HW limit for
2315 ! this size copy.
2316 !
2317 ld [%o3 + %lo(hw_copy_limit_1)], %o3
2318 !
2319 ! Is HW copy on? If not, do everything byte for byte.
2320 !
2321 tst %o3
2322 bz,pn %icc, .dcobcp
2323 subcc %o3, %o2, %o3
2324 !
2325 ! If we're less than or equal to the single byte copy limit,
2326 ! bop to the copy loop.
2327 !
2328 bge,pt %ncc, .dcobcp
2329 nop
2330 !
2331 ! We're big enough and copy is on. Do it with HW.
2332 !
2333 ba,pt %ncc, .big_copyout
2334 nop
2335 .dcoh8:
2336 !
2337 ! 8 byte aligned?
2338 !
2339 bnz,a %ncc, .dcoh4
2340 btst 3, %o3
2341 !
2342 ! See if we're in the "small range".
2343 ! If so, go off and do the copy.
2344 ! If not, load the hard limit. %o3 is
2345 ! available for reuse.
2346 !
2347 subcc %o2, VIS_COPY_THRESHOLD, %o3
2348 bleu,pt %ncc, .dcos8
2349 sethi %hi(hw_copy_limit_8), %o3
2350 ld [%o3 + %lo(hw_copy_limit_8)], %o3
2351 !
2352 ! If it's zero, there's no HW bcopy.
2353 ! Bop off to the aligned copy.
2354 !
2355 tst %o3
2356 bz,pn %icc, .dcos8
2357 subcc %o3, %o2, %o3
2358 !
2359 ! We're negative if our size is larger than hw_copy_limit_8.
2360 !
2361 bge,pt %ncc, .dcos8
2362 nop
2363 !
2364 ! HW assist is on and we're large enough. Do it.
2365 !
2366 ba,pt %ncc, .big_copyout
2367 nop
2368 .dcos8:
2369 !
2370 ! Housekeeping for copy loops. Uses same idea as in the byte for
2371 ! byte copy loop above.
2372 !
2373 add %o0, %o2, %o0
2374 add %o1, %o2, %o1
2375 sub %g0, %o2, %o3
2376 ba,pt %ncc, .dodebc
2377 srl %o2, 3, %o2 ! Number of 8 byte chunks to copy
2378 !
2379 ! 4 byte aligned?
2380 !
2381 .dcoh4:
2382 bnz,pn %ncc, .dcoh2
2383 !
2384 ! See if we're in the "small range".
2385 ! If so, go off and do the copy.
2386 ! If not, load the hard limit. %o3 is
2387 ! available for reuse.
2388 !
2389 subcc %o2, VIS_COPY_THRESHOLD, %o3
2390 bleu,pt %ncc, .dcos4
2391 sethi %hi(hw_copy_limit_4), %o3
2392 ld [%o3 + %lo(hw_copy_limit_4)], %o3
2393 !
2394 ! If it's zero, there's no HW bcopy.
2395 ! Bop off to the aligned copy.
2396 !
2397 tst %o3
2398 bz,pn %icc, .dcos4
2399 subcc %o3, %o2, %o3
2400 !
2401 ! We're negative if our size is larger than hw_copy_limit_4.
2402 !
2403 bge,pt %ncc, .dcos4
2404 nop
2405 !
2406 ! HW assist is on and we're large enough. Do it.
2407 !
2408 ba,pt %ncc, .big_copyout
2409 nop
2410 .dcos4:
2411 add %o0, %o2, %o0
2412 add %o1, %o2, %o1
2413 sub %g0, %o2, %o3
2414 ba,pt %ncc, .dodfbc
2415 srl %o2, 2, %o2 ! Number of 4 byte chunks to copy
2416 !
2417 ! We must be 2 byte aligned. Off we go.
2418 ! The check for small copies was done in the
2419 ! delay at .dcoh4
2420 !
2421 .dcoh2:
2422 ble %ncc, .dcos2
2423 sethi %hi(hw_copy_limit_2), %o3
2424 ld [%o3 + %lo(hw_copy_limit_2)], %o3
2425 tst %o3
2426 bz,pn %icc, .dcos2
2427 subcc %o3, %o2, %o3
2428 bge,pt %ncc, .dcos2
2429 nop
2430 !
2431 ! HW is on and we're big enough. Do it.
2432 !
2433 ba,pt %ncc, .big_copyout
2434 nop
2435 .dcos2:
2436 add %o0, %o2, %o0
2437 add %o1, %o2, %o1
2438 sub %g0, %o2, %o3
2439 ba,pt %ncc, .dodtbc
2440 srl %o2, 1, %o2 ! Number of 2 byte chunks to copy
2441 .small_copyout:
2442 !
2443 ! Why are we doing this AGAIN? There are certain conditions in
2444 ! big_copyout that will cause us to forego the HW assisted copies
2445 ! and bounce back to a non-HW assisted copy. This dispatches those
2446 ! copies. Note that we branch around this in the main line code.
2447 !
2448 ! We make no check for limits or HW enablement here. We've
2449 ! already been told that we're a poster child so just go off
2450 ! and do it.
2451 !
2452 or %o0, %o1, %o3
2453 btst 1, %o3
2454 bnz %icc, .dcobcp ! Most likely
2455 btst 7, %o3
2456 bz %icc, .dcos8
2457 btst 3, %o3
2458 bz %icc, .dcos4
2459 nop
2460 ba,pt %ncc, .dcos2
2461 nop
2462 .align 32
2463 .dodebc:
2464 ldx [%o0 + %o3], %o4
2465 deccc %o2
2466 stxa %o4, [%o1 + %o3]ASI_USER
2467 bg,pt %ncc, .dodebc
2468 addcc %o3, 8, %o3
2469 !
2470 ! End of copy loop. Check to see if we're done. Most
2471 ! eight byte aligned copies end here.
2472 !
2473 bz,pt %ncc, .dcofh
2474 nop
2475 !
2476 ! Something is left - do it byte for byte.
2477 !
2478 ba,pt %ncc, .dcocl
2479 ldub [%o0 + %o3], %o4 ! load next byte
2480 !
2481 ! Four byte copy loop. %o2 is the number of 4 byte chunks to copy.
2482 !
2483 .align 32
2484 .dodfbc:
2485 lduw [%o0 + %o3], %o4
2486 deccc %o2
2487 sta %o4, [%o1 + %o3]ASI_USER
2488 bg,pt %ncc, .dodfbc
2489 addcc %o3, 4, %o3
2490 !
2491 ! End of copy loop. Check to see if we're done. Most
2492 ! four byte aligned copies end here.
2493 !
2494 bz,pt %ncc, .dcofh
2495 nop
2496 !
2497 ! Something is left. Do it byte for byte.
2498 !
2499 ba,pt %ncc, .dcocl
2500 ldub [%o0 + %o3], %o4 ! load next byte
2501 !
2502 ! two byte aligned copy loop. %o2 is the number of 2 byte chunks to
2503 ! copy.
2504 !
2505 .align 32
2506 .dodtbc:
2507 lduh [%o0 + %o3], %o4
2508 deccc %o2
2509 stha %o4, [%o1 + %o3]ASI_USER
2510 bg,pt %ncc, .dodtbc
2511 addcc %o3, 2, %o3
2512 !
2513 ! End of copy loop. Anything left?
2514 !
2515 bz,pt %ncc, .dcofh
2516 nop
2517 !
2518 ! Deal with the last byte
2519 !
2520 ldub [%o0 + %o3], %o4
2521 stba %o4, [%o1 + %o3]ASI_USER
2522 .dcofh:
2523 membar #Sync
2524 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
2525 retl
2526 clr %o0
2527
2528 .big_copyout:
2529 !
2530 ! Are we using the FP registers?
2531 !
2532 rd %fprs, %o3 ! check for unused fp
2533 btst FPRS_FEF, %o3
2534 bnz %icc, .copyout_fpregs_inuse
2535 nop
2536 !
2537 ! We're going to go off and do a block copy.
2538 ! Switch fault handlers and grab a window. We
2539 ! don't do a membar #Sync since we've done only
2540 ! kernel data to this point.
2541 !
2542 stn %o4, [THREAD_REG + T_LOFAULT]
2543 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
2544 !
2545 ! %o3 is now %i3. Save original %fprs.
2546 !
2547 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]
2548 ba,pt %ncc, .do_block_copyout ! Not in use. Go off and do it.
2549 wr %g0, FPRS_FEF, %fprs ! clear %fprs
2550 !
2551 .copyout_fpregs_inuse:
2552 !
2553 ! We're here if the FP regs are in use. Need to see if the request
2554 ! exceeds our suddenly larger minimum.
2555 !
2556 cmp %i2, VIS_COPY_THRESHOLD+(64*4) ! for large counts (larger
2557 bl %ncc, .small_copyout
2558 nop
2559 !
2560 ! We're going to go off and do a block copy.
2561 ! Change to the heavy duty fault handler and grab a window first.
2562 !
2563 stn %o4, [THREAD_REG + T_LOFAULT]
2564 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
2565 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]
2566 !
2567 ! save in-use fpregs on stack
2568 !
2569 wr %g0, FPRS_FEF, %fprs
2570 membar #Sync
2571 add %fp, STACK_BIAS - 257, %o2
2572 and %o2, -64, %o2
2573 stda %d0, [%o2]ASI_BLK_P
2574 add %o2, 64, %o2
2575 stda %d16, [%o2]ASI_BLK_P
2576 add %o2, 64, %o2
2577 stda %d32, [%o2]ASI_BLK_P
2578 add %o2, 64, %o2
2579 stda %d48, [%o2]ASI_BLK_P
2580 membar #Sync
2581
2582 .do_block_copyout:
2583 membar #StoreStore|#StoreLoad|#LoadStore
2584
2585 rd %gsr, %o2
2586 st %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
2587
2588 ! Set the lower bit in the saved t_lofault to indicate
2589 ! that we need to clear the %fprs register on the way
2590 ! out
2591 or SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
2592
2593 ! Swap src/dst since the code below is memcpy code
2594 ! and memcpy/bcopy have different calling sequences
2595 mov %i1, %i5
2596 mov %i0, %i1
2597 mov %i5, %i0
2598
2599 !!! This code is nearly identical to the version in the sun4u
2600 !!! libc_psr. Most bugfixes made to that file should be
2601 !!! merged into this routine.
2602
2603 andcc %i0, 7, %o3
2604 bz %ncc, copyout_blkcpy
2605 sub %o3, 8, %o3
2606 neg %o3
2607 sub %i2, %o3, %i2
2608
2609 ! Align Destination on double-word boundary
2610
2611 2: ldub [%i1], %o4
2612 inc %i1
2613 stba %o4, [%i0]ASI_USER
2614 deccc %o3
2615 bgu %ncc, 2b
2616 inc %i0
2617 copyout_blkcpy:
2618 andcc %i0, 63, %i3
2619 bz,pn %ncc, copyout_blalign ! now block aligned
2620 sub %i3, 64, %i3
2621 neg %i3 ! bytes till block aligned
2622 sub %i2, %i3, %i2 ! update %i2 with new count
2623
2624 ! Copy %i3 bytes till dst is block (64 byte) aligned. use
2625 ! double word copies.
2626
2627 alignaddr %i1, %g0, %g1
2628 ldd [%g1], %d0
2629 add %g1, 8, %g1
2630 6:
2631 ldd [%g1], %d2
2632 add %g1, 8, %g1
2633 subcc %i3, 8, %i3
2634 faligndata %d0, %d2, %d8
2635 stda %d8, [%i0]ASI_USER
2636 add %i1, 8, %i1
2637 bz,pn %ncc, copyout_blalign
2638 add %i0, 8, %i0
2639 ldd [%g1], %d0
2640 add %g1, 8, %g1
2641 subcc %i3, 8, %i3
2642 faligndata %d2, %d0, %d8
2643 stda %d8, [%i0]ASI_USER
2644 add %i1, 8, %i1
2645 bgu,pn %ncc, 6b
2646 add %i0, 8, %i0
2647
2648 copyout_blalign:
2649 membar #StoreLoad
2650 ! %i2 = total length
2651 ! %i3 = blocks (length - 64) / 64
2652 ! %i4 = doubles remaining (length - blocks)
2653 sub %i2, 64, %i3
2654 andn %i3, 63, %i3
2655 sub %i2, %i3, %i4
2656 andn %i4, 7, %i4
2657 sub %i4, 16, %i4
2658 sub %i2, %i4, %i2
2659 sub %i2, %i3, %i2
2660
2661 andn %i1, 0x3f, %l7 ! blk aligned address
2662 alignaddr %i1, %g0, %g0 ! gen %gsr
2663
2664 srl %i1, 3, %l5 ! bits 3,4,5 are now least sig in %l5
2665 andcc %l5, 7, %i5 ! mask everything except bits 1,2 3
2666 add %i1, %i4, %i1
2667 add %i1, %i3, %i1
2668
2669 ldda [%l7]ASI_BLK_P, %d0
2670 add %l7, 64, %l7
2671 ldda [%l7]ASI_BLK_P, %d16
2672 add %l7, 64, %l7
2673 ldda [%l7]ASI_BLK_P, %d32
2674 add %l7, 64, %l7
2675 sub %i3, 128, %i3
2676
2677 ! switch statement to get us to the right 8 byte blk within a
2678 ! 64 byte block
2679
2680 cmp %i5, 4
2681 bgeu,a copyout_hlf
2682 cmp %i5, 6
2683 cmp %i5, 2
2684 bgeu,a copyout_sqtr
2685 nop
2686 cmp %i5, 1
2687 be,a copyout_seg1
2688 nop
2689 ba,pt %ncc, copyout_seg0
2690 nop
2691 copyout_sqtr:
2692 be,a copyout_seg2
2693 nop
2694 ba,pt %ncc, copyout_seg3
2695 nop
2696
2697 copyout_hlf:
2698 bgeu,a copyout_fqtr
2699 nop
2700 cmp %i5, 5
2701 be,a copyout_seg5
2702 nop
2703 ba,pt %ncc, copyout_seg4
2704 nop
2705 copyout_fqtr:
2706 be,a copyout_seg6
2707 nop
2708 ba,pt %ncc, copyout_seg7
2709 nop
2710
2711 copyout_seg0:
2712 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2713 FALIGN_D0
2714 ldda [%l7]ASI_BLK_P, %d0
2715 stda %d48, [%i0]ASI_BLK_AIUS
2716 add %l7, 64, %l7
2717 subcc %i3, 64, %i3
2718 bz,pn %ncc, 0f
2719 add %i0, 64, %i0
2720 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2721 FALIGN_D16
2722 ldda [%l7]ASI_BLK_P, %d16
2723 stda %d48, [%i0]ASI_BLK_AIUS
2724 add %l7, 64, %l7
2725 subcc %i3, 64, %i3
2726 bz,pn %ncc, 1f
2727 add %i0, 64, %i0
2728 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2729 FALIGN_D32
2730 ldda [%l7]ASI_BLK_P, %d32
2731 stda %d48, [%i0]ASI_BLK_AIUS
2732 add %l7, 64, %l7
2733 subcc %i3, 64, %i3
2734 bz,pn %ncc, 2f
2735 add %i0, 64, %i0
2736 ba,a,pt %ncc, copyout_seg0
2737
2738 0:
2739 FALIGN_D16
2740 stda %d48, [%i0]ASI_BLK_AIUS
2741 add %i0, 64, %i0
2742 membar #Sync
2743 FALIGN_D32
2744 stda %d48, [%i0]ASI_BLK_AIUS
2745 ba,pt %ncc, copyout_blkd0
2746 add %i0, 64, %i0
2747
2748 1:
2749 FALIGN_D32
2750 stda %d48, [%i0]ASI_BLK_AIUS
2751 add %i0, 64, %i0
2752 membar #Sync
2753 FALIGN_D0
2754 stda %d48, [%i0]ASI_BLK_AIUS
2755 ba,pt %ncc, copyout_blkd16
2756 add %i0, 64, %i0
2757
2758 2:
2759 FALIGN_D0
2760 stda %d48, [%i0]ASI_BLK_AIUS
2761 add %i0, 64, %i0
2762 membar #Sync
2763 FALIGN_D16
2764 stda %d48, [%i0]ASI_BLK_AIUS
2765 ba,pt %ncc, copyout_blkd32
2766 add %i0, 64, %i0
2767
2768 copyout_seg1:
2769 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2770 FALIGN_D2
2771 ldda [%l7]ASI_BLK_P, %d0
2772 stda %d48, [%i0]ASI_BLK_AIUS
2773 add %l7, 64, %l7
2774 subcc %i3, 64, %i3
2775 bz,pn %ncc, 0f
2776 add %i0, 64, %i0
2777 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2778 FALIGN_D18
2779 ldda [%l7]ASI_BLK_P, %d16
2780 stda %d48, [%i0]ASI_BLK_AIUS
2781 add %l7, 64, %l7
2782 subcc %i3, 64, %i3
2783 bz,pn %ncc, 1f
2784 add %i0, 64, %i0
2785 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2786 FALIGN_D34
2787 ldda [%l7]ASI_BLK_P, %d32
2788 stda %d48, [%i0]ASI_BLK_AIUS
2789 add %l7, 64, %l7
2790 subcc %i3, 64, %i3
2791 bz,pn %ncc, 2f
2792 add %i0, 64, %i0
2793 ba,a,pt %ncc, copyout_seg1
2794 0:
2795 FALIGN_D18
2796 stda %d48, [%i0]ASI_BLK_AIUS
2797 add %i0, 64, %i0
2798 membar #Sync
2799 FALIGN_D34
2800 stda %d48, [%i0]ASI_BLK_AIUS
2801 ba,pt %ncc, copyout_blkd2
2802 add %i0, 64, %i0
2803
2804 1:
2805 FALIGN_D34
2806 stda %d48, [%i0]ASI_BLK_AIUS
2807 add %i0, 64, %i0
2808 membar #Sync
2809 FALIGN_D2
2810 stda %d48, [%i0]ASI_BLK_AIUS
2811 ba,pt %ncc, copyout_blkd18
2812 add %i0, 64, %i0
2813
2814 2:
2815 FALIGN_D2
2816 stda %d48, [%i0]ASI_BLK_AIUS
2817 add %i0, 64, %i0
2818 membar #Sync
2819 FALIGN_D18
2820 stda %d48, [%i0]ASI_BLK_AIUS
2821 ba,pt %ncc, copyout_blkd34
2822 add %i0, 64, %i0
2823
2824 copyout_seg2:
2825 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2826 FALIGN_D4
2827 ldda [%l7]ASI_BLK_P, %d0
2828 stda %d48, [%i0]ASI_BLK_AIUS
2829 add %l7, 64, %l7
2830 subcc %i3, 64, %i3
2831 bz,pn %ncc, 0f
2832 add %i0, 64, %i0
2833 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2834 FALIGN_D20
2835 ldda [%l7]ASI_BLK_P, %d16
2836 stda %d48, [%i0]ASI_BLK_AIUS
2837 add %l7, 64, %l7
2838 subcc %i3, 64, %i3
2839 bz,pn %ncc, 1f
2840 add %i0, 64, %i0
2841 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2842 FALIGN_D36
2843 ldda [%l7]ASI_BLK_P, %d32
2844 stda %d48, [%i0]ASI_BLK_AIUS
2845 add %l7, 64, %l7
2846 subcc %i3, 64, %i3
2847 bz,pn %ncc, 2f
2848 add %i0, 64, %i0
2849 ba,a,pt %ncc, copyout_seg2
2850
2851 0:
2852 FALIGN_D20
2853 stda %d48, [%i0]ASI_BLK_AIUS
2854 add %i0, 64, %i0
2855 membar #Sync
2856 FALIGN_D36
2857 stda %d48, [%i0]ASI_BLK_AIUS
2858 ba,pt %ncc, copyout_blkd4
2859 add %i0, 64, %i0
2860
2861 1:
2862 FALIGN_D36
2863 stda %d48, [%i0]ASI_BLK_AIUS
2864 add %i0, 64, %i0
2865 membar #Sync
2866 FALIGN_D4
2867 stda %d48, [%i0]ASI_BLK_AIUS
2868 ba,pt %ncc, copyout_blkd20
2869 add %i0, 64, %i0
2870
2871 2:
2872 FALIGN_D4
2873 stda %d48, [%i0]ASI_BLK_AIUS
2874 add %i0, 64, %i0
2875 membar #Sync
2876 FALIGN_D20
2877 stda %d48, [%i0]ASI_BLK_AIUS
2878 ba,pt %ncc, copyout_blkd36
2879 add %i0, 64, %i0
2880
2881 copyout_seg3:
2882 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2883 FALIGN_D6
2884 ldda [%l7]ASI_BLK_P, %d0
2885 stda %d48, [%i0]ASI_BLK_AIUS
2886 add %l7, 64, %l7
2887 subcc %i3, 64, %i3
2888 bz,pn %ncc, 0f
2889 add %i0, 64, %i0
2890 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2891 FALIGN_D22
2892 ldda [%l7]ASI_BLK_P, %d16
2893 stda %d48, [%i0]ASI_BLK_AIUS
2894 add %l7, 64, %l7
2895 subcc %i3, 64, %i3
2896 bz,pn %ncc, 1f
2897 add %i0, 64, %i0
2898 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2899 FALIGN_D38
2900 ldda [%l7]ASI_BLK_P, %d32
2901 stda %d48, [%i0]ASI_BLK_AIUS
2902 add %l7, 64, %l7
2903 subcc %i3, 64, %i3
2904 bz,pn %ncc, 2f
2905 add %i0, 64, %i0
2906 ba,a,pt %ncc, copyout_seg3
2907
2908 0:
2909 FALIGN_D22
2910 stda %d48, [%i0]ASI_BLK_AIUS
2911 add %i0, 64, %i0
2912 membar #Sync
2913 FALIGN_D38
2914 stda %d48, [%i0]ASI_BLK_AIUS
2915 ba,pt %ncc, copyout_blkd6
2916 add %i0, 64, %i0
2917
2918 1:
2919 FALIGN_D38
2920 stda %d48, [%i0]ASI_BLK_AIUS
2921 add %i0, 64, %i0
2922 membar #Sync
2923 FALIGN_D6
2924 stda %d48, [%i0]ASI_BLK_AIUS
2925 ba,pt %ncc, copyout_blkd22
2926 add %i0, 64, %i0
2927
2928 2:
2929 FALIGN_D6
2930 stda %d48, [%i0]ASI_BLK_AIUS
2931 add %i0, 64, %i0
2932 membar #Sync
2933 FALIGN_D22
2934 stda %d48, [%i0]ASI_BLK_AIUS
2935 ba,pt %ncc, copyout_blkd38
2936 add %i0, 64, %i0
2937
2938 copyout_seg4:
2939 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2940 FALIGN_D8
2941 ldda [%l7]ASI_BLK_P, %d0
2942 stda %d48, [%i0]ASI_BLK_AIUS
2943 add %l7, 64, %l7
2944 subcc %i3, 64, %i3
2945 bz,pn %ncc, 0f
2946 add %i0, 64, %i0
2947 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2948 FALIGN_D24
2949 ldda [%l7]ASI_BLK_P, %d16
2950 stda %d48, [%i0]ASI_BLK_AIUS
2951 add %l7, 64, %l7
2952 subcc %i3, 64, %i3
2953 bz,pn %ncc, 1f
2954 add %i0, 64, %i0
2955 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2956 FALIGN_D40
2957 ldda [%l7]ASI_BLK_P, %d32
2958 stda %d48, [%i0]ASI_BLK_AIUS
2959 add %l7, 64, %l7
2960 subcc %i3, 64, %i3
2961 bz,pn %ncc, 2f
2962 add %i0, 64, %i0
2963 ba,a,pt %ncc, copyout_seg4
2964
2965 0:
2966 FALIGN_D24
2967 stda %d48, [%i0]ASI_BLK_AIUS
2968 add %i0, 64, %i0
2969 membar #Sync
2970 FALIGN_D40
2971 stda %d48, [%i0]ASI_BLK_AIUS
2972 ba,pt %ncc, copyout_blkd8
2973 add %i0, 64, %i0
2974
2975 1:
2976 FALIGN_D40
2977 stda %d48, [%i0]ASI_BLK_AIUS
2978 add %i0, 64, %i0
2979 membar #Sync
2980 FALIGN_D8
2981 stda %d48, [%i0]ASI_BLK_AIUS
2982 ba,pt %ncc, copyout_blkd24
2983 add %i0, 64, %i0
2984
2985 2:
2986 FALIGN_D8
2987 stda %d48, [%i0]ASI_BLK_AIUS
2988 add %i0, 64, %i0
2989 membar #Sync
2990 FALIGN_D24
2991 stda %d48, [%i0]ASI_BLK_AIUS
2992 ba,pt %ncc, copyout_blkd40
2993 add %i0, 64, %i0
2994
2995 copyout_seg5:
2996 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2997 FALIGN_D10
2998 ldda [%l7]ASI_BLK_P, %d0
2999 stda %d48, [%i0]ASI_BLK_AIUS
3000 add %l7, 64, %l7
3001 subcc %i3, 64, %i3
3002 bz,pn %ncc, 0f
3003 add %i0, 64, %i0
3004 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
3005 FALIGN_D26
3006 ldda [%l7]ASI_BLK_P, %d16
3007 stda %d48, [%i0]ASI_BLK_AIUS
3008 add %l7, 64, %l7
3009 subcc %i3, 64, %i3
3010 bz,pn %ncc, 1f
3011 add %i0, 64, %i0
3012 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
3013 FALIGN_D42
3014 ldda [%l7]ASI_BLK_P, %d32
3015 stda %d48, [%i0]ASI_BLK_AIUS
3016 add %l7, 64, %l7
3017 subcc %i3, 64, %i3
3018 bz,pn %ncc, 2f
3019 add %i0, 64, %i0
3020 ba,a,pt %ncc, copyout_seg5
3021
3022 0:
3023 FALIGN_D26
3024 stda %d48, [%i0]ASI_BLK_AIUS
3025 add %i0, 64, %i0
3026 membar #Sync
3027 FALIGN_D42
3028 stda %d48, [%i0]ASI_BLK_AIUS
3029 ba,pt %ncc, copyout_blkd10
3030 add %i0, 64, %i0
3031
3032 1:
3033 FALIGN_D42
3034 stda %d48, [%i0]ASI_BLK_AIUS
3035 add %i0, 64, %i0
3036 membar #Sync
3037 FALIGN_D10
3038 stda %d48, [%i0]ASI_BLK_AIUS
3039 ba,pt %ncc, copyout_blkd26
3040 add %i0, 64, %i0
3041
3042 2:
3043 FALIGN_D10
3044 stda %d48, [%i0]ASI_BLK_AIUS
3045 add %i0, 64, %i0
3046 membar #Sync
3047 FALIGN_D26
3048 stda %d48, [%i0]ASI_BLK_AIUS
3049 ba,pt %ncc, copyout_blkd42
3050 add %i0, 64, %i0
3051
3052 copyout_seg6:
3053 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
3054 FALIGN_D12
3055 ldda [%l7]ASI_BLK_P, %d0
3056 stda %d48, [%i0]ASI_BLK_AIUS
3057 add %l7, 64, %l7
3058 subcc %i3, 64, %i3
3059 bz,pn %ncc, 0f
3060 add %i0, 64, %i0
3061 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
3062 FALIGN_D28
3063 ldda [%l7]ASI_BLK_P, %d16
3064 stda %d48, [%i0]ASI_BLK_AIUS
3065 add %l7, 64, %l7
3066 subcc %i3, 64, %i3
3067 bz,pn %ncc, 1f
3068 add %i0, 64, %i0
3069 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
3070 FALIGN_D44
3071 ldda [%l7]ASI_BLK_P, %d32
3072 stda %d48, [%i0]ASI_BLK_AIUS
3073 add %l7, 64, %l7
3074 subcc %i3, 64, %i3
3075 bz,pn %ncc, 2f
3076 add %i0, 64, %i0
3077 ba,a,pt %ncc, copyout_seg6
3078
3079 0:
3080 FALIGN_D28
3081 stda %d48, [%i0]ASI_BLK_AIUS
3082 add %i0, 64, %i0
3083 membar #Sync
3084 FALIGN_D44
3085 stda %d48, [%i0]ASI_BLK_AIUS
3086 ba,pt %ncc, copyout_blkd12
3087 add %i0, 64, %i0
3088
3089 1:
3090 FALIGN_D44
3091 stda %d48, [%i0]ASI_BLK_AIUS
3092 add %i0, 64, %i0
3093 membar #Sync
3094 FALIGN_D12
3095 stda %d48, [%i0]ASI_BLK_AIUS
3096 ba,pt %ncc, copyout_blkd28
3097 add %i0, 64, %i0
3098
3099 2:
3100 FALIGN_D12
3101 stda %d48, [%i0]ASI_BLK_AIUS
3102 add %i0, 64, %i0
3103 membar #Sync
3104 FALIGN_D28
3105 stda %d48, [%i0]ASI_BLK_AIUS
3106 ba,pt %ncc, copyout_blkd44
3107 add %i0, 64, %i0
3108
3109 copyout_seg7:
3110 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
3111 FALIGN_D14
3112 ldda [%l7]ASI_BLK_P, %d0
3113 stda %d48, [%i0]ASI_BLK_AIUS
3114 add %l7, 64, %l7
3115 subcc %i3, 64, %i3
3116 bz,pn %ncc, 0f
3117 add %i0, 64, %i0
3118 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
3119 FALIGN_D30
3120 ldda [%l7]ASI_BLK_P, %d16
3121 stda %d48, [%i0]ASI_BLK_AIUS
3122 add %l7, 64, %l7
3123 subcc %i3, 64, %i3
3124 bz,pn %ncc, 1f
3125 add %i0, 64, %i0
3126 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
3127 FALIGN_D46
3128 ldda [%l7]ASI_BLK_P, %d32
3129 stda %d48, [%i0]ASI_BLK_AIUS
3130 add %l7, 64, %l7
3131 subcc %i3, 64, %i3
3132 bz,pn %ncc, 2f
3133 add %i0, 64, %i0
3134 ba,a,pt %ncc, copyout_seg7
3135
3136 0:
3137 FALIGN_D30
3138 stda %d48, [%i0]ASI_BLK_AIUS
3139 add %i0, 64, %i0
3140 membar #Sync
3141 FALIGN_D46
3142 stda %d48, [%i0]ASI_BLK_AIUS
3143 ba,pt %ncc, copyout_blkd14
3144 add %i0, 64, %i0
3145
3146 1:
3147 FALIGN_D46
3148 stda %d48, [%i0]ASI_BLK_AIUS
3149 add %i0, 64, %i0
3150 membar #Sync
3151 FALIGN_D14
3152 stda %d48, [%i0]ASI_BLK_AIUS
3153 ba,pt %ncc, copyout_blkd30
3154 add %i0, 64, %i0
3155
3156 2:
3157 FALIGN_D14
3158 stda %d48, [%i0]ASI_BLK_AIUS
3159 add %i0, 64, %i0
3160 membar #Sync
3161 FALIGN_D30
3162 stda %d48, [%i0]ASI_BLK_AIUS
3163 ba,pt %ncc, copyout_blkd46
3164 add %i0, 64, %i0
3165
3166
3167 !
3168 ! dribble out the last partial block
3169 !
3170 copyout_blkd0:
3171 subcc %i4, 8, %i4
3172 blu,pn %ncc, copyout_blkdone
3173 faligndata %d0, %d2, %d48
3174 stda %d48, [%i0]ASI_USER
3175 add %i0, 8, %i0
3176 copyout_blkd2:
3177 subcc %i4, 8, %i4
3178 blu,pn %ncc, copyout_blkdone
3179 faligndata %d2, %d4, %d48
3180 stda %d48, [%i0]ASI_USER
3181 add %i0, 8, %i0
3182 copyout_blkd4:
3183 subcc %i4, 8, %i4
3184 blu,pn %ncc, copyout_blkdone
3185 faligndata %d4, %d6, %d48
3186 stda %d48, [%i0]ASI_USER
3187 add %i0, 8, %i0
3188 copyout_blkd6:
3189 subcc %i4, 8, %i4
3190 blu,pn %ncc, copyout_blkdone
3191 faligndata %d6, %d8, %d48
3192 stda %d48, [%i0]ASI_USER
3193 add %i0, 8, %i0
3194 copyout_blkd8:
3195 subcc %i4, 8, %i4
3196 blu,pn %ncc, copyout_blkdone
3197 faligndata %d8, %d10, %d48
3198 stda %d48, [%i0]ASI_USER
3199 add %i0, 8, %i0
3200 copyout_blkd10:
3201 subcc %i4, 8, %i4
3202 blu,pn %ncc, copyout_blkdone
3203 faligndata %d10, %d12, %d48
3204 stda %d48, [%i0]ASI_USER
3205 add %i0, 8, %i0
3206 copyout_blkd12:
3207 subcc %i4, 8, %i4
3208 blu,pn %ncc, copyout_blkdone
3209 faligndata %d12, %d14, %d48
3210 stda %d48, [%i0]ASI_USER
3211 add %i0, 8, %i0
3212 copyout_blkd14:
3213 subcc %i4, 8, %i4
3214 blu,pn %ncc, copyout_blkdone
3215 fsrc1 %d14, %d0
3216 ba,a,pt %ncc, copyout_blkleft
3217
3218 copyout_blkd16:
3219 subcc %i4, 8, %i4
3220 blu,pn %ncc, copyout_blkdone
3221 faligndata %d16, %d18, %d48
3222 stda %d48, [%i0]ASI_USER
3223 add %i0, 8, %i0
3224 copyout_blkd18:
3225 subcc %i4, 8, %i4
3226 blu,pn %ncc, copyout_blkdone
3227 faligndata %d18, %d20, %d48
3228 stda %d48, [%i0]ASI_USER
3229 add %i0, 8, %i0
3230 copyout_blkd20:
3231 subcc %i4, 8, %i4
3232 blu,pn %ncc, copyout_blkdone
3233 faligndata %d20, %d22, %d48
3234 stda %d48, [%i0]ASI_USER
3235 add %i0, 8, %i0
3236 copyout_blkd22:
3237 subcc %i4, 8, %i4
3238 blu,pn %ncc, copyout_blkdone
3239 faligndata %d22, %d24, %d48
3240 stda %d48, [%i0]ASI_USER
3241 add %i0, 8, %i0
3242 copyout_blkd24:
3243 subcc %i4, 8, %i4
3244 blu,pn %ncc, copyout_blkdone
3245 faligndata %d24, %d26, %d48
3246 stda %d48, [%i0]ASI_USER
3247 add %i0, 8, %i0
3248 copyout_blkd26:
3249 subcc %i4, 8, %i4
3250 blu,pn %ncc, copyout_blkdone
3251 faligndata %d26, %d28, %d48
3252 stda %d48, [%i0]ASI_USER
3253 add %i0, 8, %i0
3254 copyout_blkd28:
3255 subcc %i4, 8, %i4
3256 blu,pn %ncc, copyout_blkdone
3257 faligndata %d28, %d30, %d48
3258 stda %d48, [%i0]ASI_USER
3259 add %i0, 8, %i0
3260 copyout_blkd30:
3261 subcc %i4, 8, %i4
3262 blu,pn %ncc, copyout_blkdone
3263 fsrc1 %d30, %d0
3264 ba,a,pt %ncc, copyout_blkleft
3265 copyout_blkd32:
3266 subcc %i4, 8, %i4
3267 blu,pn %ncc, copyout_blkdone
3268 faligndata %d32, %d34, %d48
3269 stda %d48, [%i0]ASI_USER
3270 add %i0, 8, %i0
3271 copyout_blkd34:
3272 subcc %i4, 8, %i4
3273 blu,pn %ncc, copyout_blkdone
3274 faligndata %d34, %d36, %d48
3275 stda %d48, [%i0]ASI_USER
3276 add %i0, 8, %i0
3277 copyout_blkd36:
3278 subcc %i4, 8, %i4
3279 blu,pn %ncc, copyout_blkdone
3280 faligndata %d36, %d38, %d48
3281 stda %d48, [%i0]ASI_USER
3282 add %i0, 8, %i0
3283 copyout_blkd38:
3284 subcc %i4, 8, %i4
3285 blu,pn %ncc, copyout_blkdone
3286 faligndata %d38, %d40, %d48
3287 stda %d48, [%i0]ASI_USER
3288 add %i0, 8, %i0
3289 copyout_blkd40:
3290 subcc %i4, 8, %i4
3291 blu,pn %ncc, copyout_blkdone
3292 faligndata %d40, %d42, %d48
3293 stda %d48, [%i0]ASI_USER
3294 add %i0, 8, %i0
3295 copyout_blkd42:
3296 subcc %i4, 8, %i4
3297 blu,pn %ncc, copyout_blkdone
3298 faligndata %d42, %d44, %d48
3299 stda %d48, [%i0]ASI_USER
3300 add %i0, 8, %i0
3301 copyout_blkd44:
3302 subcc %i4, 8, %i4
3303 blu,pn %ncc, copyout_blkdone
3304 faligndata %d44, %d46, %d48
3305 stda %d48, [%i0]ASI_USER
3306 add %i0, 8, %i0
3307 copyout_blkd46:
3308 subcc %i4, 8, %i4
3309 blu,pn %ncc, copyout_blkdone
3310 fsrc1 %d46, %d0
3311
3312 copyout_blkleft:
3313 1:
3314 ldd [%l7], %d2
3315 add %l7, 8, %l7
3316 subcc %i4, 8, %i4
3317 faligndata %d0, %d2, %d8
3318 stda %d8, [%i0]ASI_USER
3319 blu,pn %ncc, copyout_blkdone
3320 add %i0, 8, %i0
3321 ldd [%l7], %d0
3322 add %l7, 8, %l7
3323 subcc %i4, 8, %i4
3324 faligndata %d2, %d0, %d8
3325 stda %d8, [%i0]ASI_USER
3326 bgeu,pt %ncc, 1b
3327 add %i0, 8, %i0
3328
3329 copyout_blkdone:
3330 tst %i2
3331 bz,pt %ncc, .copyout_exit
3332 and %l3, 0x4, %l3 ! fprs.du = fprs.dl = 0
3333
3334 7: ldub [%i1], %i4
3335 inc %i1
3336 stba %i4, [%i0]ASI_USER
3337 inc %i0
3338 deccc %i2
3339 bgu %ncc, 7b
3340 nop
3341
3342 .copyout_exit:
3343 membar #StoreLoad|#StoreStore
3344 btst FPUSED_FLAG, SAVED_LOFAULT
3345 bz 1f
3346 nop
3347
3348 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
3349 wr %o2, 0, %gsr ! restore gsr
3350
3351 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
3352 btst FPRS_FEF, %o3
3353 bz 4f
3354 nop
3355
3356 ! restore fpregs from stack
3357 membar #Sync
3358 add %fp, STACK_BIAS - 257, %o2
3359 and %o2, -64, %o2
3360 ldda [%o2]ASI_BLK_P, %d0
3361 add %o2, 64, %o2
3362 ldda [%o2]ASI_BLK_P, %d16
3363 add %o2, 64, %o2
3364 ldda [%o2]ASI_BLK_P, %d32
3365 add %o2, 64, %o2
3366 ldda [%o2]ASI_BLK_P, %d48
3367 membar #Sync
3368
3369 ba,pt %ncc, 1f
3370 wr %o3, 0, %fprs ! restore fprs
3371
3372 4:
3373 FZERO ! zero all of the fpregs
3374 wr %o3, 0, %fprs ! restore fprs
3375
3376 1:
3377 andn SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
3378 membar #Sync ! sync error barrier
3379 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
3380 ret
3381 restore %g0, 0, %o0
3382
3383 .copyout_err:
3384 ldn [THREAD_REG + T_COPYOPS], %o4
3385 brz %o4, 2f
3386 nop
3387 ldn [%o4 + CP_COPYOUT], %g2
3388 jmp %g2
3389 nop
3390 2:
3391 retl
3392 mov -1, %o0
3393 SET_SIZE(copyout)
3394
3395 #endif /* lint */
3396
3397
3398 #ifdef lint
3399
3400 /*ARGSUSED*/
3401 int
3402 xcopyout(const void *kaddr, void *uaddr, size_t count)
3403 { return (0); }
3404
3405 #else /* lint */
3406
/*
 * xcopyout(kaddr, uaddr, count)
 *
 * Thin entry point: it builds the address of .xcopyout_err in
 * REAL_LOFAULT and branches to .do_copyout, a label that is not part
 * of this block (presumably the shared body inside copyout -- confirm
 * there how REAL_LOFAULT is consumed).
 *
 * .xcopyout_err mirrors .copyout_err, with two differences visible in
 * the code: it dispatches through the CP_XCOPYOUT slot of the copyops
 * vector, and when no vector is installed it returns the contents of
 * %g1 instead of -1.
 * NOTE(review): %g1 is presumably the error code left behind by the
 * fault path; verify against the code that transfers control here.
 */
3407 ENTRY(xcopyout)
3408 sethi %hi(.xcopyout_err), REAL_LOFAULT
3409 b .do_copyout
3410 or REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT /* delay slot: low bits of the handler address */
3411 .xcopyout_err:
/*
 * Read the T_COPYOPS field of the thread addressed by THREAD_REG.
 * Zero means no copyops vector, so fall through to the plain return
 * at 2:.  Otherwise fetch the CP_XCOPYOUT entry and jump to it.
 */
3412 ldn [THREAD_REG + T_COPYOPS], %o4
3413 brz %o4, 2f
3414 nop
3415 ldn [%o4 + CP_XCOPYOUT], %g2
3416 jmp %g2
3417 nop
3418 2:
3419 retl
3420 mov %g1, %o0 /* delay slot: return value is whatever %g1 holds */
3421 SET_SIZE(xcopyout)
3422
3423 #endif /* lint */
3424
3425 #ifdef lint
3426
3427 /*ARGSUSED*/
3428 int
3429 xcopyout_little(const void *kaddr, void *uaddr, size_t count)
3430 { return (0); }
3431
3432 #else /* lint */
3433
/*
 * xcopyout_little(kaddr, uaddr, count)
 *
 * Byte-at-a-time copy from kaddr to uaddr that reverses the byte
 * order: the source is read from its last byte backwards while the
 * destination is written from its first byte forwards, so
 * uaddr[i] = kaddr[count - 1 - i].  Stores are issued with
 * ASI_AIUSL; loads are ordinary ldub.
 *
 * .little_err (defined outside this block) is installed in t_lofault
 * for the duration of the copy.  The previous t_lofault is kept in
 * %o5 and written back on the normal exit path, which returns 0.
 * A count of zero skips the loop and goes straight to that exit.
 *
 * Register use in the loop:
 *	%o3	negative index, starts at -count and counts up to 0
 *	%o0	starts at kaddr + 2 * count - 1 and drops by 2 per pass,
 *		so %o0 + %o3 walks down one byte at a time from the
 *		last source byte
 *	%o1	uaddr + count, so %o1 + %o3 walks up one byte at a
 *		time from the first destination byte
 *	%o4	the byte in flight
 */
3434 ENTRY(xcopyout_little)
3435 sethi %hi(.little_err), %o4
3436 ldn [THREAD_REG + T_LOFAULT], %o5
3437 or %o4, %lo(.little_err), %o4
3438 membar #Sync ! sync error barrier
3439 stn %o4, [THREAD_REG + T_LOFAULT]
3440
/*
 * subcc leaves -count in %o3 and sets the zero flag when count is 0.
 * The sub in the bz delay slot is not annulled, so it also executes
 * on the zero-count path, where its result is unused.
 */
3441 subcc %g0, %o2, %o3
3442 add %o0, %o2, %o0
3443 bz,pn %ncc, 2f ! check for zero bytes
3444 sub %o2, 1, %o4
3445 add %o0, %o4, %o0 ! start w/last byte
3446 add %o1, %o2, %o1
3447 ldub [%o0+%o3], %o4
3448
/*
 * inccc sets carry only on the pass where %o3 wraps from -1 to 0, so
 * bcc keeps looping until every byte has been stored.  The sub between
 * inccc and bcc does not touch the condition codes.  The ldub in the
 * delay slot is annulled when the branch falls through, so no load is
 * issued after the final store.
 */
3449 1: stba %o4, [%o1+%o3]ASI_AIUSL
3450 inccc %o3
3451 sub %o0, 2, %o0 ! get next byte
3452 bcc,a,pt %ncc, 1b
3453 ldub [%o0+%o3], %o4
3454
3455 2: membar #Sync ! sync error barrier
3456 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
3457 retl
3458 mov %g0, %o0 ! return (0)
3459 SET_SIZE(xcopyout_little)
3460
3461 #endif /* lint */
3462
3463 /*
3464 * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
3465 */
3466
3467 #if defined(lint)
3468
3469 /*ARGSUSED*/
3470 int
3471 copyin(const void *uaddr, void *kaddr, size_t count)
3472 { return (0); }
3473
3474 #else /* lint */
3475
3476 ENTRY(copyin)
3477 sethi %hi(.copyin_err), REAL_LOFAULT
3478 or REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT
3479
3480 .do_copyin:
3481 !
3482 ! Check the length and bail if zero.
3483 !
3484 tst %o2
3485 bnz,pt %ncc, 1f
3486 nop
3487 retl
3488 clr %o0
3489 1:
3490 sethi %hi(copyio_fault), %o4
3491 or %o4, %lo(copyio_fault), %o4
3492 sethi %hi(copyio_fault_nowindow), %o3
3493 ldn [THREAD_REG + T_LOFAULT], SAVED_LOFAULT
3494 or %o3, %lo(copyio_fault_nowindow), %o3
3495 membar #Sync
3496 stn %o3, [THREAD_REG + T_LOFAULT]
3497
3498 mov %o0, SAVE_SRC
3499 mov %o1, SAVE_DST
3500 mov %o2, SAVE_COUNT
3501
3502 !
3503 ! Check to see if we're more than SMALL_LIMIT.
3504 !
3505 subcc %o2, SMALL_LIMIT, %o3
3506 bgu,a,pt %ncc, .dci_ns
3507 or %o0, %o1, %o3
3508 !
3509 ! What was previously ".small_copyin"
3510 !
3511 .dcibcp:
3512 sub %g0, %o2, %o3 ! setup for copy loop
3513 add %o0, %o2, %o0
3514 add %o1, %o2, %o1
3515 ba,pt %ncc, .dcicl
3516 lduba [%o0 + %o3]ASI_USER, %o4
3517 !
3518 ! %o0 and %o1 point at the end and remain pointing at the end
3519 ! of their buffers. We pull things out by adding %o3 (which is
3520 ! the negation of the length) to the buffer end which gives us
3521 ! the current location in the buffers. By incrementing %o3 we walk
3522 ! through both buffers without having to bump each buffer's
3523 ! pointer. A very fast 4 instruction loop.
3524 !
3525 .align 16
3526 .dcicl:
3527 stb %o4, [%o1 + %o3]
3528 inccc %o3
3529 bl,a,pt %ncc, .dcicl
3530 lduba [%o0 + %o3]ASI_USER, %o4
3531 !
3532 ! We're done. Go home.
3533 !
3534 membar #Sync
3535 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
3536 retl
3537 clr %o0
3538 !
3539 ! Try aligned copies from here.
3540 !
3541 .dci_ns:
3542 !
3543 ! See if we're single byte aligned. If we are, check the
3544 ! limit for single byte copies. If we're smaller, or equal,
3545 ! bounce to the byte for byte copy loop. Otherwise do it in
3546 ! HW (if enabled).
3547 !
3548 btst 1, %o3
3549 bz,a,pt %icc, .dcih8
3550 btst 7, %o3
3551 !
3552 ! We're single byte aligned.
3553 !
3554 subcc %o2, VIS_COPY_THRESHOLD, %o3
3555 bleu,pt %ncc, .dcibcp
3556 sethi %hi(hw_copy_limit_1), %o3
3557 ld [%o3 + %lo(hw_copy_limit_1)], %o3
3558 !
3559 ! Is HW copy on? If not do everything byte for byte.
3560 !
3561 tst %o3
3562 bz,pn %icc, .dcibcp
3563 subcc %o3, %o2, %o3
3564 !
3565 ! Are we bigger than the HW limit? If not
3566 ! go to byte for byte.
3567 !
3568 bge,pt %ncc, .dcibcp
3569 nop
3570 !
3571 ! We're big enough and copy is on. Do it with HW.
3572 !
3573 ba,pt %ncc, .big_copyin
3574 nop
3575 .dcih8:
3576 !
3577 ! 8 byte aligned?
3578 !
3579 bnz,a %ncc, .dcih4
3580 btst 3, %o3
3581 !
3582 ! We're eight byte aligned.
3583 !
3584 subcc %o2, VIS_COPY_THRESHOLD, %o3
3585 bleu,pt %ncc, .dcis8
3586 sethi %hi(hw_copy_limit_8), %o3
3587 ld [%o3 + %lo(hw_copy_limit_8)], %o3
3588 !
3589 ! Is HW assist on? If not, do it with the aligned copy.
3590 !
3591 tst %o3
3592 bz,pn %icc, .dcis8
3593 subcc %o3, %o2, %o3
3594 bge %ncc, .dcis8
3595 nop
3596 ba,pt %ncc, .big_copyin
3597 nop
3598 .dcis8:
3599 !
3600 ! Housekeeping for copy loops. Uses same idea as in the byte for
3601 ! byte copy loop above.
3602 !
3603 add %o0, %o2, %o0
3604 add %o1, %o2, %o1
3605 sub %g0, %o2, %o3
3606 ba,pt %ncc, .didebc
3607 srl %o2, 3, %o2 ! Number of 8 byte chunks to copy
3608 !
3609 ! 4 byte aligned?
3610 !
3611 .dcih4:
3612 bnz %ncc, .dcih2
3613 subcc %o2, VIS_COPY_THRESHOLD, %o3
3614 bleu,pt %ncc, .dcis4
3615 sethi %hi(hw_copy_limit_4), %o3
3616 ld [%o3 + %lo(hw_copy_limit_4)], %o3
3617 !
3618 ! Is HW assist on? If not, do it with the aligned copy.
3619 !
3620 tst %o3
3621 bz,pn %icc, .dcis4
3622 subcc %o3, %o2, %o3
3623 !
3624 ! We're negative if our size is larger than hw_copy_limit_4.
3625 !
3626 bge %ncc, .dcis4
3627 nop
3628 ba,pt %ncc, .big_copyin
3629 nop
3630 .dcis4:
3631 !
3632 ! Housekeeping for copy loops. Uses same idea as in the byte
3633 ! for byte copy loop above.
3634 !
3635 add %o0, %o2, %o0
3636 add %o1, %o2, %o1
3637 sub %g0, %o2, %o3
3638 ba,pt %ncc, .didfbc
3639 srl %o2, 2, %o2 ! Number of 4 byte chunks to copy
3640 .dcih2:
3641 !
3642 ! We're two byte aligned. Check for "smallness"
3643 ! done in delay at .dcih4
3644 !
3645 bleu,pt %ncc, .dcis2
3646 sethi %hi(hw_copy_limit_2), %o3
3647 ld [%o3 + %lo(hw_copy_limit_2)], %o3
3648 !
3649 ! Is HW assist on? If not, do it with the aligned copy.
3650 !
3651 tst %o3
3652 bz,pn %icc, .dcis2
3653 subcc %o3, %o2, %o3
3654 !
3655 ! Are we larger than the HW limit?
3656 !
3657 bge %ncc, .dcis2
3658 nop
3659 !
3660 ! HW assist is on and we're large enough to use it.
3661 !
3662 ba,pt %ncc, .big_copyin
3663 nop
3664 !
3665 ! Housekeeping for copy loops. Uses same idea as in the byte
3666 ! for byte copy loop above.
3667 !
3668 .dcis2:
3669 add %o0, %o2, %o0
3670 add %o1, %o2, %o1
3671 sub %g0, %o2, %o3
3672 ba,pt %ncc, .didtbc
3673 srl %o2, 1, %o2 ! Number of 2 byte chunks to copy
3674 !
3675 .small_copyin:
3676 !
3677 ! Why are we doing this AGAIN? There are certain conditions in
3678 ! big copyin that will cause us to forgo the HW assisted copies
3679 ! and bounce back to a non-hw assisted copy. This dispatches
3680 ! those copies. Note that we branch around this in the main line
3681 ! code.
3682 !
3683 ! We make no check for limits or HW enablement here. We've
3684 ! already been told that we're a poster child so just go off
3685 ! and do it.
3686 !
3687 or %o0, %o1, %o3
3688 btst 1, %o3
3689 bnz %icc, .dcibcp ! Most likely
3690 btst 7, %o3
3691 bz %icc, .dcis8
3692 btst 3, %o3
3693 bz %icc, .dcis4
3694 nop
3695 ba,pt %ncc, .dcis2
3696 nop
3697 !
3698 ! Eight byte aligned copies. A steal from the original .small_copyin
3699 ! with modifications. %o2 is number of 8 byte chunks to copy. When
3700 ! done, we examine %o3. If this is < 0, we have 1 - 7 bytes more
3701 ! to copy.
3702 !
3703 .align 32
3704 .didebc:
3705 ldxa [%o0 + %o3]ASI_USER, %o4
3706 deccc %o2
3707 stx %o4, [%o1 + %o3]
3708 bg,pt %ncc, .didebc
3709 addcc %o3, 8, %o3
3710 !
3711 ! End of copy loop. Most 8 byte aligned copies end here.
3712 !
3713 bz,pt %ncc, .dcifh
3714 nop
3715 !
3716 ! Something is left. Do it byte for byte.
3717 !
3718 ba,pt %ncc, .dcicl
3719 lduba [%o0 + %o3]ASI_USER, %o4
3720 !
3721 ! 4 byte copy loop. %o2 is number of 4 byte chunks to copy.
3722 !
3723 .align 32
3724 .didfbc:
3725 lduwa [%o0 + %o3]ASI_USER, %o4
3726 deccc %o2
3727 st %o4, [%o1 + %o3]
3728 bg,pt %ncc, .didfbc
3729 addcc %o3, 4, %o3
3730 !
3731 ! End of copy loop. Most 4 byte aligned copies end here.
3732 !
3733 bz,pt %ncc, .dcifh
3734 nop
3735 !
3736 ! Something is left. Do it byte for byte.
3737 !
3738 ba,pt %ncc, .dcicl
3739 lduba [%o0 + %o3]ASI_USER, %o4
3740 !
3741 ! 2 byte aligned copy loop. %o2 is number of 2 byte chunks to
3742 ! copy.
3743 !
3744 .align 32
3745 .didtbc:
3746 lduha [%o0 + %o3]ASI_USER, %o4
3747 deccc %o2
3748 sth %o4, [%o1 + %o3]
3749 bg,pt %ncc, .didtbc
3750 addcc %o3, 2, %o3
3751 !
3752 ! End of copy loop. Most 2 byte aligned copies end here.
3753 !
3754 bz,pt %ncc, .dcifh
3755 nop
3756 !
3757 ! Deal with the last byte
3758 !
3759 lduba [%o0 + %o3]ASI_USER, %o4
3760 stb %o4, [%o1 + %o3]
3761 .dcifh:
3762 membar #Sync
3763 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
3764 retl
3765 clr %o0
3766
3767 .big_copyin:
3768 !
3769 ! Are we using the FP registers?
3770 !
3771 rd %fprs, %o3 ! check for unused fp
3772 btst FPRS_FEF, %o3
3773 bnz %ncc, .copyin_fpregs_inuse
3774 nop
3775 !
3776 ! We're going off to do a block copy.
3777 ! Switch fault handlers and grab a window. We
3778 ! don't do a membar #Sync since we've done only
3779 ! kernel data to this point.
3780 !
3781 stn %o4, [THREAD_REG + T_LOFAULT]
3782 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
3783 !
3784 ! %o3 is %i3 after the save...
3785 !
3786 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]
3787 ba,pt %ncc, .do_blockcopyin
3788 wr %g0, FPRS_FEF, %fprs
3789 .copyin_fpregs_inuse:
3790 !
3791 ! We're here if the FP regs are in use. Need to see if the request
3792 ! exceeds our suddenly larger minimum.
3793 !
3794 cmp %i2, VIS_COPY_THRESHOLD+(64*4)
3795 bl %ncc, .small_copyin
3796 nop
3797 !
3798 ! We're going off and do a block copy.
3799 ! Change to the heavy duty fault handler and grab a window first.
3800 ! New handler is passed in
3801 !
3802 stn %o4, [THREAD_REG + T_LOFAULT]
3803 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
3804 !
3805 ! %o3 is now %i3
3806 !
3807 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]
3808
3809 ! save in-use fpregs on stack
3810 wr %g0, FPRS_FEF, %fprs
3811 membar #Sync
3812 add %fp, STACK_BIAS - 257, %o2
3813 and %o2, -64, %o2
3814 stda %d0, [%o2]ASI_BLK_P
3815 add %o2, 64, %o2
3816 stda %d16, [%o2]ASI_BLK_P
3817 add %o2, 64, %o2
3818 stda %d32, [%o2]ASI_BLK_P
3819 add %o2, 64, %o2
3820 stda %d48, [%o2]ASI_BLK_P
3821 membar #Sync
3822
3823 .do_blockcopyin:
3824 membar #StoreStore|#StoreLoad|#LoadStore
3825
3826 rd %gsr, %o2
3827 st %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
3828
3829 ! Set the lower bit in the saved t_lofault to indicate
3830 ! that we need to clear the %fprs register on the way
3831 ! out
3832 or SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
3833
3834 ! Swap src/dst since the code below is memcpy code
3835 ! and memcpy/bcopy have different calling sequences
3836 mov %i1, %i5
3837 mov %i0, %i1
3838 mov %i5, %i0
3839
3840 !!! This code is nearly identical to the version in the sun4u
3841 !!! libc_psr. Most bugfixes made to that file should be
3842 !!! merged into this routine.
3843
3844 andcc %i0, 7, %o3
3845 bz copyin_blkcpy
3846 sub %o3, 8, %o3
3847 neg %o3
3848 sub %i2, %o3, %i2
3849
3850 ! Align Destination on double-word boundary
3851
3852 2: lduba [%i1]ASI_USER, %o4
3853 inc %i1
3854 inc %i0
3855 deccc %o3
3856 bgu %ncc, 2b
3857 stb %o4, [%i0-1]
3858 copyin_blkcpy:
3859 andcc %i0, 63, %i3
3860 bz,pn %ncc, copyin_blalign ! now block aligned
3861 sub %i3, 64, %i3
3862 neg %i3 ! bytes till block aligned
3863 sub %i2, %i3, %i2 ! update %i2 with new count
3864
3865 ! Copy %i3 bytes till dst is block (64 byte) aligned. use
3866 ! double word copies.
3867
3868 alignaddr %i1, %g0, %g1
3869 ldda [%g1]ASI_USER, %d0
3870 add %g1, 8, %g1
3871 6:
3872 ldda [%g1]ASI_USER, %d2
3873 add %g1, 8, %g1
3874 subcc %i3, 8, %i3
3875 faligndata %d0, %d2, %d8
3876 std %d8, [%i0]
3877 add %i1, 8, %i1
3878 bz,pn %ncc, copyin_blalign
3879 add %i0, 8, %i0
3880 ldda [%g1]ASI_USER, %d0
3881 add %g1, 8, %g1
3882 subcc %i3, 8, %i3
3883 faligndata %d2, %d0, %d8
3884 std %d8, [%i0]
3885 add %i1, 8, %i1
3886 bgu,pn %ncc, 6b
3887 add %i0, 8, %i0
3888
3889 copyin_blalign:
3890 membar #StoreLoad
3891 ! %i2 = total length
3892 ! %i3 = blocks (length - 64) / 64
3893 ! %i4 = doubles remaining (length - blocks)
3894 sub %i2, 64, %i3
3895 andn %i3, 63, %i3
3896 sub %i2, %i3, %i4
3897 andn %i4, 7, %i4
3898 sub %i4, 16, %i4
3899 sub %i2, %i4, %i2
3900 sub %i2, %i3, %i2
3901
3902 andn %i1, 0x3f, %l7 ! blk aligned address
3903 alignaddr %i1, %g0, %g0 ! gen %gsr
3904
3905 srl %i1, 3, %l5 ! bits 3,4,5 are now least sig in %l5
3906 andcc %l5, 7, %i5 ! mask everything except bits 1,2 3
3907 add %i1, %i4, %i1
3908 add %i1, %i3, %i1
3909
3910 ldda [%l7]ASI_BLK_AIUS, %d0
3911 add %l7, 64, %l7
3912 ldda [%l7]ASI_BLK_AIUS, %d16
3913 add %l7, 64, %l7
3914 ldda [%l7]ASI_BLK_AIUS, %d32
3915 add %l7, 64, %l7
3916 sub %i3, 128, %i3
3917
3918 ! switch statement to get us to the right 8 byte blk within a
3919 ! 64 byte block
3920
3921 cmp %i5, 4
3922 bgeu,a copyin_hlf
3923 cmp %i5, 6
3924 cmp %i5, 2
3925 bgeu,a copyin_sqtr
3926 nop
3927 cmp %i5, 1
3928 be,a copyin_seg1
3929 nop
3930 ba,pt %ncc, copyin_seg0
3931 nop
3932 copyin_sqtr:
3933 be,a copyin_seg2
3934 nop
3935 ba,pt %ncc, copyin_seg3
3936 nop
3937
3938 copyin_hlf:
3939 bgeu,a copyin_fqtr
3940 nop
3941 cmp %i5, 5
3942 be,a copyin_seg5
3943 nop
3944 ba,pt %ncc, copyin_seg4
3945 nop
3946 copyin_fqtr:
3947 be,a copyin_seg6
3948 nop
3949 ba,pt %ncc, copyin_seg7
3950 nop
3951
3952 copyin_seg0:
3953 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
3954 FALIGN_D0
3955 ldda [%l7]ASI_BLK_AIUS, %d0
3956 stda %d48, [%i0]ASI_BLK_P
3957 add %l7, 64, %l7
3958 subcc %i3, 64, %i3
3959 bz,pn %ncc, 0f
3960 add %i0, 64, %i0
3961 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
3962 FALIGN_D16
3963 ldda [%l7]ASI_BLK_AIUS, %d16
3964 stda %d48, [%i0]ASI_BLK_P
3965 add %l7, 64, %l7
3966 subcc %i3, 64, %i3
3967 bz,pn %ncc, 1f
3968 add %i0, 64, %i0
3969 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
3970 FALIGN_D32
3971 ldda [%l7]ASI_BLK_AIUS, %d32
3972 stda %d48, [%i0]ASI_BLK_P
3973 add %l7, 64, %l7
3974 subcc %i3, 64, %i3
3975 bz,pn %ncc, 2f
3976 add %i0, 64, %i0
3977 ba,a,pt %ncc, copyin_seg0
3978
3979 0:
3980 FALIGN_D16
3981 stda %d48, [%i0]ASI_BLK_P
3982 add %i0, 64, %i0
3983 membar #Sync
3984 FALIGN_D32
3985 stda %d48, [%i0]ASI_BLK_P
3986 ba,pt %ncc, copyin_blkd0
3987 add %i0, 64, %i0
3988
3989 1:
3990 FALIGN_D32
3991 stda %d48, [%i0]ASI_BLK_P
3992 add %i0, 64, %i0
3993 membar #Sync
3994 FALIGN_D0
3995 stda %d48, [%i0]ASI_BLK_P
3996 ba,pt %ncc, copyin_blkd16
3997 add %i0, 64, %i0
3998
3999 2:
4000 FALIGN_D0
4001 stda %d48, [%i0]ASI_BLK_P
4002 add %i0, 64, %i0
4003 membar #Sync
4004 FALIGN_D16
4005 stda %d48, [%i0]ASI_BLK_P
4006 ba,pt %ncc, copyin_blkd32
4007 add %i0, 64, %i0
4008
4009 copyin_seg1:
4010 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4011 FALIGN_D2
4012 ldda [%l7]ASI_BLK_AIUS, %d0
4013 stda %d48, [%i0]ASI_BLK_P
4014 add %l7, 64, %l7
4015 subcc %i3, 64, %i3
4016 bz,pn %ncc, 0f
4017 add %i0, 64, %i0
4018 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4019 FALIGN_D18
4020 ldda [%l7]ASI_BLK_AIUS, %d16
4021 stda %d48, [%i0]ASI_BLK_P
4022 add %l7, 64, %l7
4023 subcc %i3, 64, %i3
4024 bz,pn %ncc, 1f
4025 add %i0, 64, %i0
4026 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4027 FALIGN_D34
4028 ldda [%l7]ASI_BLK_AIUS, %d32
4029 stda %d48, [%i0]ASI_BLK_P
4030 add %l7, 64, %l7
4031 subcc %i3, 64, %i3
4032 bz,pn %ncc, 2f
4033 add %i0, 64, %i0
4034 ba,a,pt %ncc, copyin_seg1
4035 0:
4036 FALIGN_D18
4037 stda %d48, [%i0]ASI_BLK_P
4038 add %i0, 64, %i0
4039 membar #Sync
4040 FALIGN_D34
4041 stda %d48, [%i0]ASI_BLK_P
4042 ba,pt %ncc, copyin_blkd2
4043 add %i0, 64, %i0
4044
4045 1:
4046 FALIGN_D34
4047 stda %d48, [%i0]ASI_BLK_P
4048 add %i0, 64, %i0
4049 membar #Sync
4050 FALIGN_D2
4051 stda %d48, [%i0]ASI_BLK_P
4052 ba,pt %ncc, copyin_blkd18
4053 add %i0, 64, %i0
4054
4055 2:
4056 FALIGN_D2
4057 stda %d48, [%i0]ASI_BLK_P
4058 add %i0, 64, %i0
4059 membar #Sync
4060 FALIGN_D18
4061 stda %d48, [%i0]ASI_BLK_P
4062 ba,pt %ncc, copyin_blkd34
4063 add %i0, 64, %i0
4064 copyin_seg2:
4065 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4066 FALIGN_D4
4067 ldda [%l7]ASI_BLK_AIUS, %d0
4068 stda %d48, [%i0]ASI_BLK_P
4069 add %l7, 64, %l7
4070 subcc %i3, 64, %i3
4071 bz,pn %ncc, 0f
4072 add %i0, 64, %i0
4073 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4074 FALIGN_D20
4075 ldda [%l7]ASI_BLK_AIUS, %d16
4076 stda %d48, [%i0]ASI_BLK_P
4077 add %l7, 64, %l7
4078 subcc %i3, 64, %i3
4079 bz,pn %ncc, 1f
4080 add %i0, 64, %i0
4081 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4082 FALIGN_D36
4083 ldda [%l7]ASI_BLK_AIUS, %d32
4084 stda %d48, [%i0]ASI_BLK_P
4085 add %l7, 64, %l7
4086 subcc %i3, 64, %i3
4087 bz,pn %ncc, 2f
4088 add %i0, 64, %i0
4089 ba,a,pt %ncc, copyin_seg2
4090
4091 0:
4092 FALIGN_D20
4093 stda %d48, [%i0]ASI_BLK_P
4094 add %i0, 64, %i0
4095 membar #Sync
4096 FALIGN_D36
4097 stda %d48, [%i0]ASI_BLK_P
4098 ba,pt %ncc, copyin_blkd4
4099 add %i0, 64, %i0
4100
4101 1:
4102 FALIGN_D36
4103 stda %d48, [%i0]ASI_BLK_P
4104 add %i0, 64, %i0
4105 membar #Sync
4106 FALIGN_D4
4107 stda %d48, [%i0]ASI_BLK_P
4108 ba,pt %ncc, copyin_blkd20
4109 add %i0, 64, %i0
4110
4111 2:
4112 FALIGN_D4
4113 stda %d48, [%i0]ASI_BLK_P
4114 add %i0, 64, %i0
4115 membar #Sync
4116 FALIGN_D20
4117 stda %d48, [%i0]ASI_BLK_P
4118 ba,pt %ncc, copyin_blkd36
4119 add %i0, 64, %i0
4120
4121 copyin_seg3:
4122 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4123 FALIGN_D6
4124 ldda [%l7]ASI_BLK_AIUS, %d0
4125 stda %d48, [%i0]ASI_BLK_P
4126 add %l7, 64, %l7
4127 subcc %i3, 64, %i3
4128 bz,pn %ncc, 0f
4129 add %i0, 64, %i0
4130 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4131 FALIGN_D22
4132 ldda [%l7]ASI_BLK_AIUS, %d16
4133 stda %d48, [%i0]ASI_BLK_P
4134 add %l7, 64, %l7
4135 subcc %i3, 64, %i3
4136 bz,pn %ncc, 1f
4137 add %i0, 64, %i0
4138 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4139 FALIGN_D38
4140 ldda [%l7]ASI_BLK_AIUS, %d32
4141 stda %d48, [%i0]ASI_BLK_P
4142 add %l7, 64, %l7
4143 subcc %i3, 64, %i3
4144 bz,pn %ncc, 2f
4145 add %i0, 64, %i0
4146 ba,a,pt %ncc, copyin_seg3
4147
4148 0:
4149 FALIGN_D22
4150 stda %d48, [%i0]ASI_BLK_P
4151 add %i0, 64, %i0
4152 membar #Sync
4153 FALIGN_D38
4154 stda %d48, [%i0]ASI_BLK_P
4155 ba,pt %ncc, copyin_blkd6
4156 add %i0, 64, %i0
4157
4158 1:
4159 FALIGN_D38
4160 stda %d48, [%i0]ASI_BLK_P
4161 add %i0, 64, %i0
4162 membar #Sync
4163 FALIGN_D6
4164 stda %d48, [%i0]ASI_BLK_P
4165 ba,pt %ncc, copyin_blkd22
4166 add %i0, 64, %i0
4167
4168 2:
4169 FALIGN_D6
4170 stda %d48, [%i0]ASI_BLK_P
4171 add %i0, 64, %i0
4172 membar #Sync
4173 FALIGN_D22
4174 stda %d48, [%i0]ASI_BLK_P
4175 ba,pt %ncc, copyin_blkd38
4176 add %i0, 64, %i0
4177
4178 copyin_seg4:
4179 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4180 FALIGN_D8
4181 ldda [%l7]ASI_BLK_AIUS, %d0
4182 stda %d48, [%i0]ASI_BLK_P
4183 add %l7, 64, %l7
4184 subcc %i3, 64, %i3
4185 bz,pn %ncc, 0f
4186 add %i0, 64, %i0
4187 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4188 FALIGN_D24
4189 ldda [%l7]ASI_BLK_AIUS, %d16
4190 stda %d48, [%i0]ASI_BLK_P
4191 add %l7, 64, %l7
4192 subcc %i3, 64, %i3
4193 bz,pn %ncc, 1f
4194 add %i0, 64, %i0
4195 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4196 FALIGN_D40
4197 ldda [%l7]ASI_BLK_AIUS, %d32
4198 stda %d48, [%i0]ASI_BLK_P
4199 add %l7, 64, %l7
4200 subcc %i3, 64, %i3
4201 bz,pn %ncc, 2f
4202 add %i0, 64, %i0
4203 ba,a,pt %ncc, copyin_seg4
4204
4205 0:
4206 FALIGN_D24
4207 stda %d48, [%i0]ASI_BLK_P
4208 add %i0, 64, %i0
4209 membar #Sync
4210 FALIGN_D40
4211 stda %d48, [%i0]ASI_BLK_P
4212 ba,pt %ncc, copyin_blkd8
4213 add %i0, 64, %i0
4214
4215 1:
4216 FALIGN_D40
4217 stda %d48, [%i0]ASI_BLK_P
4218 add %i0, 64, %i0
4219 membar #Sync
4220 FALIGN_D8
4221 stda %d48, [%i0]ASI_BLK_P
4222 ba,pt %ncc, copyin_blkd24
4223 add %i0, 64, %i0
4224
4225 2:
4226 FALIGN_D8
4227 stda %d48, [%i0]ASI_BLK_P
4228 add %i0, 64, %i0
4229 membar #Sync
4230 FALIGN_D24
4231 stda %d48, [%i0]ASI_BLK_P
4232 ba,pt %ncc, copyin_blkd40
4233 add %i0, 64, %i0
4234
4235 copyin_seg5:
4236 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4237 FALIGN_D10
4238 ldda [%l7]ASI_BLK_AIUS, %d0
4239 stda %d48, [%i0]ASI_BLK_P
4240 add %l7, 64, %l7
4241 subcc %i3, 64, %i3
4242 bz,pn %ncc, 0f
4243 add %i0, 64, %i0
4244 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4245 FALIGN_D26
4246 ldda [%l7]ASI_BLK_AIUS, %d16
4247 stda %d48, [%i0]ASI_BLK_P
4248 add %l7, 64, %l7
4249 subcc %i3, 64, %i3
4250 bz,pn %ncc, 1f
4251 add %i0, 64, %i0
4252 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4253 FALIGN_D42
4254 ldda [%l7]ASI_BLK_AIUS, %d32
4255 stda %d48, [%i0]ASI_BLK_P
4256 add %l7, 64, %l7
4257 subcc %i3, 64, %i3
4258 bz,pn %ncc, 2f
4259 add %i0, 64, %i0
4260 ba,a,pt %ncc, copyin_seg5
4261
4262 0:
4263 FALIGN_D26
4264 stda %d48, [%i0]ASI_BLK_P
4265 add %i0, 64, %i0
4266 membar #Sync
4267 FALIGN_D42
4268 stda %d48, [%i0]ASI_BLK_P
4269 ba,pt %ncc, copyin_blkd10
4270 add %i0, 64, %i0
4271
4272 1:
4273 FALIGN_D42
4274 stda %d48, [%i0]ASI_BLK_P
4275 add %i0, 64, %i0
4276 membar #Sync
4277 FALIGN_D10
4278 stda %d48, [%i0]ASI_BLK_P
4279 ba,pt %ncc, copyin_blkd26
4280 add %i0, 64, %i0
4281
4282 2:
4283 FALIGN_D10
4284 stda %d48, [%i0]ASI_BLK_P
4285 add %i0, 64, %i0
4286 membar #Sync
4287 FALIGN_D26
4288 stda %d48, [%i0]ASI_BLK_P
4289 ba,pt %ncc, copyin_blkd42
4290 add %i0, 64, %i0
4291
4292 copyin_seg6:
4293 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4294 FALIGN_D12
4295 ldda [%l7]ASI_BLK_AIUS, %d0
4296 stda %d48, [%i0]ASI_BLK_P
4297 add %l7, 64, %l7
4298 subcc %i3, 64, %i3
4299 bz,pn %ncc, 0f
4300 add %i0, 64, %i0
4301 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4302 FALIGN_D28
4303 ldda [%l7]ASI_BLK_AIUS, %d16
4304 stda %d48, [%i0]ASI_BLK_P
4305 add %l7, 64, %l7
4306 subcc %i3, 64, %i3
4307 bz,pn %ncc, 1f
4308 add %i0, 64, %i0
4309 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4310 FALIGN_D44
4311 ldda [%l7]ASI_BLK_AIUS, %d32
4312 stda %d48, [%i0]ASI_BLK_P
4313 add %l7, 64, %l7
4314 subcc %i3, 64, %i3
4315 bz,pn %ncc, 2f
4316 add %i0, 64, %i0
4317 ba,a,pt %ncc, copyin_seg6
4318
4319 0:
4320 FALIGN_D28
4321 stda %d48, [%i0]ASI_BLK_P
4322 add %i0, 64, %i0
4323 membar #Sync
4324 FALIGN_D44
4325 stda %d48, [%i0]ASI_BLK_P
4326 ba,pt %ncc, copyin_blkd12
4327 add %i0, 64, %i0
4328
4329 1:
4330 FALIGN_D44
4331 stda %d48, [%i0]ASI_BLK_P
4332 add %i0, 64, %i0
4333 membar #Sync
4334 FALIGN_D12
4335 stda %d48, [%i0]ASI_BLK_P
4336 ba,pt %ncc, copyin_blkd28
4337 add %i0, 64, %i0
4338
4339 2:
4340 FALIGN_D12
4341 stda %d48, [%i0]ASI_BLK_P
4342 add %i0, 64, %i0
4343 membar #Sync
4344 FALIGN_D28
4345 stda %d48, [%i0]ASI_BLK_P
4346 ba,pt %ncc, copyin_blkd44
4347 add %i0, 64, %i0
4348
4349 copyin_seg7:
4350 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4351 FALIGN_D14
4352 ldda [%l7]ASI_BLK_AIUS, %d0
4353 stda %d48, [%i0]ASI_BLK_P
4354 add %l7, 64, %l7
4355 subcc %i3, 64, %i3
4356 bz,pn %ncc, 0f
4357 add %i0, 64, %i0
4358 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4359 FALIGN_D30
4360 ldda [%l7]ASI_BLK_AIUS, %d16
4361 stda %d48, [%i0]ASI_BLK_P
4362 add %l7, 64, %l7
4363 subcc %i3, 64, %i3
4364 bz,pn %ncc, 1f
4365 add %i0, 64, %i0
4366 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4367 FALIGN_D46
4368 ldda [%l7]ASI_BLK_AIUS, %d32
4369 stda %d48, [%i0]ASI_BLK_P
4370 add %l7, 64, %l7
4371 subcc %i3, 64, %i3
4372 bz,pn %ncc, 2f
4373 add %i0, 64, %i0
4374 ba,a,pt %ncc, copyin_seg7
4375
4376 0:
4377 FALIGN_D30
4378 stda %d48, [%i0]ASI_BLK_P
4379 add %i0, 64, %i0
4380 membar #Sync
4381 FALIGN_D46
4382 stda %d48, [%i0]ASI_BLK_P
4383 ba,pt %ncc, copyin_blkd14
4384 add %i0, 64, %i0
4385
4386 1:
4387 FALIGN_D46
4388 stda %d48, [%i0]ASI_BLK_P
4389 add %i0, 64, %i0
4390 membar #Sync
4391 FALIGN_D14
4392 stda %d48, [%i0]ASI_BLK_P
4393 ba,pt %ncc, copyin_blkd30
4394 add %i0, 64, %i0
4395
4396 2:
4397 FALIGN_D14
4398 stda %d48, [%i0]ASI_BLK_P
4399 add %i0, 64, %i0
4400 membar #Sync
4401 FALIGN_D30
4402 stda %d48, [%i0]ASI_BLK_P
4403 ba,pt %ncc, copyin_blkd46
4404 add %i0, 64, %i0
4405
4406
4407 !
4408 ! dribble out the last partial block
4409 !
4410 copyin_blkd0:
4411 subcc %i4, 8, %i4
4412 blu,pn %ncc, copyin_blkdone
4413 faligndata %d0, %d2, %d48
4414 std %d48, [%i0]
4415 add %i0, 8, %i0
4416 copyin_blkd2:
4417 subcc %i4, 8, %i4
4418 blu,pn %ncc, copyin_blkdone
4419 faligndata %d2, %d4, %d48
4420 std %d48, [%i0]
4421 add %i0, 8, %i0
4422 copyin_blkd4:
4423 subcc %i4, 8, %i4
4424 blu,pn %ncc, copyin_blkdone
4425 faligndata %d4, %d6, %d48
4426 std %d48, [%i0]
4427 add %i0, 8, %i0
4428 copyin_blkd6:
4429 subcc %i4, 8, %i4
4430 blu,pn %ncc, copyin_blkdone
4431 faligndata %d6, %d8, %d48
4432 std %d48, [%i0]
4433 add %i0, 8, %i0
4434 copyin_blkd8:
4435 subcc %i4, 8, %i4
4436 blu,pn %ncc, copyin_blkdone
4437 faligndata %d8, %d10, %d48
4438 std %d48, [%i0]
4439 add %i0, 8, %i0
4440 copyin_blkd10:
4441 subcc %i4, 8, %i4
4442 blu,pn %ncc, copyin_blkdone
4443 faligndata %d10, %d12, %d48
4444 std %d48, [%i0]
4445 add %i0, 8, %i0
4446 copyin_blkd12:
4447 subcc %i4, 8, %i4
4448 blu,pn %ncc, copyin_blkdone
4449 faligndata %d12, %d14, %d48
4450 std %d48, [%i0]
4451 add %i0, 8, %i0
4452 copyin_blkd14:
4453 subcc %i4, 8, %i4
4454 blu,pn %ncc, copyin_blkdone
4455 fsrc1 %d14, %d0
4456 ba,a,pt %ncc, copyin_blkleft
4457
4458 copyin_blkd16:
4459 subcc %i4, 8, %i4
4460 blu,pn %ncc, copyin_blkdone
4461 faligndata %d16, %d18, %d48
4462 std %d48, [%i0]
4463 add %i0, 8, %i0
4464 copyin_blkd18:
4465 subcc %i4, 8, %i4
4466 blu,pn %ncc, copyin_blkdone
4467 faligndata %d18, %d20, %d48
4468 std %d48, [%i0]
4469 add %i0, 8, %i0
4470 copyin_blkd20:
4471 subcc %i4, 8, %i4
4472 blu,pn %ncc, copyin_blkdone
4473 faligndata %d20, %d22, %d48
4474 std %d48, [%i0]
4475 add %i0, 8, %i0
4476 copyin_blkd22:
4477 subcc %i4, 8, %i4
4478 blu,pn %ncc, copyin_blkdone
4479 faligndata %d22, %d24, %d48
4480 std %d48, [%i0]
4481 add %i0, 8, %i0
4482 copyin_blkd24:
4483 subcc %i4, 8, %i4
4484 blu,pn %ncc, copyin_blkdone
4485 faligndata %d24, %d26, %d48
4486 std %d48, [%i0]
4487 add %i0, 8, %i0
4488 copyin_blkd26:
4489 subcc %i4, 8, %i4
4490 blu,pn %ncc, copyin_blkdone
4491 faligndata %d26, %d28, %d48
4492 std %d48, [%i0]
4493 add %i0, 8, %i0
4494 copyin_blkd28:
4495 subcc %i4, 8, %i4
4496 blu,pn %ncc, copyin_blkdone
4497 faligndata %d28, %d30, %d48
4498 std %d48, [%i0]
4499 add %i0, 8, %i0
4500 copyin_blkd30:
4501 subcc %i4, 8, %i4
4502 blu,pn %ncc, copyin_blkdone
4503 fsrc1 %d30, %d0
4504 ba,a,pt %ncc, copyin_blkleft
4505 copyin_blkd32:
4506 subcc %i4, 8, %i4
4507 blu,pn %ncc, copyin_blkdone
4508 faligndata %d32, %d34, %d48
4509 std %d48, [%i0]
4510 add %i0, 8, %i0
4511 copyin_blkd34:
4512 subcc %i4, 8, %i4
4513 blu,pn %ncc, copyin_blkdone
4514 faligndata %d34, %d36, %d48
4515 std %d48, [%i0]
4516 add %i0, 8, %i0
4517 copyin_blkd36:
4518 subcc %i4, 8, %i4
4519 blu,pn %ncc, copyin_blkdone
4520 faligndata %d36, %d38, %d48
4521 std %d48, [%i0]
4522 add %i0, 8, %i0
4523 copyin_blkd38:
4524 subcc %i4, 8, %i4
4525 blu,pn %ncc, copyin_blkdone
4526 faligndata %d38, %d40, %d48
4527 std %d48, [%i0]
4528 add %i0, 8, %i0
4529 copyin_blkd40:
4530 subcc %i4, 8, %i4
4531 blu,pn %ncc, copyin_blkdone
4532 faligndata %d40, %d42, %d48
4533 std %d48, [%i0]
4534 add %i0, 8, %i0
4535 copyin_blkd42:
4536 subcc %i4, 8, %i4
4537 blu,pn %ncc, copyin_blkdone
4538 faligndata %d42, %d44, %d48
4539 std %d48, [%i0]
4540 add %i0, 8, %i0
4541 copyin_blkd44:
4542 subcc %i4, 8, %i4
4543 blu,pn %ncc, copyin_blkdone
4544 faligndata %d44, %d46, %d48
4545 std %d48, [%i0]
4546 add %i0, 8, %i0
4547 copyin_blkd46:
4548 subcc %i4, 8, %i4
4549 blu,pn %ncc, copyin_blkdone
4550 fsrc1 %d46, %d0
4551
4552 copyin_blkleft:
4553 1:
4554 ldda [%l7]ASI_USER, %d2
4555 add %l7, 8, %l7
4556 subcc %i4, 8, %i4
4557 faligndata %d0, %d2, %d8
4558 std %d8, [%i0]
4559 blu,pn %ncc, copyin_blkdone
4560 add %i0, 8, %i0
4561 ldda [%l7]ASI_USER, %d0
4562 add %l7, 8, %l7
4563 subcc %i4, 8, %i4
4564 faligndata %d2, %d0, %d8
4565 std %d8, [%i0]
4566 bgeu,pt %ncc, 1b
4567 add %i0, 8, %i0
4568
4569 copyin_blkdone:
4570 tst %i2
4571 bz,pt %ncc, .copyin_exit
4572 and %l3, 0x4, %l3 ! fprs.du = fprs.dl = 0
4573
4574 7: lduba [%i1]ASI_USER, %i4
4575 inc %i1
4576 inc %i0
4577 deccc %i2
4578 bgu %ncc, 7b
4579 stb %i4, [%i0 - 1]
4580
4581 .copyin_exit:
4582 membar #StoreLoad|#StoreStore
4583 btst FPUSED_FLAG, SAVED_LOFAULT
4584 bz %icc, 1f
4585 nop
4586
4587 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr
4588 wr %o2, 0, %gsr
4589
4590 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
4591 btst FPRS_FEF, %o3
4592 bz %icc, 4f
4593 nop
4594
4595 ! restore fpregs from stack
4596 membar #Sync
4597 add %fp, STACK_BIAS - 257, %o2
4598 and %o2, -64, %o2
4599 ldda [%o2]ASI_BLK_P, %d0
4600 add %o2, 64, %o2
4601 ldda [%o2]ASI_BLK_P, %d16
4602 add %o2, 64, %o2
4603 ldda [%o2]ASI_BLK_P, %d32
4604 add %o2, 64, %o2
4605 ldda [%o2]ASI_BLK_P, %d48
4606 membar #Sync
4607
4608 ba,pt %ncc, 1f
4609 wr %o3, 0, %fprs ! restore fprs
4610
4611 4:
4612 FZERO ! zero all of the fpregs
4613 wr %o3, 0, %fprs ! restore fprs
4614
4615 1:
4616 andn SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
4617 membar #Sync ! sync error barrier
4618 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
4619 ret
4620 restore %g0, 0, %o0
4621 .copyin_err:
4622 ldn [THREAD_REG + T_COPYOPS], %o4
4623 brz %o4, 2f
4624 nop
4625 ldn [%o4 + CP_COPYIN], %g2
4626 jmp %g2
4627 nop
4628 2:
4629 retl
4630 mov -1, %o0
4631 SET_SIZE(copyin)
4632
4633 #endif /* lint */
4634
/*
 * xcopyin(uaddr, kaddr, count)
 *
 * Entry point that shares the copyin body: it loads the address of
 * .xcopyin_err into REAL_LOFAULT and branches to .do_copyin, which is
 * not visible in this part of the file. The "or" that completes the
 * address sits in the branch delay slot, so it executes before
 * .do_copyin is reached.
 *
 * .xcopyin_err: if curthread's T_COPYOPS pointer is non-NULL, control is
 * transferred (jmp, not call) to the routine stored at offset CP_XCOPYIN
 * in that vector; otherwise the value in %g1 is returned in %o0.
 * NOTE(review): %g1 is presumably the error code supplied by the fault
 * path before it vectors here - confirm against the lofault code.
 */
4635 #ifdef lint
4636
4637 /*ARGSUSED*/
4638 int
4639 xcopyin(const void *uaddr, void *kaddr, size_t count)
4640 { return (0); }
4641
4642 #else /* lint */
4643
4644 ENTRY(xcopyin)
/* Build the handler address in two halves; the low half is the delay slot. */
4645 sethi %hi(.xcopyin_err), REAL_LOFAULT
4646 b .do_copyin
4647 or REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT
4648 .xcopyin_err:
/* Prefer the thread's copyops vector when one is installed. */
4649 ldn [THREAD_REG + T_COPYOPS], %o4
4650 brz %o4, 2f
4651 nop
4652 ldn [%o4 + CP_XCOPYIN], %g2
4653 jmp %g2
4654 nop
4655 2:
/* No copyops vector: hand %g1 back to the caller as the return value. */
4656 retl
4657 mov %g1, %o0
4658 SET_SIZE(xcopyin)
4659
4660 #endif /* lint */
4661
/*
 * xcopyin_little(uaddr, kaddr, count)
 *
 * Byte-at-a-time copy from uaddr to kaddr in which the byte order is
 * reversed: the first byte stored (kaddr[0]) is the one loaded from
 * uaddr[count - 1], and each later store takes the next lower source
 * byte. Loads go through ASI_AIUSL; stores are ordinary stores.
 *
 * .little_err is installed in curthread's t_lofault for the duration of
 * the copy, and the previous value (kept in %o5) is written back on both
 * exits. Returns 0 on completion (including count == 0); on the
 * .little_err path the value in %g1 is returned.
 * NOTE(review): %g1 presumably carries the error code from the fault
 * path - confirm.
 *
 * Register use inside the loop:
 *	%o0 + %o3	source address (moves down one byte per pass)
 *	%o1 + %o3	destination address (moves up one byte per pass)
 *	%o3		negative index running from -count up to 0
 *	%o4		byte in flight
 */
4662 #ifdef lint
4663
4664 /*ARGSUSED*/
4665 int
4666 xcopyin_little(const void *uaddr, void *kaddr, size_t count)
4667 { return (0); }
4668
4669 #else /* lint */
4670
4671 ENTRY(xcopyin_little)
/* Swap in .little_err as t_lofault, remembering the old handler in %o5. */
4672 sethi %hi(.little_err), %o4
4673 ldn [THREAD_REG + T_LOFAULT], %o5
4674 or %o4, %lo(.little_err), %o4
4675 membar #Sync ! sync error barrier
4676 stn %o4, [THREAD_REG + T_LOFAULT]
4677
/*
 * %o3 = -count (Z set when count == 0). %o0 is biased to
 * uaddr + 2*count - 1 so that %o0 + %o3 addresses uaddr[count - 1];
 * %o1 is biased to kaddr + count so that %o1 + %o3 addresses kaddr[0].
 */
4678 subcc %g0, %o2, %o3
4679 add %o0, %o2, %o0
4680 bz,pn %ncc, 2f ! check for zero bytes
4681 sub %o2, 1, %o4
4682 add %o0, %o4, %o0 ! start w/last byte
4683 add %o1, %o2, %o1
4684 lduba [%o0+%o3]ASI_AIUSL, %o4
4685
/*
 * The index increments by 1 while %o0 drops by 2, so the source address
 * falls by one byte per pass. The carry out of inccc (index reached 0)
 * ends the loop; the annulled delay-slot load runs only when the branch
 * is taken.
 */
4686 1: stb %o4, [%o1+%o3]
4687 inccc %o3
4688 sub %o0, 2, %o0 ! get next byte
4689 bcc,a,pt %ncc, 1b
4690 lduba [%o0+%o3]ASI_AIUSL, %o4
4691
4692 2: membar #Sync ! sync error barrier
4693 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
4694 retl
4695 mov %g0, %o0 ! return (0)
4696
/* Fault path: put the old handler back and return %g1. */
4697 .little_err:
4698 membar #Sync ! sync error barrier
4699 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
4700 retl
4701 mov %g1, %o0
4702 SET_SIZE(xcopyin_little)
4703
4704 #endif /* lint */
4705
4706
4707 /*
4708 * Copy a block of storage - must not overlap (from + len <= to).
4709 * No fault handler installed (to be called under on_fault())
4710 */
/*
 * copyin_noerr(ufrom, kto, count)
 *
 * REAL_LOFAULT is loaded with the address of .copyio_noerr (low bits
 * filled in by the "or" in the branch delay slot) and control goes to
 * .do_copyin, which is not visible in this part of the file.
 *
 * .copyio_noerr, which copyout_noerr also names, does nothing but jump
 * to the address held in SAVED_LOFAULT.
 * NOTE(review): SAVED_LOFAULT presumably holds the t_lofault value that
 * was current on entry (e.g. the on_fault() handler) - confirm in
 * .do_copyin.
 */
4711 #if defined(lint)
4712
4713 /* ARGSUSED */
4714 void
4715 copyin_noerr(const void *ufrom, void *kto, size_t count)
4716 {}
4717
4718 #else /* lint */
4719
4720 ENTRY(copyin_noerr)
4721 sethi %hi(.copyio_noerr), REAL_LOFAULT
4722 b .do_copyin
4723 or REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
/* Shared by copyin_noerr and copyout_noerr: defer to SAVED_LOFAULT. */
4724 .copyio_noerr:
4725 jmp SAVED_LOFAULT
4726 nop
4727 SET_SIZE(copyin_noerr)
4728
4729 #endif /* lint */
4730
4731 /*
4732 * Copy a block of storage - must not overlap (from + len <= to).
4733 * No fault handler installed (to be called under on_fault())
4734 */
/*
 * copyout_noerr(kfrom, uto, count)
 *
 * Loads REAL_LOFAULT with the address of .copyio_noerr (the label
 * defined inside copyin_noerr, which jumps to SAVED_LOFAULT) and
 * branches to .do_copyout; the "or" completing the address is in the
 * branch delay slot and therefore executes before the target.
 * NOTE(review): .do_copyout lies outside this part of the file; it is
 * presumably the common copyout body - confirm.
 */
4735
4736 #if defined(lint)
4737
4738 /* ARGSUSED */
4739 void
4740 copyout_noerr(const void *kfrom, void *uto, size_t count)
4741 {}
4742
4743 #else /* lint */
4744
4745 ENTRY(copyout_noerr)
4746 sethi %hi(.copyio_noerr), REAL_LOFAULT
4747 b .do_copyout
4748 or REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
4749 SET_SIZE(copyout_noerr)
4750
4751 #endif /* lint */
4752
/*
 * Tunables for the hardware-assisted (block load/store) paths.
 *
 * The lint branch gives the C-visible types; the real definitions are
 * the 32-bit words emitted after each DGDEF below.
 *
 * use_hw_bcopy, use_hw_copyio, use_hw_bzero: initialised to 1.
 *   NOTE(review): judging by the names these enable the HW paths for
 *   bcopy, copyin/copyout and bzero; in this part of the file only
 *   use_hw_bzero is mentioned (in hwblkclr's header) - confirm the rest.
 *
 * hw_copy_limit_{1,2,4,8}: initialised to 0. The copyin dispatch code
 *   earlier in this file loads the limit matching the common alignment
 *   of the two addresses, treats 0 as "HW assist off", and otherwise
 *   takes the block-copy path only when the length exceeds the limit.
 *   NOTE(review): non-zero values are presumably installed elsewhere at
 *   startup - confirm.
 */
4753 #if defined(lint)
4754
4755 int use_hw_bcopy = 1;
4756 int use_hw_copyio = 1;
4757 int use_hw_bzero = 1;
4758 uint_t hw_copy_limit_1 = 0;
4759 uint_t hw_copy_limit_2 = 0;
4760 uint_t hw_copy_limit_4 = 0;
4761 uint_t hw_copy_limit_8 = 0;
4762
4763 #else /* !lint */
4764
4765 .align 4
4766 DGDEF(use_hw_bcopy)
4767 .word 1
4768 DGDEF(use_hw_copyio)
4769 .word 1
4770 DGDEF(use_hw_bzero)
4771 .word 1
4772 DGDEF(hw_copy_limit_1)
4773 .word 0
4774 DGDEF(hw_copy_limit_2)
4775 .word 0
4776 DGDEF(hw_copy_limit_4)
4777 .word 0
4778 DGDEF(hw_copy_limit_8)
4779 .word 0
4780
/* Return to the text section for the routines that follow. */
4781 .align 64
4782 .section ".text"
4783 #endif /* !lint */
4784
4785
4786 /*
4787 * hwblkclr - clears block-aligned, block-multiple-sized regions that are
4788 * at least 256 bytes in length using spitfire's block stores. If
4789 * the criteria for using this routine are not met then it calls bzero
4790 * and returns 1. Otherwise 0 is returned indicating success.
4791 * Caller is responsible for ensuring use_hw_bzero is true and that
4792 * kpreempt_disable() has been called.
4793 */
/*
 * Arguments:	addr (%i0 after the save) - start of the region
 *		len  (%i1 after the save) - length in bytes
 * Returns:	0 when the region was cleared with block stores,
 *		1 when the request was handed to bzero instead.
 *
 * If FPRS_FEF is set in %fprs on entry, the %d0 block (%d0-%d14, the
 * only FP registers this routine writes) is saved on the stack first
 * and reloaded before returning; %fprs is restored on both paths.
 */
4794 #ifdef lint
4795 /*ARGSUSED*/
4796 int
4797 hwblkclr(void *addr, size_t len)
4798 {
4799 return(0);
4800 }
4801 #else /* lint */
4802 ! %i0 - start address
4803 ! %i1 - length of region (multiple of 64)
4804 ! %l0 - saved fprs
4805 ! %l1 - pointer to saved %d0 block
4806 ! %l2 - saved curthread->t_lwp
/* NOTE(review): %l2 is listed above but no instruction below references it. */
4807
4808 ENTRY(hwblkclr)
4809 ! get another window w/space for one aligned block of saved fpregs
4810 save %sp, -SA(MINFRAME + 2*64), %sp
4811
4812 ! Must be block-aligned
4813 andcc %i0, (64-1), %g0
4814 bnz,pn %ncc, 1f
4815 nop
4816
4817 ! ... and must be 256 bytes or more
4818 cmp %i1, 256
4819 blu,pn %ncc, 1f
4820 nop
4821
4822 ! ... and length must be a multiple of 64
4823 andcc %i1, (64-1), %g0
4824 bz,pn %ncc, 2f
4825 nop
4826
4827 1: ! punt, call bzero but notify the caller that bzero was used
4828 mov %i0, %o0
4829 call bzero
4830 mov %i1, %o1
4831 ret
4832 restore %g0, 1, %o0 ! return (1) - did not use block operations
4833
/* All three checks passed. %l0 keeps the entry %fprs for the exit path. */
4834 2: rd %fprs, %l0 ! check for unused fp
4835 btst FPRS_FEF, %l0
4836 bz 1f
4837 nop
4838
4839 ! save in-use fpregs on stack
4840 membar #Sync
/* %l1 = a 64-byte aligned address below %fp + STACK_BIAS for the block store. */
4841 add %fp, STACK_BIAS - 65, %l1
4842 and %l1, -64, %l1
4843 stda %d0, [%l1]ASI_BLK_P
4844
/* Enable the FPU and select ASI_BLK_P as %asi for the stda's below. */
4845 1: membar #StoreStore|#StoreLoad|#LoadStore
4846 wr %g0, FPRS_FEF, %fprs
4847 wr %g0, ASI_BLK_P, %asi
4848
4849 ! Clear block
4850 fzero %d0
4851 fzero %d2
4852 fzero %d4
4853 fzero %d6
4854 fzero %d8
4855 fzero %d10
4856 fzero %d12
4857 fzero %d14
4858
/* %i3 is the per-pass stride: 256 in the main loop, less for the tail. */
4859 mov 256, %i3
4860 ba .pz_doblock
4861 nop
4862
/*
 * Main loop body: together with the store in the delay slot of the
 * branch at .pz_doblock, each pass issues four 64-byte block stores
 * (offsets 192, 128, 64, 0) and then advances %i0/%i1 by %i3.
 */
4863 .pz_blkstart:
4864 ! stda %d0, [%i0+192]%asi ! in dly slot of branch that got us here
4865 stda %d0, [%i0+128]%asi
4866 stda %d0, [%i0+64]%asi
4867 stda %d0, [%i0]%asi
4868 .pz_zinst:
4869 add %i0, %i3, %i0
4870 sub %i1, %i3, %i1
4871 .pz_doblock:
4872 cmp %i1, 256
4873 bgeu,a %ncc, .pz_blkstart
4874 stda %d0, [%i0+192]%asi
4875
/*
 * Fewer than 256 bytes remain. %i3 becomes the remaining length rounded
 * down to whole blocks; %i3 >> 4 is 4 bytes (one instruction) per block,
 * so the jmp lands inside the stda sequence above .pz_zinst just far
 * enough back to store exactly the remaining blocks, after which the
 * add/sub at .pz_zinst consume them. The andn sits in the delay slot of
 * the blu, so it also executes when the branch to .pz_finish is taken;
 * its result is not used on that path.
 */
4876 cmp %i1, 64
4877 blu %ncc, .pz_finish
4878
4879 andn %i1, (64-1), %i3
4880 srl %i3, 4, %i2 ! using blocks, 1 instr / 16 words
4881 set .pz_zinst, %i4
4882 sub %i4, %i2, %i4
4883 jmp %i4
4884 nop
4885
/*
 * Exit: if FPRS_FEF was clear on entry the annulled branch restores
 * %fprs in its delay slot and skips the register reload; otherwise the
 * saved %d0 block is reloaded before %fprs is restored.
 */
4886 .pz_finish:
4887 membar #Sync
4888 btst FPRS_FEF, %l0
4889 bz,a .pz_finished
4890 wr %l0, 0, %fprs ! restore fprs
4891
4892 ! restore fpregs from stack
4893 ldda [%l1]ASI_BLK_P, %d0
4894 membar #Sync
4895 wr %l0, 0, %fprs ! restore fprs
4896
4897 .pz_finished:
4898 ret
4899 restore %g0, 0, %o0 ! return (bzero or not)
4900 SET_SIZE(hwblkclr)
4901 #endif /* lint */
4902
4903 #ifdef lint
4904 /* Copy 32 bytes of data from src to dst using physical addresses */
4905 /*ARGSUSED*/
4906 void
4907 hw_pa_bcopy32(uint64_t src, uint64_t dst)
4908 {}
4909 #else /*!lint */
4910
4911 /*
4912 * Copy 32 bytes of data from src (%o0) to dst (%o1)
4913 * using physical addresses.
4914 */
/*
 * Interrupts are disabled (PSTATE_IE cleared) for the duration of the
 * copy; the entry %pstate is kept in %g1 and written back in the retl
 * delay slot. The four 8-byte ASI_MEM loads into %o2-%o5 are all issued
 * before the first store. %o0 and %o1 are advanced in place and end up
 * 24 bytes past their entry values; %g1 and %g2 are overwritten.
 * NOTE(review): ldxa/stxa need 8-byte aligned addresses; presumably the
 * callers guarantee this - confirm.
 */
4915 ENTRY_NP(hw_pa_bcopy32)
/* Mask interrupts: %g1 = old pstate, %g2 = old pstate without IE. */
4916 rdpr %pstate, %g1
4917 andn %g1, PSTATE_IE, %g2
4918 wrpr %g0, %g2, %pstate
4919
4920 ldxa [%o0]ASI_MEM, %o2
4921 add %o0, 8, %o0
4922 ldxa [%o0]ASI_MEM, %o3
4923 add %o0, 8, %o0
4924 ldxa [%o0]ASI_MEM, %o4
4925 add %o0, 8, %o0
4926 ldxa [%o0]ASI_MEM, %o5
4927 stxa %o2, [%o1]ASI_MEM
4928 add %o1, 8, %o1
4929 stxa %o3, [%o1]ASI_MEM
4930 add %o1, 8, %o1
4931 stxa %o4, [%o1]ASI_MEM
4932 add %o1, 8, %o1
4933 stxa %o5, [%o1]ASI_MEM
4934
/* Drain the stores, then restore the caller's pstate while returning. */
4935 membar #Sync
4936 retl
4937 wrpr %g0, %g1, %pstate
4938 SET_SIZE(hw_pa_bcopy32)
4939 #endif /* lint */