/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/asm_linkage.h>
#include <sys/vtrace.h>
#include <sys/machthread.h>
#include <sys/clock.h>
#include <sys/asi.h>
#include <sys/fsr.h>
#include <sys/privregs.h>

#if !defined(lint)
#include "assym.h"
#endif	/* lint */


/*
 * For transfers of this many bytes or fewer we always copy byte-for-byte.
 */
#define	SMALL_LIMIT	7

/*
 * LOFAULT_SET : Flag set by kzero and kcopy to indicate that the
 * t_lofault handler was set
 */
#define	LOFAULT_SET	2


/*
 * Copy a block of storage, returning an error code if `from' or
 * `to' takes a kernel pagefault which cannot be resolved.
 * Returns an errno value on pagefault error, 0 if all is well.
 */
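
/*
 * Rough C sketch of kcopy()'s t_lofault protocol (hypothetical, for
 * illustration only; the fault path is simplified):
 *
 *	int
 *	kcopy(const void *from, void *to, size_t count)
 *	{
 *		saved = curthread->t_lofault;
 *		curthread->t_lofault = .copyerr, with LOFAULT_SET ORed
 *		    into the saved value kept in %o5;
 *		copy count bytes via the common .do_copy code;
 *		-- on a fault, .copyerr clears LOFAULT_SET, restores
 *		-- `saved', and returns the errno left in %g1;
 *		curthread->t_lofault = saved;
 *		return (0);
 *	}
 */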



#if defined(lint)

/* ARGSUSED */
int
kcopy(const void *from, void *to, size_t count)
{ return (0); }

#else	/* lint */

	.seg	".text"
	.align	4

	ENTRY(kcopy)

	save	%sp, -SA(MINFRAME), %sp
	set	.copyerr, %l7		! copyerr is lofault value
	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
	or	%o5, LOFAULT_SET, %o5
	membar	#Sync			! sync error barrier
	b	.do_copy		! common code
	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault

	/*
	 * We got here because of a fault during kcopy.
	 * Errno value is in %g1.
	 */
.copyerr:
	! kcopy() *always* sets a t_lofault handler, and ORs LOFAULT_SET
	! into %o5 to indicate that it has done so.  We must clear the
	! LOFAULT_SET flag before restoring the saved handler.
	andn	%o5, LOFAULT_SET, %o5
	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	ret
	restore	%g1, 0, %o0

	SET_SIZE(kcopy)
#endif	/* lint */


/*
 * Copy a block of storage - must not overlap (from + count <= to
 * or to + count <= from).
 */
#if defined(lint)

/* ARGSUSED */
void
bcopy(const void *from, void *to, size_t count)
{}

#else	/* lint */

	ENTRY(bcopy)

	save	%sp, -SA(MINFRAME), %sp
	clr	%o5			! flag LOFAULT_SET is not set for bcopy

.do_copy:
	mov	%i1, %g5		! save dest addr start

	mov	%i2, %l6		! save size

	cmp	%i2, 12			! for small counts
	blu	%ncc, .bytecp		! just copy bytes
	.empty

	!
	! use aligned transfers where possible
	!
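	! A rough C sketch of this dispatch (hypothetical, illustration only):
	!
	!	if ((((uintptr_t)from ^ (uintptr_t)to) & 7) == 0)
	!		byte-align, then copy doublewords (.aldoubcp);
	!	else if ((((uintptr_t)from ^ (uintptr_t)to) & 3) == 0)
	!		byte-align, then copy words (.alwordcp);
	!	else
	!		use the shift-and-merge word copy below;
	!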
	xor	%i0, %i1, %o4		! xor from and to address
	btst	7, %o4			! if lower three bits zero
	bz	.aldoubcp		! can align on double boundary
	.empty				! assembler complains about label

	xor	%i0, %i1, %o4		! xor from and to address
	btst	3, %o4			! if lower two bits zero
	bz	.alwordcp		! can align on word boundary
	btst	3, %i0			! delay slot, from address unaligned?
	!
	! use aligned reads and writes where possible
	! this differs from wordcp in that it copes
	! with odd alignment between source and destination
	! using word reads and writes with the proper shifts
	! in between to align transfers to and from memory
	! i0 - src address, i1 - dest address, i2 - count
	! i3, i4 - tmps used for generating complete word
	! i5 (word to write)
	! l0 size in bits of upper part of source word (US)
	! l1 size in bits of lower part of source word (LS = 32 - US)
	! l2 size in bits of upper part of destination word (UD)
	! l3 size in bits of lower part of destination word (LD = 32 - UD)
	! l4 number of bytes leftover after aligned transfers complete
	! l5 the number 32
	!
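	! Each aligned transfer in the loops below is, in rough C terms
	! (hypothetical sketch; US and LS are the bit counts in %l0/%l1):
	!
	!	store_word = leftover | (src_word >> US);
	!	leftover   = src_word << LS;	(LS == 32 - US)
	!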
	mov	32, %l5			! load an oft-needed constant
	bz	.align_dst_only
	btst	3, %i1			! is destination address aligned?
	clr	%i4			! clear registers used in either case
	bz	.align_src_only
	clr	%l0
	!
	! both source and destination addresses are unaligned
	!
1:					! align source
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment size of upper source (US)
	bnz,a	1b
	sll	%i4, 8, %i4		! make room for next byte

	sub	%l5, %l0, %l1		! generate shift left count (LS)
	sll	%i4, %l1, %i4		! prepare to get rest
	ld	[%i0], %i3		! read a word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i5		! upper src bits into lower dst bits
	or	%i4, %i5, %i5		! merge
	mov	24, %l3			! align destination
1:
	srl	%i5, %l3, %i4		! prepare to write a single byte
	stb	%i4, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l3, 8, %l3		! delay slot, decrement shift count (LD)
	sub	%l5, %l3, %l2		! generate shift left count (UD)
	sll	%i5, %l2, %i5		! move leftover into upper bytes
	cmp	%l2, %l0		! cmp # reqd to fill dst w/ old src left
	bgu	%ncc, .more_needed	! need more to fill than we have
	nop

	sll	%i3, %l1, %i3		! clear upper used byte(s)
	srl	%i3, %l1, %i3
	! get the odd bytes between alignments
	sub	%l0, %l2, %l0		! regenerate shift count
	sub	%l5, %l0, %l1		! generate new shift left count (LS)
	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
	srl	%i3, %l0, %i4
	or	%i5, %i4, %i5
	st	%i5, [%i1]		! write a word
	subcc	%i2, 4, %i2		! decrement count
	bz	%ncc, .unalign_out
	add	%i1, 4, %i1		! increment destination address

	b	2f
	sll	%i3, %l1, %i5		! get leftover into upper bits
.more_needed:
	sll	%i3, %l0, %i3		! save remaining byte(s)
	srl	%i3, %l0, %i3
	sub	%l2, %l0, %l1		! regenerate shift count
	sub	%l5, %l1, %l0		! generate new shift left count
	sll	%i3, %l1, %i4		! move to fill empty space
	b	3f
	or	%i5, %i4, %i5		! merge to complete word
	!
	! the source address is aligned and destination is not
	!
.align_dst_only:
	ld	[%i0], %i4		! read a word
	add	%i0, 4, %i0		! increment source address
	mov	24, %l0			! initial shift alignment count
1:
	srl	%i4, %l0, %i3		! prepare to write a single byte
	stb	%i3, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l0, 8, %l0		! delay slot, decrement shift count
.xfer:
	sub	%l5, %l0, %l1		! generate shift left count
	sll	%i4, %l1, %i5		! get leftover
3:
	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
2:
	ld	[%i0], %i3		! read a source word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i4		! upper src bits into lower dst bits
	or	%i5, %i4, %i5		! merge with upper dest bits (leftover)
	st	%i5, [%i1]		! write a destination word
	subcc	%i2, 4, %i2		! decrement count
	bz	%ncc, .unalign_out	! check if done
	add	%i1, 4, %i1		! increment destination address
	b	2b			! loop
	sll	%i3, %l1, %i5		! get leftover
.unalign_out:
	tst	%l4			! any bytes leftover?
	bz	%ncc, .cpdone
	.empty				! allow next instruction in delay slot
1:
	sub	%l0, 8, %l0		! decrement shift
	srl	%i3, %l0, %i4		! upper src byte into lower dst byte
	stb	%i4, [%i1]		! write a byte
	subcc	%l4, 1, %l4		! decrement count
	bz	%ncc, .cpdone		! done?
	add	%i1, 1, %i1		! increment destination
	tst	%l0			! any more previously read bytes?
	bnz	%ncc, 1b		! we have leftover bytes
	mov	%l4, %i2		! delay slot, mv cnt where dbytecp wants
	b	.dbytecp		! let dbytecp do the rest
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
	!
	! the destination address is aligned and the source is not
	!
.align_src_only:
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment shift count (US)
	bnz,a	.align_src_only
	sll	%i4, 8, %i4		! make room for next byte
	b,a	.xfer
	!
	! if the from address is unaligned for double-word moves,
	! move bytes till it is; if count is < 56 it could take
	! longer to align the thing than to do the transfer
	! in word size chunks right away
	!
.aldoubcp:
	cmp	%i2, 56			! if count < 56, use wordcp, it takes
	blu,a	%ncc, .alwordcp		! longer to align doubles than words
	mov	3, %o0			! mask for word alignment
	call	.alignit		! copy bytes until aligned
	mov	7, %o0			! mask for double alignment
	!
	! source and destination are now double-word aligned
	! i3 has aligned count returned by alignit
	!
	and	%i2, 7, %i2		! unaligned leftover count
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
5:
	ldx	[%i0+%i1], %o4		! read from address
	stx	%o4, [%i1]		! write at destination address
	subcc	%i3, 8, %i3		! dec count
	bgu	%ncc, 5b
	add	%i1, 8, %i1		! delay slot, inc to address
	cmp	%i2, 4			! see if we can copy a word
	blu	%ncc, .dbytecp		! if 3 or fewer bytes use bytecp
	.empty
	!
	! for leftover bytes we fall into wordcp, if needed
	!
.wordcp:
	and	%i2, 3, %i2		! unaligned leftover count
5:
	ld	[%i0+%i1], %o4		! read from address
	st	%o4, [%i1]		! write at destination address
	subcc	%i3, 4, %i3		! dec count
	bgu	%ncc, 5b
	add	%i1, 4, %i1		! delay slot, inc to address
	b,a	.dbytecp

	! we come here to align copies on word boundaries
.alwordcp:
	call	.alignit		! go word-align it
	mov	3, %o0			! bits that must be zero to be aligned
	b	.wordcp
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst

	!
	! byte copy, works with any alignment
	!
.bytecp:
	b	.dbytecp
	sub	%i0, %i1, %i0		! i0 gets difference of src and dst

	!
	! differenced byte copy, works with any alignment
	! assumes dest in %i1 and (source - dest) in %i0
	!
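	! In rough C terms (hypothetical sketch), with d = src - dst:
	!
	!	while (count-- != 0)
	!		*dst = *(dst + d), dst++;
	!
	! so only the destination pointer needs to be advanced per byte.
	!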
1:
	stb	%o4, [%i1]		! write to address
	inc	%i1			! inc to address
.dbytecp:
	deccc	%i2			! dec count
	bgeu,a	%ncc, 1b		! loop till done
	ldub	[%i0+%i1], %o4		! read from address
.cpdone:
	membar	#Sync			! sync error barrier
	! Restore t_lofault handler, if we came here from kcopy().
	tst	%o5
	bz	%ncc, 1f
	andn	%o5, LOFAULT_SET, %o5
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1:
	mov	%g5, %o0		! copy dest address
	call	sync_icache
	mov	%l6, %o1		! saved size
	ret
	restore	%g0, 0, %o0		! return (0)

/*
 * Common code used to align transfers on word and doubleword
 * boundaries.  Aligns source and destination and returns a count
 * of aligned bytes to transfer in %i3
 */
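/*
 * Hypothetical C sketch of .alignit (illustration only; the caller
 * passes the alignment mask in %o0, and src and dst are already known
 * to share the same alignment relative to the mask):
 *
 *	while (from & mask) {
 *		*to++ = *from++;
 *		count--;
 *	}
 *	return (count & ~mask);		-- aligned byte count, in %i3
 */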
1:
	inc	%i0			! inc from
	stb	%o4, [%i1]		! write a byte
	inc	%i1			! inc to
	dec	%i2			! dec count
.alignit:
	btst	%o0, %i0		! %o0 is bit mask to check for alignment
	bnz,a	1b
	ldub	[%i0], %o4		! read next byte

	retl
	andn	%i2, %o0, %i3		! return size of aligned bytes
	SET_SIZE(bcopy)

#endif	/* lint */

/*
 * Block copy with possibly overlapped operands.
 */
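/*
 * Hypothetical C sketch of the dispatch below (illustration only):
 *
 *	if (count == 0)
 *		return;
 *	if (count <= labs(from - to))
 *		bcopy(from, to, count);		-- no actual overlap
 *	else if (from < to)
 *		copy backwards, last byte first;
 *	else
 *		copy forwards, first byte first;
 */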

#if defined(lint)

/*ARGSUSED*/
void
ovbcopy(const void *from, void *to, size_t count)
{}

#else	/* lint */

	ENTRY(ovbcopy)
	tst	%o2			! check count
	bgu,a	%ncc, 1f		! nonzero count, go do the copy
	subcc	%o0, %o1, %o3		! difference of from and to address

	retl				! zero count, nothing to do
	nop
1:
	bneg,a	%ncc, 2f
	neg	%o3			! if < 0, make it positive
2:	cmp	%o2, %o3		! cmp size and abs(from - to)
	bleu	%ncc, bcopy		! if size <= abs(diff): use bcopy,
	.empty				! no overlap
	cmp	%o0, %o1		! compare from and to addresses
	blu	%ncc, .ov_bkwd		! if from < to, copy backwards
	nop
	!
	! Copy forwards.
	!
.ov_fwd:
	ldub	[%o0], %o3		! read from address
	inc	%o0			! inc from address
	stb	%o3, [%o1]		! write to address
	deccc	%o2			! dec count
	bgu	%ncc, .ov_fwd		! loop till done
	inc	%o1			! inc to address

	retl				! return
	nop
	!
	! Copy backwards.
	!
.ov_bkwd:
	deccc	%o2			! dec count
	ldub	[%o0 + %o2], %o3	! get byte at end of src
	bgu	%ncc, .ov_bkwd		! loop till done
	stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst

	retl				! return
	nop
	SET_SIZE(ovbcopy)

#endif	/* lint */

/*
 * hwblkpagecopy()
 *
 * Copies exactly one page.  This routine assumes the caller (ppcopy)
 * has already disabled kernel preemption and has checked
 * use_hw_bcopy.
 */
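/*
 * Rough C sketch of the loop below (hypothetical; PAGESIZE is a
 * multiple of 0x80, so no remainder handling is needed):
 *
 *	for (n = PAGESIZE; n > 0; n -= 0x80, src += 0x80, dst += 0x80)
 *		copy 128 bytes as sixteen 8-byte ldx/stx pairs;
 *	sync_icache(dst_start, PAGESIZE);
 */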
#ifdef lint
/*ARGSUSED*/
void
hwblkpagecopy(const void *src, void *dst)
{ }
#else	/* lint */
	ENTRY(hwblkpagecopy)
	save	%sp, -SA(MINFRAME), %sp

	! %i0 - source address (arg)
	! %i1 - destination address (arg)
	! %i2 - length of region (not arg)

	set	PAGESIZE, %i2
	mov	%i1, %o0		! store destination address for flushing

	/*
	 * We copy exactly one page; PAGESIZE is a multiple of 0x80.
	 */
1:
	ldx	[%i0+0x0], %l0
	ldx	[%i0+0x8], %l1
	ldx	[%i0+0x10], %l2
	ldx	[%i0+0x18], %l3
	ldx	[%i0+0x20], %l4
	ldx	[%i0+0x28], %l5
	ldx	[%i0+0x30], %l6
	ldx	[%i0+0x38], %l7
	stx	%l0, [%i1+0x0]
	stx	%l1, [%i1+0x8]
	stx	%l2, [%i1+0x10]
	stx	%l3, [%i1+0x18]
	stx	%l4, [%i1+0x20]
	stx	%l5, [%i1+0x28]
	stx	%l6, [%i1+0x30]
	stx	%l7, [%i1+0x38]

	ldx	[%i0+0x40], %l0
	ldx	[%i0+0x48], %l1
	ldx	[%i0+0x50], %l2
	ldx	[%i0+0x58], %l3
	ldx	[%i0+0x60], %l4
	ldx	[%i0+0x68], %l5
	ldx	[%i0+0x70], %l6
	ldx	[%i0+0x78], %l7
	stx	%l0, [%i1+0x40]
	stx	%l1, [%i1+0x48]
	stx	%l2, [%i1+0x50]
	stx	%l3, [%i1+0x58]
	stx	%l4, [%i1+0x60]
	stx	%l5, [%i1+0x68]
	stx	%l6, [%i1+0x70]
	stx	%l7, [%i1+0x78]

	add	%i0, 0x80, %i0
	subcc	%i2, 0x80, %i2
	bgu,pt	%xcc, 1b
	add	%i1, 0x80, %i1

	! %o0 contains the dest. address
	set	PAGESIZE, %o1
	call	sync_icache
	nop

	membar	#Sync
	ret
	restore	%g0, 0, %o0
	SET_SIZE(hwblkpagecopy)
#endif	/* lint */


/*
 * Transfer data to and from user space -
 * Note that these routines can cause faults.
 * It is assumed that the kernel has nothing at
 * less than KERNELBASE in the virtual address space.
 *
 * Note that copyin(9F) and copyout(9F) are part of the
 * DDI/DKI which specifies that they return '-1' on "errors."
 *
 * Sigh.
 *
 * So there are two extremely similar routines - xcopyin() and xcopyout()
 * which return the errno that we've faithfully computed.  This
 * allows other callers (e.g. uiomove(9F)) to work correctly.
 * Given that these are used pretty heavily, we expand the calling
 * sequences inline for all flavours (rather than making wrappers).
 *
 * There are also stub routines for xcopyout_little and xcopyin_little,
 * which currently are intended to handle requests of <= 16 bytes from
 * do_unaligned.  Future enhancement to make them handle 8k pages efficiently
 * is left as an exercise...
 */

/*
 * Copy data to and from user space (copyOP/xcopyOP/copyOP_noerr)
 *
 * General theory of operation:
 *
 * None of the copyops routines grab a window.
 *
 * Flow:
 *
 * If count == zero return zero.
 *
 * Store the previous t_lofault handler into %g6.
 * Place the real (per-operation) error handler into %g5.
 * Place the address of our generic fault handler into %o3.
 *
 * If count is less than or equal to SMALL_LIMIT (7) we
 * always do a byte for byte copy.
 *
 * If count is > SMALL_LIMIT, we check the alignment of the input
 * and output pointers.  We store -count in %o3, and we store the number
 * of chunks (8, 4, 2 or 1 byte) operated on in our basic copy loop
 * in %o2.  Following this we branch to the appropriate copy loop and
 * copy that many chunks.  Since we've been adding the chunk size
 * to %o3 each time through as well as decrementing %o2, we can tell
 * if any data is left to be copied by examining %o3.  If that is
 * zero, we're done and can go home.  If not, we figure out what the
 * largest chunk size left to be copied is and branch to that copy
 * loop unless there's only one byte left.  We load that as we're
 * branching to code that stores it just before we return.
 *
 * Fault handlers are invoked if we reference memory that has no
 * current mapping.  All forms share the same copyio_fault handler.
 * This routine handles fixing up the stack and general housecleaning.
 * Each copy operation has a simple fault handler that is then called
 * to do the work specific to the individual operation.  The handlers
 * for copyOP and xcopyOP are found at the end of the individual
 * functions.  The handlers for xcopyOP_little are found at the end of
 * xcopyin_little.  The handlers for copyOP_noerr are found at the end
 * of copyin_noerr.
 */
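
/*
 * Hypothetical C sketch of the flow described above (illustration
 * only; fault handling and the negative-offset loops are elided):
 *
 *	int
 *	copyout(const void *kaddr, void *uaddr, size_t count)
 *	{
 *		if (count == 0)
 *			return (0);
 *		install copyio_fault; save args in %g2/%g3/%g4;
 *		if (count <= SMALL_LIMIT)
 *			byte loop;
 *		else if ((((uintptr_t)kaddr | (uintptr_t)uaddr) & 7) == 0)
 *			8-byte loop, then leftovers byte for byte;
 *		else if ((... & 3) == 0)
 *			4-byte loop, then leftovers;
 *		else if ((... & 1) == 0)
 *			2-byte loop, then the last byte;
 *		else
 *			byte loop;
 *		restore t_lofault;
 *		return (0);
 *	}
 */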

/*
 * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
 */

#if defined(lint)

/*ARGSUSED*/
int
copyout(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else	/* lint */

/*
 * We save the arguments in the following registers in case of a fault:
 *	kaddr - %g2
 *	uaddr - %g3
 *	count - %g4
 */
#define	SAVE_SRC	%g2
#define	SAVE_DST	%g3
#define	SAVE_COUNT	%g4

#define	REAL_LOFAULT	%g5
#define	SAVED_LOFAULT	%g6

/*
 * Generic copyio fault handler.  This is the first line of defense when a
 * fault occurs in (x)copyin/(x)copyout.  In order for this to function
 * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
 * This allows us to share common code for all the flavors of the copy
 * operations, including the _noerr versions.
 *
 * Note that this function will restore the original input parameters before
 * calling REAL_LOFAULT.  So the real handler can vector to the appropriate
 * member of the t_copyop structure, if needed.
 */
	ENTRY(copyio_fault)
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault

	mov	SAVE_SRC, %o0
	mov	SAVE_DST, %o1
	jmp	REAL_LOFAULT
	mov	SAVE_COUNT, %o2
	SET_SIZE(copyio_fault)

	ENTRY(copyout)
	sethi	%hi(.copyout_err), REAL_LOFAULT
	or	REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT

.do_copyout:
	!
	! Check the length and bail if zero.
	!
	tst	%o2
	bnz,pt	%ncc, 1f
	nop
	retl
	clr	%o0
1:
	sethi	%hi(copyio_fault), %o3
	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
	or	%o3, %lo(copyio_fault), %o3
	membar	#Sync
	stn	%o3, [THREAD_REG + T_LOFAULT]

	mov	%o0, SAVE_SRC
	mov	%o1, SAVE_DST
	mov	%o2, SAVE_COUNT

	!
	! Check to see if we're more than SMALL_LIMIT (7 bytes).
	! Run in leaf mode, using the %o regs as our input regs.
	!
	subcc	%o2, SMALL_LIMIT, %o3
	bgu,a,pt %ncc, .dco_ns
	or	%o0, %o1, %o3

.dcobcp:
	sub	%g0, %o2, %o3		! negate count
	add	%o0, %o2, %o0		! make %o0 point at the end
	add	%o1, %o2, %o1		! make %o1 point at the end
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load first byte
	!
	! %o0 and %o1 point at the end and remain pointing at the end
	! of their buffers.  We pull things out by adding %o3 (which is
	! the negation of the length) to the buffer end which gives us
	! the current location in the buffers.  By incrementing %o3 we walk
	! through both buffers without having to bump each buffer's
	! pointer.  A very fast 4 instruction loop.
	!
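	! In rough C terms (hypothetical sketch):
	!
	!	for (off = -(ssize_t)count; off < 0; off++)
	!		*(dst_end + off) = *(src_end + off);
	!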
	.align 16
.dcocl:
	stba	%o4, [%o1 + %o3]ASI_USER
	inccc	%o3
	bl,a,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4
	!
	! We're done.  Go home.
	!
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
	retl
	clr	%o0
	!
	! Try aligned copies from here.
	!
.dco_ns:
	! %o0 = kernel addr (to be copied from)
	! %o1 = user addr (to be copied to)
	! %o2 = length
	! %o3 = %o0 | %o1 (used for alignment checking)
	! %o4 is alternate lo_fault
	! %o5 is original lo_fault
	!
	! See if we're single byte aligned.  If we are, check the
	! limit for single byte copies.  If we're smaller or equal,
	! bounce to the byte for byte copy loop.  Otherwise do it in
	! HW (if enabled).
	!
	btst	1, %o3
	bz,pt	%icc, .dcoh8
	btst	7, %o3

	ba	.dcobcp
	nop
.dcoh8:
	!
	! 8 byte aligned?
	!
	bnz,a	%ncc, .dcoh4
	btst	3, %o3
.dcos8:
	!
	! Housekeeping for copy loops.  Uses same idea as in the byte for
	! byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodebc
	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
	!
	! 4 byte aligned?
	!
.dcoh4:
	bnz,pn	%ncc, .dcoh2
	nop
.dcos4:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodfbc
	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
	!
	! We must be 2 byte aligned.  Off we go.
	! The check for small copies was done in the
	! delay at .dcoh4
	!
.dcoh2:
.dcos2:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodtbc
	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy

.dodebc:
	ldx	[%o0 + %o3], %o4
	deccc	%o2
	stxa	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodebc
	addcc	%o3, 8, %o3
	!
	! End of copy loop.  Check to see if we're done.  Most
	! eight byte aligned copies end here.
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Something is left - do it byte for byte.
	!
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load next byte
	!
	! Four byte copy loop.  %o2 is the number of 4 byte chunks to copy.
	!
	.align 32
.dodfbc:
	lduw	[%o0 + %o3], %o4
	deccc	%o2
	sta	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodfbc
	addcc	%o3, 4, %o3
	!
	! End of copy loop.  Check to see if we're done.  Most
	! four byte aligned copies end here.
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Something is left.  Do it byte for byte.
	!
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load next byte
	!
	! two byte aligned copy loop.  %o2 is the number of 2 byte chunks to
	! copy.
	!
	.align 32
.dodtbc:
	lduh	[%o0 + %o3], %o4
	deccc	%o2
	stha	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodtbc
	addcc	%o3, 2, %o3
	!
	! End of copy loop.  Anything left?
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Deal with the last byte
	!
	ldub	[%o0 + %o3], %o4
	stba	%o4, [%o1 + %o3]ASI_USER
.dcofh:
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	clr	%o0

.copyout_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_COPYOUT], %g2
	jmp	%g2
	nop
2:
	retl
	mov	-1, %o0
	SET_SIZE(copyout)

#endif	/* lint */


#ifdef lint

/*ARGSUSED*/
int
xcopyout(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else	/* lint */

	ENTRY(xcopyout)
	sethi	%hi(.xcopyout_err), REAL_LOFAULT
	b	.do_copyout
	or	REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT
.xcopyout_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_XCOPYOUT], %g2
	jmp	%g2
	nop
2:
	retl
	mov	%g1, %o0
	SET_SIZE(xcopyout)

#endif	/* lint */

#ifdef lint

/*ARGSUSED*/
int
xcopyout_little(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else	/* lint */

	ENTRY(xcopyout_little)
	sethi	%hi(.little_err), %o4
	ldn	[THREAD_REG + T_LOFAULT], %o5
	or	%o4, %lo(.little_err), %o4
	membar	#Sync			! sync error barrier
	stn	%o4, [THREAD_REG + T_LOFAULT]

	subcc	%g0, %o2, %o3
	add	%o0, %o2, %o0
	bz,pn	%ncc, 2f		! check for zero bytes
	sub	%o2, 1, %o4
	add	%o0, %o4, %o0		! start w/last byte
	add	%o1, %o2, %o1
	ldub	[%o0+%o3], %o4

1:	stba	%o4, [%o1+%o3]ASI_AIUSL
	inccc	%o3
	sub	%o0, 2, %o0		! get next byte
	bcc,a,pt %ncc, 1b
	ldub	[%o0+%o3], %o4

2:	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g0, %o0		! return (0)
	SET_SIZE(xcopyout_little)

#endif	/* lint */

/*
 * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
 */

#if defined(lint)

/*ARGSUSED*/
int
copyin(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else	/* lint */

	ENTRY(copyin)
	sethi	%hi(.copyin_err), REAL_LOFAULT
	or	REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT

.do_copyin:
	!
	! Check the length and bail if zero.
	!
	tst	%o2
	bnz,pt	%ncc, 1f
	nop
	retl
	clr	%o0
1:
	sethi	%hi(copyio_fault), %o3
	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
	or	%o3, %lo(copyio_fault), %o3
	membar	#Sync
	stn	%o3, [THREAD_REG + T_LOFAULT]

	mov	%o0, SAVE_SRC
	mov	%o1, SAVE_DST
	mov	%o2, SAVE_COUNT

	!
	! Check to see if we're more than SMALL_LIMIT.
	!
	subcc	%o2, SMALL_LIMIT, %o3
	bgu,a,pt %ncc, .dci_ns
	or	%o0, %o1, %o3

.dcibcp:
	sub	%g0, %o2, %o3		! setup for copy loop
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	ba,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! %o0 and %o1 point at the end and remain pointing at the end
	! of their buffers.  We pull things out by adding %o3 (which is
	! the negation of the length) to the buffer end which gives us
	! the current location in the buffers.  By incrementing %o3 we walk
	! through both buffers without having to bump each buffer's
	! pointer.  A very fast 4 instruction loop.
	!
	.align 16
.dcicl:
	stb	%o4, [%o1 + %o3]
	inccc	%o3
	bl,a,pt %ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! We're done.  Go home.
	!
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
	retl
	clr	%o0
	!
	! Try aligned copies from here.
	!
.dci_ns:
	!
	! See if we're single byte aligned.  If we are, check the
	! limit for single byte copies.  If we're smaller or equal,
	! bounce to the byte for byte copy loop.  Otherwise do it in
	! HW (if enabled).
	!
	btst	1, %o3
	bz,a,pt	%icc, .dcih8
	btst	7, %o3
	ba	.dcibcp
	nop

.dcih8:
	!
	! 8 byte aligned?
	!
	bnz,a	%ncc, .dcih4
	btst	3, %o3
.dcis8:
	!
	! Housekeeping for copy loops.  Uses same idea as in the byte for
	! byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .didebc
	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
	!
	! 4 byte aligned?
	!
.dcih4:
	bnz	%ncc, .dcih2
	nop
.dcis4:
	!
	! Housekeeping for copy loops.  Uses same idea as in the byte
	! for byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .didfbc
	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
.dcih2:
.dcis2:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .didtbc
	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy

.didebc:
	ldxa	[%o0 + %o3]ASI_USER, %o4
	deccc	%o2
	stx	%o4, [%o1 + %o3]
	bg,pt	%ncc, .didebc
	addcc	%o3, 8, %o3
	!
	! End of copy loop.  Most 8 byte aligned copies end here.
	!
	bz,pt	%ncc, .dcifh
	nop
	!
	! Something is left.  Do it byte for byte.
	!
	ba,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! 4 byte copy loop.  %o2 is number of 4 byte chunks to copy.
	!
	.align 32
.didfbc:
	lduwa	[%o0 + %o3]ASI_USER, %o4
	deccc	%o2
	st	%o4, [%o1 + %o3]
	bg,pt	%ncc, .didfbc
	addcc	%o3, 4, %o3
	!
	! End of copy loop.  Most 4 byte aligned copies end here.
	!
	bz,pt	%ncc, .dcifh
	nop
	!
	! Something is left.  Do it byte for byte.
	!
	ba,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! 2 byte aligned copy loop.  %o2 is number of 2 byte chunks to
	! copy.
	!
	.align 32
.didtbc:
	lduha	[%o0 + %o3]ASI_USER, %o4
	deccc	%o2
	sth	%o4, [%o1 + %o3]
	bg,pt	%ncc, .didtbc
	addcc	%o3, 2, %o3
	!
	! End of copy loop.  Most 2 byte aligned copies end here.
	!
	bz,pt	%ncc, .dcifh
	nop
	!
	! Deal with the last byte
	!
	lduba	[%o0 + %o3]ASI_USER, %o4
	stb	%o4, [%o1 + %o3]
.dcifh:
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	clr	%o0

.copyin_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_COPYIN], %g2
	jmp	%g2
	nop
2:
	retl
	mov	-1, %o0
	SET_SIZE(copyin)

#endif	/* lint */

#ifdef lint

/*ARGSUSED*/
int
xcopyin(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else	/* lint */

	ENTRY(xcopyin)
	sethi	%hi(.xcopyin_err), REAL_LOFAULT
	b	.do_copyin
	or	REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT
.xcopyin_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_XCOPYIN], %g2
	jmp	%g2
	nop
2:
	retl
	mov	%g1, %o0
	SET_SIZE(xcopyin)

#endif	/* lint */

#ifdef lint

/*ARGSUSED*/
int
xcopyin_little(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else	/* lint */

	ENTRY(xcopyin_little)
	sethi	%hi(.little_err), %o4
	ldn	[THREAD_REG + T_LOFAULT], %o5
	or	%o4, %lo(.little_err), %o4
	membar	#Sync			! sync error barrier
	stn	%o4, [THREAD_REG + T_LOFAULT]

	subcc	%g0, %o2, %o3
	add	%o0, %o2, %o0
	bz,pn	%ncc, 2f		! check for zero bytes
	sub	%o2, 1, %o4
	add	%o0, %o4, %o0		! start w/last byte
	add	%o1, %o2, %o1
	lduba	[%o0+%o3]ASI_AIUSL, %o4

1:	stb	%o4, [%o1+%o3]
	inccc	%o3
	sub	%o0, 2, %o0		! get next byte
	bcc,a,pt %ncc, 1b
	lduba	[%o0+%o3]ASI_AIUSL, %o4

2:	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g0, %o0		! return (0)

.little_err:
	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g1, %o0
	SET_SIZE(xcopyin_little)

#endif	/* lint */


/*
 * Copy a block of storage - must not overlap (from + count <= to
 * or to + count <= from).
 * No fault handler installed (to be called under on_fault())
 */
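/*
 * A hypothetical caller sketch: the _noerr forms rely on the caller
 * having established a fault handler with on_fault(9F), e.g.:
 *
 *	label_t ljb;
 *
 *	if (on_fault(&ljb)) {
 *		no_fault();
 *		return (EFAULT);
 *	}
 *	copyin_noerr(ufrom, kto, count);
 *	no_fault();
 */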
#if defined(lint)

/* ARGSUSED */
void
copyin_noerr(const void *ufrom, void *kto, size_t count)
{}

#else	/* lint */

	ENTRY(copyin_noerr)
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	b	.do_copyin
	or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
.copyio_noerr:
	jmp	SAVED_LOFAULT
	nop
	SET_SIZE(copyin_noerr)

#endif	/* lint */

/*
 * Copy a block of storage - must not overlap (from + count <= to
 * or to + count <= from).
 * No fault handler installed (to be called under on_fault())
 */

#if defined(lint)

/* ARGSUSED */
void
copyout_noerr(const void *kfrom, void *uto, size_t count)
{}

#else	/* lint */

	ENTRY(copyout_noerr)
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	b	.do_copyout
	or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
	SET_SIZE(copyout_noerr)

#endif	/* lint */

#if defined(lint)

int use_hw_bcopy = 1;
int use_hw_bzero = 1;

#else	/* !lint */

	.align	4
	DGDEF(use_hw_bcopy)
	.word	1
	DGDEF(use_hw_bzero)
	.word	1

	.align	64
	.section ".text"
#endif	/* !lint */


/*
 * hwblkclr - clears block-aligned, block-multiple-sized regions that are
 * longer than 256 bytes in length.  For the generic module we simply
 * call bzero and return 1 so the caller knows that block operations
 * were not used and that the cached pages must be flushed to ensure
 * integrity.
 * Caller is responsible for ensuring use_hw_bzero is true and that
 * kpreempt_disable() has been called.
 */
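/*
 * Hypothetical caller sketch (illustration only; per the contract
 * above, use_hw_bzero has already been checked):
 *
 *	kpreempt_disable();
 *	if (hwblkclr(addr, len) != 0) {
 *		-- block ops were not used; flush cached pages as needed
 *	}
 *	kpreempt_enable();
 */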
#ifdef lint
/*ARGSUSED*/
int
hwblkclr(void *addr, size_t len)
{
	return (0);
}
#else	/* lint */
	! %i0 - start address
	! %i1 - length of region (multiple of 64)

	ENTRY(hwblkclr)
	save	%sp, -SA(MINFRAME), %sp

	! Simply call bzero and notify the caller that bzero was used
	mov	%i0, %o0
	call	bzero
	mov	%i1, %o1
	ret
	restore	%g0, 1, %o0	! return (1) - did not use block operations

	SET_SIZE(hwblkclr)
#endif	/* lint */

#ifdef lint
/* Copy 32 bytes of data from src to dst using physical addresses */
/*ARGSUSED*/
void
hw_pa_bcopy32(uint64_t src, uint64_t dst)
{}
#else	/* !lint */

/*
 * Copy 32 bytes of data from src (%o0) to dst (%o1)
 * using physical addresses.
 */
	ENTRY_NP(hw_pa_bcopy32)
	rdpr	%pstate, %g1
	andn	%g1, PSTATE_IE, %g2
	wrpr	%g0, %g2, %pstate

	ldxa	[%o0]ASI_MEM, %o2
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o3
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o4
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o5
	stxa	%o2, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o3, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o4, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o5, [%o1]ASI_MEM

	membar	#Sync
	retl
	wrpr	%g0, %g1, %pstate
	SET_SIZE(hw_pa_bcopy32)
#endif	/* lint */

/*
 * Zero a block of storage.
 *
 * uzero is used by the kernel to zero a block in user address space.
 */
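
/*
 * Rough C sketch of the t_lofault policy used below (hypothetical,
 * for illustration only):
 *
 *	saved = curthread->t_lofault;
 *	-- kzero: always install .zeroerr, ORing LOFAULT_SET into the
 *	-- saved value; uzero/bzero: install .zeroerr only if a handler
 *	-- was already present
 *	... zero the block via .do_zero ...
 *	on exit (or fault), clear LOFAULT_SET from the saved value and,
 *	if it was nonzero before tagging, restore it;
 */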


#if defined(lint)

/* ARGSUSED */
int
kzero(void *addr, size_t count)
{ return (0); }

/* ARGSUSED */
void
uzero(void *addr, size_t count)
{}

#else	/* lint */

	ENTRY(uzero)
	!
	! Set a new lo_fault handler only if we came in with one
	! already specified.
	!
	wr	%g0, ASI_USER, %asi
	ldn	[THREAD_REG + T_LOFAULT], %o5
	tst	%o5
	bz,pt	%ncc, .do_zero
	sethi	%hi(.zeroerr), %o2
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync
	ba,pt	%ncc, .do_zero
	stn	%o2, [THREAD_REG + T_LOFAULT]

	ENTRY(kzero)
	!
	! Always set a lo_fault handler
	!
	wr	%g0, ASI_P, %asi
	ldn	[THREAD_REG + T_LOFAULT], %o5
	sethi	%hi(.zeroerr), %o2
	or	%o5, LOFAULT_SET, %o5
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync
	ba,pt	%ncc, .do_zero
	stn	%o2, [THREAD_REG + T_LOFAULT]

/*
 * We got here because of a fault during kzero or if
 * uzero or bzero was called with t_lofault non-zero.
 * Otherwise we've already run screaming from the room.
 * Errno value is in %g1.  Note that we're here iff
 * we did set t_lofault.
 */
.zeroerr:
	!
	! Undo asi register setting.  Just set it to be the
	! kernel default without checking.
	!
	wr	%g0, ASI_P, %asi

	!
	! We did set t_lofault.  It may well have been zero coming in.
	!
1:
	tst	%o5
	membar	#Sync
	bne,pn	%ncc, 3f
	andncc	%o5, LOFAULT_SET, %o5
2:
	!
	! Old handler was zero.  Just return the error.
	!
	retl				! return
	mov	%g1, %o0		! error code from %g1
3:
	!
	! We're here because %o5 was non-zero.  It was non-zero
	! because either LOFAULT_SET was present, a previous fault
	! handler was present, or both.  In all cases we need to reset
	! T_LOFAULT to the value of %o5 after clearing LOFAULT_SET
	! before we either simply return the error or we invoke the
	! previously specified handler.
	!
	be	%ncc, 2b
	stn	%o5, [THREAD_REG + T_LOFAULT]
	jmp	%o5			! goto real handler
	nop
	SET_SIZE(kzero)
	SET_SIZE(uzero)

#endif	/* lint */

/*
 * Zero a block of storage.
 */

#if defined(lint)

/* ARGSUSED */
void
bzero(void *addr, size_t count)
{}

#else	/* lint */

	ENTRY(bzero)
	wr	%g0, ASI_P, %asi

	ldn	[THREAD_REG + T_LOFAULT], %o5	! save old vector
	tst	%o5
	bz,pt	%ncc, .do_zero
	sethi	%hi(.zeroerr), %o2
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync			! sync error barrier
	stn	%o2, [THREAD_REG + T_LOFAULT]	! install new vector

.do_zero:
	cmp	%o1, 7
	blu,pn	%ncc, .byteclr
	nop

	cmp	%o1, 15
	blu,pn	%ncc, .wdalign
	nop

	andcc	%o0, 7, %o3		! is addr aligned on an 8 byte bound?
	bz,pt	%ncc, .blkalign		! already double aligned
	sub	%o3, 8, %o3		! -(bytes till double aligned)
	add	%o1, %o3, %o1		! update o1 with new count

1:
	stba	%g0, [%o0]%asi
	inccc	%o3
	bl,pt	%ncc, 1b
	inc	%o0

	! Now address is double aligned
.blkalign:
	cmp	%o1, 0x80		! check if there are 128 bytes to set
	blu,pn	%ncc, .bzero_small
	mov	%o1, %o3

	andcc	%o0, 0x3f, %o3		! is block aligned?
	bz,pt	%ncc, .bzero_blk
	sub	%o3, 0x40, %o3		! -(bytes till block aligned)
	add	%o1, %o3, %o1		! o1 is the remainder

	! Clear -(%o3) bytes till block aligned
1:
	stxa	%g0, [%o0]%asi
	addcc	%o3, 8, %o3
	bl,pt	%ncc, 1b
	add	%o0, 8, %o0

.bzero_blk:
	and	%o1, 0x3f, %o3		! calc bytes left after blk clear
	andn	%o1, 0x3f, %o4		! calc size of blocks in bytes

	cmp	%o4, 0x100		! 256 bytes or more
	blu,pn	%ncc, 3f
	nop

2:
	stxa	%g0, [%o0+0x0]%asi
	stxa	%g0, [%o0+0x40]%asi
	stxa	%g0, [%o0+0x80]%asi
	stxa	%g0, [%o0+0xc0]%asi

	stxa	%g0, [%o0+0x8]%asi
	stxa	%g0, [%o0+0x10]%asi
	stxa	%g0, [%o0+0x18]%asi
	stxa	%g0, [%o0+0x20]%asi
	stxa	%g0, [%o0+0x28]%asi
	stxa	%g0, [%o0+0x30]%asi
	stxa	%g0, [%o0+0x38]%asi

	stxa	%g0, [%o0+0x48]%asi
	stxa	%g0, [%o0+0x50]%asi
	stxa	%g0, [%o0+0x58]%asi
	stxa	%g0, [%o0+0x60]%asi
	stxa	%g0, [%o0+0x68]%asi
	stxa	%g0, [%o0+0x70]%asi
	stxa	%g0, [%o0+0x78]%asi

	stxa	%g0, [%o0+0x88]%asi
	stxa	%g0, [%o0+0x90]%asi
	stxa	%g0, [%o0+0x98]%asi
	stxa	%g0, [%o0+0xa0]%asi
	stxa	%g0, [%o0+0xa8]%asi
	stxa	%g0, [%o0+0xb0]%asi
	stxa	%g0, [%o0+0xb8]%asi

	stxa	%g0, [%o0+0xc8]%asi
	stxa	%g0, [%o0+0xd0]%asi
	stxa	%g0, [%o0+0xd8]%asi
	stxa	%g0, [%o0+0xe0]%asi
	stxa	%g0, [%o0+0xe8]%asi
	stxa	%g0, [%o0+0xf0]%asi
	stxa	%g0, [%o0+0xf8]%asi

	sub	%o4, 0x100, %o4
	cmp	%o4, 0x100
	bgu,pt	%ncc, 2b
	add	%o0, 0x100, %o0

3:
	! ... check if 64 bytes to set
	cmp	%o4, 0x40
	blu	%ncc, .bzero_blk_done
	nop

4:
	stxa	%g0, [%o0+0x0]%asi
	stxa	%g0, [%o0+0x8]%asi
	stxa	%g0, [%o0+0x10]%asi
	stxa	%g0, [%o0+0x18]%asi
	stxa	%g0, [%o0+0x20]%asi
	stxa	%g0, [%o0+0x28]%asi
	stxa	%g0, [%o0+0x30]%asi
	stxa	%g0, [%o0+0x38]%asi

	subcc	%o4, 0x40, %o4
	bgu,pt	%ncc, 3b
	add	%o0, 0x40, %o0

.bzero_blk_done:
	membar	#Sync

.bzero_small:
	! Set the remaining doubles
	subcc	%o3, 8, %o3		! Can we store any doubles?
	blu,pn	%ncc, .byteclr
	and	%o1, 7, %o1		! calc bytes left after doubles

.dbclr:
	stxa	%g0, [%o0]%asi		! Clear the doubles
	subcc	%o3, 8, %o3
	bgeu,pt	%ncc, .dbclr
	add	%o0, 8, %o0

	ba	.byteclr
	nop

.wdalign:
	andcc	%o0, 3, %o3		! is addr aligned on a word boundary?
	bz,pn	%ncc, .wdclr
	andn	%o1, 3, %o3		! create word sized count in %o3

	dec	%o1			! decrement count
	stba	%g0, [%o0]%asi		! clear a byte
	ba	.wdalign
	inc	%o0			! next byte

.wdclr:
	sta	%g0, [%o0]%asi		! 4-byte clearing loop
	subcc	%o3, 4, %o3
	bnz,pt	%ncc, .wdclr
	inc	4, %o0

	and	%o1, 3, %o1		! leftover count, if any

.byteclr:
	! Set the leftover bytes
	brz	%o1, .bzero_exit
	nop

7:
	deccc	%o1			! byte clearing loop
	stba	%g0, [%o0]%asi
	bgu,pt	%ncc, 7b
	inc	%o0

.bzero_exit:
	!
	! We're just concerned with whether t_lofault was set
	! when we came in.  We end up here from either kzero()
	! or bzero().  kzero() *always* sets a lofault handler.
	! It ORs LOFAULT_SET into %o5 to indicate it has done
	! this even if the value of %o5 is otherwise zero.
	! bzero() sets a lofault handler *only* if one was
	! previously set.  Accordingly we need to examine
	! %o5 and if it is non-zero be sure to clear LOFAULT_SET
	! before restoring the error handler.
	!
	tst	%o5
	bz	%ncc, 1f
	andn	%o5, LOFAULT_SET, %o5
	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1:
	retl
	clr	%o0			! return (0)

	SET_SIZE(bzero)
#endif	/* lint */