7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #pragma ident "%Z%%M% %I% %E% SMI"
28
29 #include <sys/param.h>
30 #include <sys/errno.h>
31 #include <sys/asm_linkage.h>
32 #include <sys/vtrace.h>
33 #include <sys/machthread.h>
34 #include <sys/clock.h>
35 #include <sys/asi.h>
36 #include <sys/fsr.h>
37 #include <sys/privregs.h>
38 #include <sys/fpras_impl.h>
39
40 #if !defined(lint)
41 #include "assym.h"
42 #endif /* lint */
43
44 /*
45 * Pseudo-code to aid in understanding the control flow of the
46 * bcopy/copyin/copyout routines.
47 *
48 * On entry:
49 *
50 * ! Determine whether to use the FP register version
51 * ! or the leaf routine version depending on size
52 * ! of copy and flags. Set up error handling accordingly.
53 * ! The transition point depends on whether the src and
54 * ! dst addresses can be aligned to long word, word,
55 * ! half word, or byte boundaries.
56 * !
57 * ! WARNING: <Register usage convention>
58 * ! For FP version, %l6 holds previous error handling and
59 * ! a flag: TRAMP_FLAG (low bits)
60 * ! for leaf routine version, %o4 holds those values.
61 * ! So either %l6 or %o4 is reserved and not available for
62 * ! any other use.
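 *
 * Illustrative only: the dispatch sketched above corresponds roughly to the
 * following C logic.  The helper name choose_copy_path() and the enum are
 * hypothetical; VIS_COPY_THRESHOLD and the hw_copy_limit_* globals are the
 * ones used by the real code below.
 *
 *	enum copy_path { LEAF_COPY, FPBLK_COPY };
 *
 *	static enum copy_path
 *	choose_copy_path(const void *src, void *dst, size_t len)
 *	{
 *		uintptr_t diff = (uintptr_t)src ^ (uintptr_t)dst;
 *		uint_t limit;
 *
 *		if (len <= VIS_COPY_THRESHOLD)
 *			return (LEAF_COPY);		! leaf routine version
 *		if ((diff & 7) == 0)			! long word alignable
 *			limit = hw_copy_limit_8;
 *		else if ((diff & 3) == 0)		! word alignable
 *			limit = hw_copy_limit_4;
 *		else if ((diff & 1) == 0)		! half word alignable
 *			limit = hw_copy_limit_2;
 *		else					! byte copies only
 *			limit = hw_copy_limit_1;
 *		if (limit == 0 || len <= limit)		! zero disables FPBLK
 *			return (LEAF_COPY);
 *		return (FPBLK_COPY);			! FP register version
 *	}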
604 * the additional overhead of this save and restore.
605 *
606 * A membar #Sync is needed before save to sync fp ops initiated before
607 * the call to the copy function (by whoever has fp in use); for example
608 * an earlier block load to the quadrant we are about to save may still be
609 * "in flight". A membar #Sync is required at the end of the save to
610 * sync our block store (the copy code is about to begin ldd's to the
611	 * first quadrant). Note, however, that since the Cheetah pipeline block load
612	 * is blocking, we can omit the initial membar before saving fp state (they're
613	 * commented out below in case of future porting to a chip that does not block
614 * on block load).
615 *
616 * Similarly: a membar #Sync before restore allows the block stores of
617 * the copy operation to complete before we fill the quadrants with their
618 * original data, and a membar #Sync after restore lets the block loads
619 * of the restore complete before we return to whoever has the fp regs
620 * in use. To avoid repeated membar #Sync we make it the responsibility
621 * of the copy code to membar #Sync immediately after copy is complete
622 * and before using the BLD_*_FROMSTACK macro.
623 */
624 #if !defined(lint)
625 #define BST_FPQ1Q3_TOSTACK(tmp1) \
626 /* membar #Sync */ ;\
627 add %fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1 ;\
628 and tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */ ;\
629 stda %f0, [tmp1]ASI_BLK_P ;\
630 add tmp1, VIS_BLOCKSIZE, tmp1 ;\
631 stda %f32, [tmp1]ASI_BLK_P ;\
632 membar #Sync
633
634 #define BLD_FPQ1Q3_FROMSTACK(tmp1) \
635 /* membar #Sync - provided at copy completion */ ;\
636 add %fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1 ;\
637 and tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */ ;\
638 ldda [tmp1]ASI_BLK_P, %f0 ;\
639 add tmp1, VIS_BLOCKSIZE, tmp1 ;\
640 ldda [tmp1]ASI_BLK_P, %f32 ;\
641 membar #Sync
642
643 #define BST_FPQ2Q4_TOSTACK(tmp1) \
644 /* membar #Sync */ ;\
645 add %fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1 ;\
646 and tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */ ;\
647 stda %f16, [tmp1]ASI_BLK_P ;\
648 add tmp1, VIS_BLOCKSIZE, tmp1 ;\
649 stda %f48, [tmp1]ASI_BLK_P ;\
650 membar #Sync
651
652 #define BLD_FPQ2Q4_FROMSTACK(tmp1) \
653 /* membar #Sync - provided at copy completion */ ;\
654 add %fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1 ;\
655 and tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */ ;\
656 ldda [tmp1]ASI_BLK_P, %f16 ;\
657 add tmp1, VIS_BLOCKSIZE, tmp1 ;\
658 ldda [tmp1]ASI_BLK_P, %f48 ;\
659 membar #Sync
660 #endif
661
662 /*
663 * FP_NOMIGRATE and FP_ALLOWMIGRATE. Prevent migration (or, stronger,
664 * prevent preemption if there is no t_lwp to save FP state to on context
665 * switch) before commencing a FP copy, and reallow it on completion or
666 * in error trampoline paths when we were using FP copy.
667 *
668 * Both macros may call other functions, so be aware that all outputs are
669 * forfeit after using these macros. For this reason we do not pass registers
670 * to use - we just use any outputs we want.
671 *
672 * For fpRAS we need to perform the fpRAS mechanism test on the same
673 * CPU as we use for the copy operation, both so that we validate the
674 * CPU we perform the copy on and so that we know which CPU failed
675 * if a failure is detected. Hence we need to be bound to "our" CPU.
676 * This could be achieved through disabling preemption (and we do it that
677 * way for threads with no t_lwp) but for larger copies this may hold
678 * higher priority threads off cpu for too long (e.g., realtime). So we
679 * make use of the lightweight t_nomigrate mechanism where we can (i.e., when
680 * we have a t_lwp).
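 *
 * Illustrative only: a hypothetical C rendering (not code in this file) of
 * the choice FP_NOMIGRATE makes, using the t_lwp test described above:
 *
 *	if (curthread->t_lwp != NULL)
 *		thread_nomigrate();		! lightweight: forbid migration
 *	else
 *		curthread->t_preempt++;		! no lwp: forbid preemption
 *
 * FP_ALLOWMIGRATE undoes whichever was done and, when preemption was held
 * off, calls kpreempt() if a preemption was requested (cpu_kprunrun) in the
 * meantime, as the macro body below shows.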
720 ba label2/**/f ;\
721 nop ;\
722 label1: ;\
723 dec %o1 ;\
724 brnz,pn %o1, label2/**/f ;\
725 stb %o1, [THREAD_REG + T_PREEMPT] ;\
726 ldn [THREAD_REG + T_CPU], %o0 ;\
727 ldub [%o0 + CPU_KPRUNRUN], %o0 ;\
728 brz,pt %o0, label2/**/f ;\
729 nop ;\
730 call kpreempt ;\
731 rdpr %pil, %o0 ;\
732 label2:
733
734 /*
735 * Copy a block of storage, returning an error code if `from' or
736 * `to' takes a kernel pagefault which cannot be resolved.
737 * Returns an errno value on a pagefault error, or 0 if the copy succeeded.
738 */
739
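/*
 * Usage sketch only (hypothetical caller, not code in this file): kcopy()
 * is the flavour to use when a fault on either buffer must be survived,
 * since bcopy() below provides no error return:
 *
 *	int err;
 *
 *	if ((err = kcopy(src, dst, len)) != 0)
 *		return (err);		! errno from the unresolved pagefault
 */
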
740 #if defined(lint)
741
742 /* ARGSUSED */
743 int
744 kcopy(const void *from, void *to, size_t count)
745 { return(0); }
746
747 #else /* lint */
748
749 .seg ".text"
750 .align 4
751
752 ENTRY(kcopy)
753
754 cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
755 	bleu,pt	%ncc, .kcopy_small	! branch if small copy; else larger cases
756 xor %o0, %o1, %o3 ! are src, dst alignable?
757 btst 7, %o3 !
758 bz,pt %ncc, .kcopy_8 ! check for longword alignment
759 nop
760 btst 1, %o3 !
761 bz,pt %ncc, .kcopy_2 ! check for half-word
762 nop
763 sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
764 ld [%o3 + %lo(hw_copy_limit_1)], %o3
765 tst %o3
766 bz,pn %icc, .kcopy_small ! if zero, disable HW copy
767 cmp %o2, %o3 ! if length <= limit
768 bleu,pt %ncc, .kcopy_small ! go to small copy
896
897 /*
898 * We got here because of a fault during a small kcopy or bcopy.
899 * No floating point registers are used by the small copies.
900 * Errno value is in %g1.
901 */
902 .sm_copyerr:
903 1:
904 btst TRAMP_FLAG, %o4
905 membar #Sync
906 andn %o4, TRAMP_FLAG, %o4
907 bnz,pn %ncc, 3f
908 stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
909 retl
910 mov %g1, %o0
911 3:
912 jmp %o4 ! goto real handler
913 mov %g0, %o0 !
914
915 SET_SIZE(kcopy)
916 #endif /* lint */
917
918
919 /*
920 * Copy a block of storage - must not overlap (from + len <= to).
921 * Registers: l6 - saved t_lofault
922 * (for short copies, o4 - saved t_lofault)
923 *
924 * Copy a page of memory.
925 * Assumes double word alignment and a count >= 256.
926 */
927 #if defined(lint)
928
929 /* ARGSUSED */
930 void
931 bcopy(const void *from, void *to, size_t count)
932 {}
933
934 #else /* lint */
935
936 ENTRY(bcopy)
937
938 cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
939 	bleu,pt	%ncc, .bcopy_small	! branch if small copy; else larger cases
940 xor %o0, %o1, %o3 ! are src, dst alignable?
941 btst 7, %o3 !
942 bz,pt %ncc, .bcopy_8 ! check for longword alignment
943 nop
944 btst 1, %o3 !
945 bz,pt %ncc, .bcopy_2 ! check for half-word
946 nop
947 sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
948 ld [%o3 + %lo(hw_copy_limit_1)], %o3
949 tst %o3
950 bz,pn %icc, .bcopy_small ! if zero, disable HW copy
951 cmp %o2, %o3 ! if length <= limit
952 bleu,pt %ncc, .bcopy_small ! go to small copy
953 nop
954 ba,pt %ncc, .bcopy_more ! otherwise go to large copy
955 nop
1485 bz,pt %icc, 4f
1486 nop
1487
1488 BLD_FPQ1Q3_FROMSTACK(%o2)
1489
1490 ba,pt %ncc, 2f
1491 wr %o3, 0, %fprs ! restore fprs
1492 4:
1493 FZEROQ1Q3
1494 wr %o3, 0, %fprs ! restore fprs
1495 2:
1496 membar #Sync ! sync error barrier
1497 andn %l6, MASK_FLAGS, %l6
1498 stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
1499 FP_ALLOWMIGRATE(5, 6)
1500 ret
1501 restore %g0, 0, %o0
1502
1503 SET_SIZE(bcopy_more)
1504
1505 #endif /* lint */
1506
1507 /*
1508 * Block copy with possibly overlapped operands.
1509 */
1510
1511 #if defined(lint)
1512
1513 /*ARGSUSED*/
1514 void
1515 ovbcopy(const void *from, void *to, size_t count)
1516 {}
1517
1518 #else /* lint */
1519
1520 ENTRY(ovbcopy)
1521 tst %o2 ! check count
1522 bgu,a %ncc, 1f ! nothing to do or bad arguments
1523 subcc %o0, %o1, %o3 ! difference of from and to address
1524
1525 retl ! return
1526 nop
1527 1:
1528 bneg,a %ncc, 2f
1529 neg %o3 ! if < 0, make it positive
1530 2: cmp %o2, %o3 ! cmp size and abs(from - to)
1531 bleu %ncc, bcopy ! if size <= abs(diff): use bcopy,
1532 .empty ! no overlap
1533 cmp %o0, %o1 ! compare from and to addresses
1534 blu %ncc, .ov_bkwd ! if from < to, copy backwards
1535 nop
1536 !
1537 ! Copy forwards.
1538 !
1539 .ov_fwd:
1543 deccc %o2 ! dec count
1544 bgu %ncc, .ov_fwd ! loop till done
1545 inc %o1 ! inc to address
1546
1547 retl ! return
1548 nop
1549 !
1550 ! Copy backwards.
1551 !
1552 .ov_bkwd:
1553 deccc %o2 ! dec count
1554 ldub [%o0 + %o2], %o3 ! get byte at end of src
1555 bgu %ncc, .ov_bkwd ! loop till done
1556 stb %o3, [%o1 + %o2] ! delay slot, store at end of dst
1557
1558 retl ! return
1559 nop
1560
1561 SET_SIZE(ovbcopy)
1562
1563 #endif /* lint */
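/*
 * Illustrative only: a hypothetical C rendering of the overlap handling in
 * ovbcopy() above (ovbcopy_sketch is not a real function in this file):
 *
 *	void
 *	ovbcopy_sketch(const void *from, void *to, size_t count)
 *	{
 *		size_t diff = ((uintptr_t)from > (uintptr_t)to) ?
 *		    (uintptr_t)from - (uintptr_t)to :
 *		    (uintptr_t)to - (uintptr_t)from;
 *		size_t i;
 *
 *		if (count == 0)
 *			return;
 *		if (count <= diff) {
 *			bcopy(from, to, count);		! regions cannot overlap
 *		} else if ((uintptr_t)from < (uintptr_t)to) {
 *			for (i = count; i != 0; i--)	! copy backwards
 *				((char *)to)[i - 1] = ((const char *)from)[i - 1];
 *		} else {
 *			for (i = 0; i < count; i++)	! copy forwards
 *				((char *)to)[i] = ((const char *)from)[i];
 *		}
 *	}
 */
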
1564
1565
1566 /*
1567 * hwblkpagecopy()
1568 *
1569 * Copies exactly one page. This routine assumes the caller (ppcopy)
1570 * has already disabled kernel preemption and has checked
1571 * use_hw_bcopy. Preventing preemption also prevents cpu migration.
1572 */
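/*
 * Caller-side sketch only (hypothetical, not code in this file): the
 * preconditions described above would typically be arranged as
 *
 *	kpreempt_disable();		! also prevents cpu migration
 *	if (use_hw_bcopy)
 *		hwblkpagecopy(src, dst);
 *	else
 *		bcopy(src, dst, PAGESIZE);
 *	kpreempt_enable();
 */
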
1573 #ifdef lint
1574 /*ARGSUSED*/
1575 void
1576 hwblkpagecopy(const void *src, void *dst)
1577 { }
1578 #else /* lint */
1579 ENTRY(hwblkpagecopy)
1580 ! get another window w/space for three aligned blocks of saved fpregs
1581 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
1582
1583 ! %i0 - source address (arg)
1584 ! %i1 - destination address (arg)
1585 ! %i2 - length of region (not arg)
1586 ! %l0 - saved fprs
1587 ! %l1 - pointer to saved fpregs
1588
1589 rd %fprs, %l0 ! check for unused fp
1590 btst FPRS_FEF, %l0
1591 bz,a,pt %icc, 1f
1592 wr %g0, FPRS_FEF, %fprs
1593
1594 BST_FPQ1Q3_TOSTACK(%l1)
1595
1596 1: set PAGESIZE, CNT
1597 mov REALSRC, SRC
1598
1686
1687 FPRAS_INTERVAL(FPRAS_PGCOPY, 1, %l5, %o2, %o3, %o4, %o5, 8)
1688 FPRAS_REWRITE_TYPE1(1, %l5, %f32, %o2, 9)
1689 FPRAS_CHECK(FPRAS_PGCOPY, %l5, 9) ! lose outputs
1690
1691 btst FPRS_FEF, %l0
1692 bz,pt %icc, 2f
1693 nop
1694
1695 BLD_FPQ1Q3_FROMSTACK(%l3)
1696 ba 3f
1697 nop
1698
1699 2: FZEROQ1Q3
1700
1701 3: wr %l0, 0, %fprs ! restore fprs
1702 ret
1703 restore %g0, 0, %o0
1704
1705 SET_SIZE(hwblkpagecopy)
1706 #endif /* lint */
1707
1708
1709 /*
1710 * Transfer data to and from user space -
1711 * Note that these routines can cause faults.
1712 * It is assumed that the kernel has nothing mapped
1713 * below KERNELBASE in the virtual address space.
1714 *
1715 * Note that copyin(9F) and copyout(9F) are part of the
1716 * DDI/DKI which specifies that they return '-1' on "errors."
1717 *
1718 * Sigh.
1719 *
1720 * So there are two extremely similar routines - xcopyin() and xcopyout()
1721 * which return the errno that we've faithfully computed. This
1722 * allows other callers (e.g. uiomove(9F)) to work correctly.
1723 * Given that these are used pretty heavily, we expand the calling
1724 * sequences inline for all flavours (rather than making wrappers).
1725 *
1726 * There are also stub routines for xcopyout_little and xcopyin_little,
1745 *
1746 * None of the copyops routines grab a window until it's decided that
1747 * we need to do a HW block copy operation. This saves a window
1748 * spill/fill when we're called during socket ops. The typical IO
1749 * path won't cause spill/fill traps.
1750 *
1751 * This code uses a set of 4 limits for the maximum size that will
1752 * be copied given a particular input/output address alignment.
1753 * If the value for a particular limit is zero, the copy will be performed
1754 * by the plain copy loops rather than FPBLK.
1755 *
1756 * See the description of bcopy above for more details of the
1757 * data copying algorithm and the default limits.
1758 *
1759 */
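/*
 * Illustrative only: the two return conventions described above, from a
 * hypothetical caller's point of view (not code in this file):
 *
 *	int err;
 *
 *	if (copyout(kbuf, ubuf, len) != 0)
 *		return (EFAULT);	! DDI/DKI flavour just reports -1
 *
 *	if ((err = xcopyout(kbuf, ubuf, len)) != 0)
 *		return (err);		! x-flavour returns the real errno
 */
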
1760
1761 /*
1762 * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
1763 */
1764
1765 #if defined(lint)
1766
1767
1768 #else /* lint */
1769 /*
1770 * We save the arguments in the following registers in case of a fault:
1771 * kaddr - %l1
1772 * uaddr - %l2
1773 * count - %l3
1774 */
1775 #define SAVE_SRC %l1
1776 #define SAVE_DST %l2
1777 #define SAVE_COUNT %l3
1778
1779 #define SM_SAVE_SRC %g4
1780 #define SM_SAVE_DST %g5
1781 #define SM_SAVE_COUNT %o5
1782 #define ERRNO %l5
1783
1784
1785 #define REAL_LOFAULT %l4
1786 /*
1787 * Generic copyio fault handler. This is the first line of defense when a
1788 * fault occurs in (x)copyin/(x)copyout. In order for this to function
1815 wr %o3, 0, %fprs ! restore fprs
1816
1817 4:
1818 FZEROQ2Q4
1819 wr %o3, 0, %fprs ! restore fprs
1820
1821 1:
1822 andn %l6, FPUSED_FLAG, %l6
1823 membar #Sync
1824 stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
1825 FP_ALLOWMIGRATE(5, 6)
1826
1827 mov SAVE_SRC, %i0
1828 mov SAVE_DST, %i1
1829 jmp REAL_LOFAULT
1830 mov SAVE_COUNT, %i2
1831
1832 SET_SIZE(copyio_fault)
1833
1834
1835 #endif
1836
1837 #if defined(lint)
1838
1839 /*ARGSUSED*/
1840 int
1841 copyout(const void *kaddr, void *uaddr, size_t count)
1842 { return (0); }
1843
1844 #else /* lint */
1845
1846 ENTRY(copyout)
1847
1848 cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
1849 	bleu,pt	%ncc, .copyout_small	! branch if small copy; else larger cases
1850 xor %o0, %o1, %o3 ! are src, dst alignable?
1851 btst 7, %o3 !
1852 bz,pt %ncc, .copyout_8 ! check for longword alignment
1853 nop
1854 btst 1, %o3 !
1855 bz,pt %ncc, .copyout_2 ! check for half-word
1856 nop
1857 sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
1858 ld [%o3 + %lo(hw_copy_limit_1)], %o3
1859 tst %o3
1860 bz,pn %icc, .copyout_small ! if zero, disable HW copy
1861 cmp %o2, %o3 ! if length <= limit
1862 bleu,pt %ncc, .copyout_small ! go to small copy
1863 nop
1864 ba,pt %ncc, .copyout_more ! otherwise go to large copy
1865 nop
2450
2451 /*
2452 * We got here because of a fault during copyout.
2453 * Errno value is in ERRNO, but DDI/DKI says return -1 (sigh).
2454 */
2455 .copyout_err:
2456 ldn [THREAD_REG + T_COPYOPS], %o4 ! check for copyop handler
2457 tst %o4
2458 bz,pt %ncc, 2f ! if not, return error
2459 nop
2460 ldn [%o4 + CP_COPYOUT], %g2 ! if handler, invoke it with
2461 jmp %g2 ! original arguments
2462 restore %g0, 0, %g0 ! dispose of copy window
2463 2:
2464 ret
2465 restore %g0, -1, %o0 ! return error value
2466
2467
2468 SET_SIZE(copyout_more)
2469
2470 #endif /* lint */
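/*
 * Illustrative only: a hypothetical C rendering of the .copyout_err path
 * above.  The t_copyops/cp_copyout names mirror the T_COPYOPS and
 * CP_COPYOUT offsets used there and are assumptions, not definitions made
 * in this file:
 *
 *	if (curthread->t_copyops != NULL)
 *		return (curthread->t_copyops->cp_copyout(kaddr, uaddr, count));
 *	return (-1);			! no copyops handler installed
 */
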
2471
2472
2473 #ifdef lint
2474
2475 /*ARGSUSED*/
2476 int
2477 xcopyout(const void *kaddr, void *uaddr, size_t count)
2478 { return (0); }
2479
2480 #else /* lint */
2481
2482 ENTRY(xcopyout)
2483 cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
2484 	bleu,pt	%ncc, .xcopyout_small	! branch if small copy; else larger cases
2485 xor %o0, %o1, %o3 ! are src, dst alignable?
2486 btst 7, %o3 !
2487 bz,pt %ncc, .xcopyout_8 !
2488 nop
2489 btst 1, %o3 !
2490 bz,pt %ncc, .xcopyout_2 ! check for half-word
2491 nop
2492 sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
2493 ld [%o3 + %lo(hw_copy_limit_1)], %o3
2494 tst %o3
2495 bz,pn %icc, .xcopyout_small ! if zero, disable HW copy
2496 cmp %o2, %o3 ! if length <= limit
2497 bleu,pt %ncc, .xcopyout_small ! go to small copy
2498 nop
2499 ba,pt %ncc, .xcopyout_more ! otherwise go to large copy
2500 nop
2501 .xcopyout_2:
2566 .sm_xcopyout_err:
2567
2568 membar #Sync
2569 stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
2570 mov SM_SAVE_SRC, %o0
2571 mov SM_SAVE_DST, %o1
2572 mov SM_SAVE_COUNT, %o2
2573 ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler
2574 tst %o3
2575 bz,pt %ncc, 3f ! if not, return error
2576 nop
2577 ldn [%o3 + CP_XCOPYOUT], %o5 ! if handler, invoke it with
2578 jmp %o5 ! original arguments
2579 nop
2580 3:
2581 retl
2582 or %g1, 0, %o0 ! return errno value
2583
2584 SET_SIZE(xcopyout)
2585
2586 #endif /* lint */
2587
2588 #ifdef lint
2589
2590 /*ARGSUSED*/
2591 int
2592 xcopyout_little(const void *kaddr, void *uaddr, size_t count)
2593 { return (0); }
2594
2595 #else /* lint */
2596
2597 ENTRY(xcopyout_little)
2598 sethi %hi(.xcopyio_err), %o5
2599 or %o5, %lo(.xcopyio_err), %o5
2600 ldn [THREAD_REG + T_LOFAULT], %o4
2601 membar #Sync ! sync error barrier
2602 stn %o5, [THREAD_REG + T_LOFAULT]
2603 mov %o4, %o5
2604
2605 subcc %g0, %o2, %o3
2606 add %o0, %o2, %o0
2607 bz,pn %ncc, 2f ! check for zero bytes
2608 sub %o2, 1, %o4
2609 add %o0, %o4, %o0 ! start w/last byte
2610 add %o1, %o2, %o1
2611 ldub [%o0 + %o3], %o4
2612
2613 1: stba %o4, [%o1 + %o3]ASI_AIUSL
2614 inccc %o3
2615 sub %o0, 2, %o0 ! get next byte
2616 bcc,a,pt %ncc, 1b
2617 ldub [%o0 + %o3], %o4
2618
2619 2:
2620 membar #Sync ! sync error barrier
2621 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
2622 retl
2623 mov %g0, %o0 ! return (0)
2624
2625 SET_SIZE(xcopyout_little)
2626
2627 #endif /* lint */
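/*
 * Illustrative only: a hypothetical C rendering of the negative-index loop
 * in xcopyout_little above.  The kernel source is walked from its last byte
 * downwards while the user destination advances through the little-endian
 * user ASI (stba ...ASI_AIUSL), so the buffer is emitted byte-reversed:
 *
 *	for (off = 0; off < count; off++)
 *		user_dst[off] = kern_src[count - 1 - off];
 *
 * xcopyin_little below is the mirror image, loading via lduba ...ASI_AIUSL.
 */
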
2628
2629 /*
2630 * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
2631 */
2632
2633 #if defined(lint)
2634
2635 /*ARGSUSED*/
2636 int
2637 copyin(const void *uaddr, void *kaddr, size_t count)
2638 { return (0); }
2639
2640 #else /* lint */
2641
2642 ENTRY(copyin)
2643 cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
2644 	bleu,pt	%ncc, .copyin_small	! branch if small copy; else larger cases
2645 xor %o0, %o1, %o3 ! are src, dst alignable?
2646 btst 7, %o3 !
2647 bz,pt %ncc, .copyin_8 ! check for longword alignment
2648 nop
2649 btst 1, %o3 !
2650 bz,pt %ncc, .copyin_2 ! check for half-word
2651 nop
2652 sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
2653 ld [%o3 + %lo(hw_copy_limit_1)], %o3
2654 tst %o3
2655 bz,pn %icc, .copyin_small ! if zero, disable HW copy
2656 cmp %o2, %o3 ! if length <= limit
2657 bleu,pt %ncc, .copyin_small ! go to small copy
2658 nop
2659 ba,pt %ncc, .copyin_more ! otherwise go to large copy
2660 nop
2661 .copyin_2:
3237 restore %g0, 0, %o0
3238 /*
3239 * We got here because of a fault during copyin.
3240 * Errno value is in ERRNO, but DDI/DKI says return -1 (sigh).
3241 */
3242 .copyin_err:
3243 ldn [THREAD_REG + T_COPYOPS], %o4 ! check for copyop handler
3244 tst %o4
3245 bz,pt %ncc, 2f ! if not, return error
3246 nop
3247 ldn [%o4 + CP_COPYIN], %g2 ! if handler, invoke it with
3248 jmp %g2 ! original arguments
3249 restore %g0, 0, %g0 ! dispose of copy window
3250 2:
3251 ret
3252 restore %g0, -1, %o0 ! return error value
3253
3254
3255 SET_SIZE(copyin_more)
3256
3257 #endif /* lint */
3258
3259 #ifdef lint
3260
3261 /*ARGSUSED*/
3262 int
3263 xcopyin(const void *uaddr, void *kaddr, size_t count)
3264 { return (0); }
3265
3266 #else /* lint */
3267
3268 ENTRY(xcopyin)
3269
3270 cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
3271 	bleu,pt	%ncc, .xcopyin_small	! branch if small copy; else larger cases
3272 xor %o0, %o1, %o3 ! are src, dst alignable?
3273 btst 7, %o3 !
3274 bz,pt %ncc, .xcopyin_8 ! check for longword alignment
3275 nop
3276 btst 1, %o3 !
3277 bz,pt %ncc, .xcopyin_2 ! check for half-word
3278 nop
3279 sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
3280 ld [%o3 + %lo(hw_copy_limit_1)], %o3
3281 tst %o3
3282 bz,pn %icc, .xcopyin_small ! if zero, disable HW copy
3283 cmp %o2, %o3 ! if length <= limit
3284 bleu,pt %ncc, .xcopyin_small ! go to small copy
3285 nop
3286 ba,pt %ncc, .xcopyin_more ! otherwise go to large copy
3287 nop
3353 .sm_xcopyin_err:
3354
3355 membar #Sync
3356 stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
3357 mov SM_SAVE_SRC, %o0
3358 mov SM_SAVE_DST, %o1
3359 mov SM_SAVE_COUNT, %o2
3360 ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler
3361 tst %o3
3362 bz,pt %ncc, 3f ! if not, return error
3363 nop
3364 ldn [%o3 + CP_XCOPYIN], %o5 ! if handler, invoke it with
3365 jmp %o5 ! original arguments
3366 nop
3367 3:
3368 retl
3369 or %g1, 0, %o0 ! return errno value
3370
3371 SET_SIZE(xcopyin)
3372
3373 #endif /* lint */
3374
3375 #ifdef lint
3376
3377 /*ARGSUSED*/
3378 int
3379 xcopyin_little(const void *uaddr, void *kaddr, size_t count)
3380 { return (0); }
3381
3382 #else /* lint */
3383
3384 ENTRY(xcopyin_little)
3385 sethi %hi(.xcopyio_err), %o5
3386 or %o5, %lo(.xcopyio_err), %o5
3387 ldn [THREAD_REG + T_LOFAULT], %o4
3388 membar #Sync ! sync error barrier
3389 stn %o5, [THREAD_REG + T_LOFAULT]
3390 mov %o4, %o5
3391
3392 subcc %g0, %o2, %o3
3393 add %o0, %o2, %o0
3394 bz,pn %ncc, 2f ! check for zero bytes
3395 sub %o2, 1, %o4
3396 add %o0, %o4, %o0 ! start w/last byte
3397 add %o1, %o2, %o1
3398 lduba [%o0 + %o3]ASI_AIUSL, %o4
3399
3400 1: stb %o4, [%o1 + %o3]
3401 inccc %o3
3402 sub %o0, 2, %o0 ! get next byte
3403 bcc,a,pt %ncc, 1b
3404 lduba [%o0 + %o3]ASI_AIUSL, %o4
3405
3406 2:
3407 membar #Sync ! sync error barrier
3408 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
3409 retl
3410 mov %g0, %o0 ! return (0)
3411
3412 .xcopyio_err:
3413 membar #Sync ! sync error barrier
3414 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
3415 retl
3416 mov %g1, %o0
3417
3418 SET_SIZE(xcopyin_little)
3419
3420 #endif /* lint */
3421
3422
3423 /*
3424 * Copy a block of storage - must not overlap (from + len <= to).
3425 * No fault handler installed (to be called under on_fault())
3426 */
3427 #if defined(lint)
3428
3429 /* ARGSUSED */
3430 void
3431 copyin_noerr(const void *ufrom, void *kto, size_t count)
3432 {}
3433
3434 #else /* lint */
3435 ENTRY(copyin_noerr)
3436
3437 cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
3438 	bleu,pt	%ncc, .copyin_ne_small	! branch if small copy; else larger cases
3439 xor %o0, %o1, %o3 ! are src, dst alignable?
3440 btst 7, %o3 !
3441 bz,pt %ncc, .copyin_ne_8 ! check for longword alignment
3442 nop
3443 btst 1, %o3 !
3444 bz,pt %ncc, .copyin_ne_2 ! check for half-word
3445 nop
3446 sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
3447 ld [%o3 + %lo(hw_copy_limit_1)], %o3
3448 tst %o3
3449 bz,pn %icc, .copyin_ne_small ! if zero, disable HW copy
3450 cmp %o2, %o3 ! if length <= limit
3451 bleu,pt %ncc, .copyin_ne_small ! go to small copy
3452 nop
3453 ba,pt %ncc, .copyin_noerr_more ! otherwise go to large copy
3454 nop
3498 ba,pt %ncc, .sm_do_copyin
3499 stn %o5, [THREAD_REG + T_LOFAULT] ! set/save t_lofault
3500
3501 .copyin_noerr_more:
3502 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
3503 sethi %hi(.copyio_noerr), REAL_LOFAULT
3504 ba,pt %ncc, .do_copyin
3505 or REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
3506
3507 .copyio_noerr:
3508 jmp %l6
3509 restore %g0,0,%g0
3510
3511 .sm_copyio_noerr:
3512 membar #Sync
3513 stn %o4, [THREAD_REG + T_LOFAULT] ! restore t_lofault
3514 jmp %o4
3515 nop
3516
3517 SET_SIZE(copyin_noerr)
3518 #endif /* lint */
3519
3520 /*
3521 * Copy a block of storage - must not overlap (from + len <= to).
3522 * No fault handler installed (to be called under on_fault())
3523 */
3524
3525 #if defined(lint)
3526
3527 /* ARGSUSED */
3528 void
3529 copyout_noerr(const void *kfrom, void *uto, size_t count)
3530 {}
3531
3532 #else /* lint */
3533 ENTRY(copyout_noerr)
3534
3535 cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
3536 	bleu,pt	%ncc, .copyout_ne_small	! branch if small copy; else larger cases
3537 xor %o0, %o1, %o3 ! are src, dst alignable?
3538 btst 7, %o3 !
3539 bz,pt %ncc, .copyout_ne_8 ! check for longword alignment
3540 nop
3541 btst 1, %o3 !
3542 bz,pt %ncc, .copyout_ne_2 ! check for half-word
3543 nop
3544 sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
3545 ld [%o3 + %lo(hw_copy_limit_1)], %o3
3546 tst %o3
3547 bz,pn %icc, .copyout_ne_small ! if zero, disable HW copy
3548 cmp %o2, %o3 ! if length <= limit
3549 bleu,pt %ncc, .copyout_ne_small ! go to small copy
3550 nop
3551 ba,pt %ncc, .copyout_noerr_more ! otherwise go to large copy
3552 nop
3586 nop
3587
3588 .copyout_ne_small:
3589 ldn [THREAD_REG + T_LOFAULT], %o4
3590 tst %o4
3591 bz,pn %ncc, .sm_do_copyout
3592 nop
3593 sethi %hi(.sm_copyio_noerr), %o5
3594 or %o5, %lo(.sm_copyio_noerr), %o5
3595 membar #Sync ! sync error barrier
3596 ba,pt %ncc, .sm_do_copyout
3597 stn %o5, [THREAD_REG + T_LOFAULT] ! set/save t_lofault
3598
3599 .copyout_noerr_more:
3600 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
3601 sethi %hi(.copyio_noerr), REAL_LOFAULT
3602 ba,pt %ncc, .do_copyout
3603 or REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
3604
3605 SET_SIZE(copyout_noerr)
3606 #endif /* lint */
3607
3608
3609 /*
3610 * hwblkclr - clears block-aligned, block-multiple-sized regions that are
3611 * at least 256 bytes long, using Spitfire's block stores. If
3612 * the criteria for using this routine are not met then it calls bzero
3613 * and returns 1. Otherwise 0 is returned indicating success.
3614 * Caller is responsible for ensuring use_hw_bzero is true and that
3615 * kpreempt_disable() has been called.
3616 */
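/*
 * Illustrative only: hypothetical C for the entry checks visible above
 * (further criteria from the description behave the same way):
 *
 *	if (((uintptr_t)addr & (VIS_BLOCKSIZE - 1)) != 0 || len < 256) {
 *		bzero(addr, len);	! criteria not met: plain bzero
 *		return (1);
 *	}
 *	... block-store zeroing ...
 *	return (0);			! cleared with block stores
 */
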
3617 #ifdef lint
3618 /*ARGSUSED*/
3619 int
3620 hwblkclr(void *addr, size_t len)
3621 {
3622 return(0);
3623 }
3624 #else /* lint */
3625 ! %i0 - start address
3626 ! %i1 - length of region (multiple of 64)
3627 ! %l0 - saved fprs
3628 ! %l1 - pointer to saved %d0 block
3629 ! %l2 - saved curthread->t_lwp
3630
3631 ENTRY(hwblkclr)
3632 ! get another window w/space for one aligned block of saved fpregs
3633 save %sp, -SA(MINFRAME + 2*VIS_BLOCKSIZE), %sp
3634
3635 ! Must be block-aligned
3636 andcc %i0, (VIS_BLOCKSIZE-1), %g0
3637 bnz,pn %ncc, 1f
3638 nop
3639
3640 ! ... and must be 256 bytes or more
3641 cmp %i1, 256
3642 blu,pn %ncc, 1f
3643 nop
3644
3705 sub %i4, %i2, %i4
3706 jmp %i4
3707 nop
3708
3709 .pz_finish:
3710 membar #Sync
3711 btst FPRS_FEF, %l0
3712 bz,a .pz_finished
3713 wr %l0, 0, %fprs ! restore fprs
3714
3715 ! restore fpregs from stack
3716 ldda [%l1]ASI_BLK_P, %d0
3717 membar #Sync
3718 wr %l0, 0, %fprs ! restore fprs
3719
3720 .pz_finished:
3721 ret
3722 restore %g0, 0, %o0 ! return (bzero or not)
3723
3724 SET_SIZE(hwblkclr)
3725 #endif /* lint */
3726
3727 #ifdef lint
3728 /*ARGSUSED*/
3729 void
3730 hw_pa_bcopy32(uint64_t src, uint64_t dst)
3731 {}
3732 #else /*!lint */
3733 /*
3734 * Copy 32 bytes of data from src (%o0) to dst (%o1)
3735 * using physical addresses.
3736 */
3737 ENTRY_NP(hw_pa_bcopy32)
3738 rdpr %pstate, %g1
3739 andn %g1, PSTATE_IE, %g2
3740 wrpr %g0, %g2, %pstate
3741
3742 rdpr %pstate, %g0
3743 ldxa [%o0]ASI_MEM, %o2
3744 add %o0, 8, %o0
3745 ldxa [%o0]ASI_MEM, %o3
3746 add %o0, 8, %o0
3747 ldxa [%o0]ASI_MEM, %o4
3748 add %o0, 8, %o0
3749 ldxa [%o0]ASI_MEM, %o5
3750
3751 stxa %g0, [%o1]ASI_DC_INVAL
3752 membar #Sync
3753
3754 stxa %o2, [%o1]ASI_MEM
3755 add %o1, 8, %o1
3756 stxa %o3, [%o1]ASI_MEM
3757 add %o1, 8, %o1
3758 stxa %o4, [%o1]ASI_MEM
3759 add %o1, 8, %o1
3760 stxa %o5, [%o1]ASI_MEM
3761
3762 retl
3763 wrpr %g0, %g1, %pstate
3764
3765 SET_SIZE(hw_pa_bcopy32)
3766
3767 #endif /* lint */
3768
3769 #if defined(lint)
3770
3771 int use_hw_bcopy = 1;
3772 int use_hw_bzero = 1;
3773 uint_t hw_copy_limit_1 = 0;
3774 uint_t hw_copy_limit_2 = 0;
3775 uint_t hw_copy_limit_4 = 0;
3776 uint_t hw_copy_limit_8 = 0;
3777
3778 #else /* !lint */
3779
3780 DGDEF(use_hw_bcopy)
3781 .word 1
3782 DGDEF(use_hw_bzero)
3783 .word 1
3784 DGDEF(hw_copy_limit_1)
3785 .word 0
3786 DGDEF(hw_copy_limit_2)
3787 .word 0
3788 DGDEF(hw_copy_limit_4)
3789 .word 0
3790 DGDEF(hw_copy_limit_8)
3791 .word 0
3792
3793 .align 64
3794 .section ".text"
3795 #endif /* !lint */
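
/*
 * Tuning sketch only: use_hw_bcopy, use_hw_bzero and the hw_copy_limit_*
 * variables above are ordinary kernel globals, so where platform startup
 * code does not already set them they could, for example, be overridden
 * from /etc/system (the values below are illustrative assumptions, not
 * recommendations):
 *
 *	set hw_copy_limit_8 = 1024
 *	set use_hw_bzero = 1
 *
 * A limit left at zero keeps the corresponding alignment class on the
 * plain copy loops, as described in the comment above the copyout code.
 */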