1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 /*
  23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #ident  "%Z%%M% %I%     %E% SMI"
  28 
  29 #include <sys/asm_linkage.h>
  30 
  31 /*
  32  * ip_ocsum(address, halfword_count, sum)
  33  * Do a 16 bit one's complement sum of a given number of (16-bit)
  34  * halfwords. The halfword pointer must not be odd.
  35  *      %o0 address; %o1 count; %o2 sum accumulator; %o4 temp
  36  *      %g2 and %g3 used in main loop
  37  *
  38  * (from @(#)ocsum.s 1.3 89/02/24 SMI)
  39  *
  40  */
  41 
  42         ENTRY(ip_ocsum)
  43         cmp     %o1, 31         ! less than 62 bytes?
  44         bl,a    .dohw           !   just do halfwords
  45         tst     %o1             ! delay slot, test count
  46 
  47         btst    31, %o0         ! (delay slot)
  48         bz      2f              ! if 32 byte aligned, skip
  49         nop
  50 
  51         !
  52         ! Do first halfwords until 32-byte aligned
  53         !
  54 1:
  55         lduh    [%o0], %g2      ! read data
  56         add     %o0, 2, %o0     ! increment address
  57         add     %o2, %g2, %o2   ! add to accumulator, don't need carry yet
  58         btst    31, %o0         ! 32 byte aligned?
  59         bnz     1b
  60         sub     %o1, 1, %o1     ! decrement count
  61         !
  62         ! loop to add in 32 byte chunks
  63         ! The loads and adds are staggered to help avoid load/use
  64         ! interlocks on highly pipelined implementations, and double
  65         ! loads are used for 64-bit wide memory systems.
  66         !
  67 2:
  68         sub     %o1, 16, %o1    ! decrement count to aid testing
  69 4:
  70         ldd     [%o0], %g2      ! read data
  71         ldd     [%o0+8], %o4    ! read more data
  72         addcc   %o2, %g2, %o2   ! add to accumulator
  73         addxcc  %o2, %g3, %o2   ! add to accumulator with carry
  74         ldd     [%o0+16], %g2   ! read more data
  75         addxcc  %o2, %o4, %o2   ! add to accumulator with carry
  76         addxcc  %o2, %o5, %o2   ! add to accumulator with carry
  77         ldd     [%o0+24], %o4   ! read more data
  78         addxcc  %o2, %g2, %o2   ! add to accumulator with carry
  79         addxcc  %o2, %g3, %o2   ! add to accumulator with carry
  80         addxcc  %o2, %o4, %o2   ! add to accumulator
  81         addxcc  %o2, %o5, %o2   ! add to accumulator with carry
  82         addxcc  %o2, 0, %o2     ! if final carry, add it in
  83         subcc   %o1, 16, %o1    ! decrement count (in halfwords)
  84         bge     4b
  85         add     %o0, 32, %o0    ! delay slot, increment address
  86         
  87         add     %o1, 16, %o1    ! add back in
  88         !
  89         ! Do any remaining halfwords
  90         !
  91         b       .dohw
  92         tst     %o1             ! delay slot, for more to do
  93 
  94 3:
  95         add     %o0, 2, %o0     ! increment address
  96         addcc   %o2, %g2, %o2   ! add to accumulator
  97         addxcc  %o2, 0, %o2     ! if carry, add it in
  98         subcc   %o1, 1, %o1     ! decrement count
  99 .dohw:
 100         bg,a    3b              ! more to do?
 101         lduh    [%o0], %g2      ! read data
 102 
 103         !
 104         ! at this point the 32-bit accumulator
 105         ! has the result that needs to be returned in 16-bits
 106         !
 107         sll     %o2, 16, %o4    ! put low halfword in high halfword %o4
 108         addcc   %o4, %o2, %o2   ! add the 2 halfwords in high %o2, set carry
 109         srl     %o2, 16, %o2    ! shift to low halfword
 110         retl                    ! return
 111         addxcc  %o2, 0, %o0     ! add in carry if any. result in %o0
 112         SET_SIZE(ip_ocsum)
 113