Print this page
de-linting of .s files


   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 /*
  23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #pragma ident   "%Z%%M% %I%     %E% SMI"
  28 
  29 #include <sys/param.h>
  30 #include <sys/errno.h>
  31 #include <sys/asm_linkage.h>
  32 #include <sys/vtrace.h>
  33 #include <sys/machthread.h>
  34 #include <sys/machparam.h>
  35 
  36 #if defined(lint)
  37 #include <sys/types.h>
  38 #else   /* lint */
  39 #include "assym.h"
  40 #endif  /* lint */
  41 
  42 /*
  43  * Prefetch considerations
  44  * 
  45  * We prefetch one cacheline ahead.  This may not be enough on Serengeti
  46  * systems - see default_copyout() etc which prefetch 5 lines ahead.
  47  * On the other hand, we expect most of the source buffers to be
  48  * recently used enough to be cached.
  49  *
  50  * On US-I the prefetches are inoperative.  On US-II they preload the E$;
  51  * the mainloop unrolling and load-buffer should cover loads from E$.
  52  * The stores appear to be the slow point on US-II.
  53  * 
  54  * On US-IIICu the prefetch preloads the L2$ too, but there is no load
  55  * buffer so the loads will stall for D$ miss, L2$ hit.  The hardware
  56  * auto-prefetch is not activated by integer loads.  No solution
  57  * in sight for this, barring odd games with FP read, write, integer read.
  58  * 
  59  * US-IV (Panther) appears similar to US-IIICu, except that a strong
  60  * variant of prefetch is available which can take TLB traps.  We don't
  61  * use this.  The h/w prefetch stride can be set to 64, 128 or 192,
  62  * and h/w prefetches only reach the L2$ (we don't use these either).
  63  * L2$ load-to-use latency is 15 cycles (best).
  64  */
  65 
  66 
  67 /*
  68  * ip_ocsum(address, halfword_count, sum)
  69  * Do a 16 bit one's complement sum of a given number of (16-bit)
  70  * halfwords. The halfword pointer must not be odd.
  71  *      %o0 address; %o1 count; %o2 sum accumulator; %o4 temp
  72  *      %g2 and %g3 used in main loop
  73  *
  74  * (from @(#)ocsum.s 1.3 89/02/24 SMI)
  75  *
  76  */
  77 
  78 #if defined(lint) 
  79 
  80 /* ARGSUSED */
  81 unsigned int
  82 ip_ocsum(u_short *address, int halfword_count, unsigned int sum)
  83 { return (0); }	/* lint-only stub: always 0; non-lint builds use ENTRY(ip_ocsum) */
  84 
  85 #else   /* lint */
  86 
  87         ENTRY(ip_ocsum)
  88 
  89 /*
  90  * On ttcp transmits, called once per ocsum_copyin but with a small
  91  * block ( >99.9% ).  Could be the tx hdrs?  How many acks/seg are we rxing?
  92  * On ttcp receives, called more than once per ocsum_copyout. Rx hdrs
  93  * and tx acks?
  94  *
  95  * To do: telnet and nfs traffic
  96  *
  97  * On an NCA'd webserver about 10% of the calls are >64 bytes
  98  *      about 10% of those start on a 64-byte boundary
  99  *      about 30% are >5*64 bytes.
 100  * The NCA numbers & proportions don't change with h/w cksum on.
 101  *
 102  * Tx hdrs are likely to be already in cache.
 103  * Rx hdrs: depends on whether they were already inspected.
 104  */
 105 
 106         !


 444         and     %i2, %g1, %o1   ! lo32
 445 
 446         add     %o0, %o1, %o0   ! 33b
 447 
 448         srlx    %o0, 16, %o1    ! hi17
 449         and     %o0, %g4, %o0   ! lo16
 450 
 451         add     %o1, %o0, %o0   ! 18b
 452 
 453         srlx    %o0, 16, %o1    ! hi2
 454         and     %o0, %g4, %o0   ! lo16
 455 
 456         add     %o1, %o0, %i0   ! 16b result in %i0
 457 
 458         ret                     ! return
 459         restore
 460 
 461 
 462         SET_SIZE(ip_ocsum_long) ! 64-bit version
 463 
 464 #endif  /* lint */


   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 /*
  23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 


  27 #include <sys/param.h>
  28 #include <sys/errno.h>
  29 #include <sys/asm_linkage.h>
  30 #include <sys/vtrace.h>
  31 #include <sys/machthread.h>
  32 #include <sys/machparam.h>
  33 



  34 #include "assym.h"

  35 
  36 /*
  37  * Prefetch considerations
  38  * 
  39  * We prefetch one cacheline ahead.  This may not be enough on Serengeti
  40  * systems - see default_copyout() etc which prefetch 5 lines ahead.
  41  * On the other hand, we expect most of the source buffers to be
  42  * recently used enough to be cached.
  43  *
  44  * On US-I the prefetches are inoperative.  On US-II they preload the E$;
  45  * the mainloop unrolling and load-buffer should cover loads from E$.
  46  * The stores appear to be the slow point on US-II.
  47  * 
  48  * On US-IIICu the prefetch preloads the L2$ too, but there is no load
  49  * buffer so the loads will stall for D$ miss, L2$ hit.  The hardware
  50  * auto-prefetch is not activated by integer loads.  No solution
  51  * in sight for this, barring odd games with FP read, write, integer read.
  52  * 
  53  * US-IV (Panther) appears similar to US-IIICu, except that a strong
  54  * variant of prefetch is available which can take TLB traps.  We don't
  55  * use this.  The h/w prefetch stride can be set to 64, 128 or 192,
  56  * and h/w prefetches only reach the L2$ (we don't use these either).
  57  * L2$ load-to-use latency is 15 cycles (best).
  58  */
  59 
  60 
  61 /*
  62  * ip_ocsum(address, halfword_count, sum)
  63  * Do a 16 bit one's complement sum of a given number of (16-bit)
  64  * halfwords. The halfword pointer must not be odd.
  65  *      %o0 address; %o1 count; %o2 sum accumulator; %o4 temp
  66  *      %g2 and %g3 used in main loop
  67  *
  68  * (from @(#)ocsum.s 1.3 89/02/24 SMI)
  69  *
  70  */
  71 









  72         ENTRY(ip_ocsum)
  73 
  74 /*
  75  * On ttcp transmits, called once per ocsum_copyin but with a small
  76  * block ( >99.9% ).  Could be the tx hdrs?  How many acks/seg are we rxing?
  77  * On ttcp receives, called more than once per ocsum_copyout. Rx hdrs
  78  * and tx acks?
  79  *
  80  * To do: telnet and nfs traffic
  81  *
  82  * On an NCA'd webserver about 10% of the calls are >64 bytes
  83  *      about 10% of those start on a 64-byte boundary
  84  *      about 30% are >5*64 bytes.
  85  * The NCA numbers & proportions don't change with h/w cksum on.
  86  *
  87  * Tx hdrs are likely to be already in cache.
  88  * Rx hdrs: depends on whether they were already inspected.
  89  */
  90 
  91         !


 429         and     %i2, %g1, %o1   ! lo32
 430 
 431         add     %o0, %o1, %o0   ! 33b
 432 
 433         srlx    %o0, 16, %o1    ! hi17
 434         and     %o0, %g4, %o0   ! lo16
 435 
 436         add     %o1, %o0, %o0   ! 18b
 437 
 438         srlx    %o0, 16, %o1    ! hi2
 439         and     %o0, %g4, %o0   ! lo16
 440 
 441         add     %o1, %o0, %i0   ! 16b result in %i0
 442 
 443         ret                     ! return
 444         restore
 445 
 446 
 447         SET_SIZE(ip_ocsum_long) ! 64-bit version
 448