1 !
   2 ! This file was generated by a compiler that is currently not part of the CBE
   3 ! (as the CBE compiler does not generate code for the T4 architecture), and
   4 ! then it was modified by hand to remove some unnecessary instructions that
    5 ! the compiler generated and the main loop's branches were rearranged for
   6 ! fewer taken branches on the most frequent code path. These modifications 
   7 ! were made in 7 steps. In each step, a few lines were removed from and added
   8 ! to the compiler generated code to produce an equivalent binary. The lines
    9 ! that were removed in step <i> are marked by "!<i>" at the beginning of the
   10 ! line; the lines added in that step are marked by the same string at the end of
  11 ! the line. In other words, let C_i mean the code, after step <i> (C_0 is
  12 ! the original, compiler generated code, C_7 is the code in this file)
  13 ! To reproduce C_i (0 <= i < 7) first take C_<i+1>, remove the lines that
  14 ! end in !<i+1>, and then remove the !<i+1> string from the beginning of those
  15 ! lines that start with it. Comparing C_i and C_<i+1> is a simple task, as
  16 ! only a few lines have changed.
   17 !  If a compiler (e.g. Oracle Studio 12.3) becomes part of the CBE and
   18 ! is able to generate code as efficient as the code in this file, the
   19 ! longest_match.o file can simply be compiled from longest_match.c.
  20 !
  21 
  22         .section        ".text",#alloc,#execinstr,#progbits
  23         .file   "deflate-t4.c"
  24 
  25         .section        ".bss",#alloc,#write,#nobits
  26 
  27 Bbss.bss:
  28 
  29         .section        ".data",#alloc,#write,#progbits
  30 
  31 Ddata.data:
  32 
  33         .section        ".rodata",#alloc,#progbits
  34 !
  35 ! CONSTANT POOL
  36 !
  37 
  38 Drodata.rodata:
  39 
  40         .section        ".picdata",#alloc,#write
  41 
  42 Dpicdata.picdata:
  43 
  44         .section        ".tbss",#alloc,#write,#tls,#nobits
  45 
  46 Ttbss.bss:
  47 
  48         .section        ".tdata",#alloc,#write,#tls,#progbits
  49 
  50 Ttdata.data:
  51 
  52         .section        ".rodata1",#alloc,#progbits
  53         .align  8
  54 !
  55 ! CONSTANT POOL
  56 !
  57 
  58 .L95:
  59         .ascii  "invalid distance too far back\000"
  60         .align  8
  61 !
  62 ! CONSTANT POOL
  63 !
  64 
  65 .L147:
  66         .ascii  "invalid distance code\000"
  67         .align  8
  68 !
  69 ! CONSTANT POOL
  70 !
  71 
  72 .L153:
  73         .ascii  "invalid literal/length code\000"
  74 
   75         .section        ".text",#alloc,#execinstr,#progbits
   76 /* 000000          0 */         .align  4
   77 ! FILE deflate-t4.c
   78 
   79 !    1                !#include <sun_prefetch.h>
   80 !    2                !#include "deflate.h"
   81 !    3                !#define NIL 0
   82 !    5                !uInt longest_match(s, cur_match)
   83 !    6                !    deflate_state *s;
   84 !    7                !    IPos cur_match;                             /* current match */
   85 !    8                !{
   86 
   87 !
   88 ! SUBROUTINE longest_match
   89 !
   90 ! OFFSET    SOURCE LINE LABEL   INSTRUCTION
      !
      ! longest_match(s, cur_match): hand-tuned version of the C routine whose
      ! first lines are quoted above. It walks a chain of candidate positions
      ! starting at cur_match, compares the bytes at each candidate with the
      ! bytes at the scan position, and returns the longest length found.
      !
      ! In (after the save):
      !   %i0 = s, %i1 = cur_match
      ! Out:
      !   caller's %o0 = unsigned min(best length, word at [s + 0xa4]),
      !   zero-extended from 32 bits
      ! Memory written:
      !   [s + 0xa0] = candidate position, each time a longer match is found
      !
      ! Register roles in the code below:
      !   %g1  pointer from [s + 0x50]; bytes are compared at %g1 + position
      !   %l5  pointer from [s + 0x60]; halfword table giving the next candidate
      !   %l4  word from [s + 0x4c]; mask applied to a position before indexing %l5
      !   %l2  masked position, doubled to a byte offset at lm_0x78
      !   %g4  limit: 0, or [s + 0x9c] - ([s + 0x44] - 0x106) when that is positive
      !   %g5  chain steps left, from [s + 0xac]
      !   %i4  scan pointer = %g1 + [s + 0x9c]
      !   %l7  [s + 0xbc] until lm_0x38 has compared it, then scan + 0x102
      !   %l3  constant 0x102
      !   %i2  best length so far, initially [s + 0xa8]
      !   %i3  [s + 0x44] - 0x106 during setup, then [s + 0xa4]
      !   %o0  length that ends the search early: [s + 0xc0] capped at [s + 0xa4]
      !   %o1  scan[best]          %o2  scan[best - 1]
      !   %o3  match pointer       %o4  running scan pointer in the compare loop
      !
      ! NOTE(review): the offsets look like zlib's deflate_state fields
      ! (0x44 w_size, 0x4c w_mask, 0x50 window, 0x60 prev, 0x9c strstart,
      ! 0xa0 match_start, 0xa4 lookahead, 0xa8 prev_length,
      ! 0xac max_chain_length, 0xbc good_match, 0xc0 nice_match) -- confirm
      ! against the deflate.h this listing was built from.
      !
      ! Branch forms: the cwb* / cxb* compare-and-branch instructions have no
      ! delay slot; bcs, bne, ba and return do, so the instruction written
      ! after each of those executes before control is transferred.
      !
      ! Lines that begin or end with a "!" followed by a step number are the
      ! step markers described at the top of the file; leave their position
      ! on the line untouched.
      !
   91 
   92                         .global longest_match
   93                        
   94 
   95                         longest_match:
   96 
   97                         .L900000112:
      ! New register window with a 0xb0-byte frame.
   98         save      %sp, -0xb0, %sp
      ! Load the mask and the table pointer first so that a prefetch for the
      ! first table access can be issued before the remaining loads.
      ! NOTE(review): %l2 is still an unscaled index here (it is doubled only
      ! at lm_0x78), so this prefetch targets %l5 + index, not %l5 + 2*index
      ! -- confirm that this is intended.
   99          ld        [%i0 + 0x4c], %l4    !7
  100          ldn       [%i0 + 0x60], %l5    !7
  101          and       %i1, %l4, %l2        !7
  102          prefetch [%l5 + %l2], #n_reads !7
  103 !7      ld        [%i0 + 0x9c], %l4
  104          ld        [%i0 + 0x9c], %l1    !7
      ! Remaining parameters from *s; %g4 (limit) defaults to 0.
  105         ld        [%i0 + 0x44], %l6
  106         clr       %g4
  107         ldn       [%i0 + 0x50], %g1
  108         ld        [%i0 + 0xa8], %i2
  109         ld        [%i0 + 0xac], %g5
  110         ld        [%i0 + 0xc0], %o0
  111 !5      srl       %l4, 0x0, %l5
  112         ld        [%i0 + 0xbc], %l7
      ! %i3 = [s + 0x44] - 0x106; %i4 = scan pointer. If [s + 0x9c] is greater
      ! than %i3 (unsigned 32-bit compare), limit becomes their difference;
      ! otherwise the sub is skipped and limit stays 0.
  113         add       %l6, -0x106, %i3
  114 !5      add       %g1, %l5, %i4
  115 !7       add       %g1, %l4, %i4        !5
  116          add       %g1, %l1, %i4        !7
  117 !7      cwbleu    %l4, %i3, lm_0x38
  118          cwbleu    %l1, %i3, lm_0x38    !7
  119 !7      sub       %l4, %i3, %g4
  120          sub       %l1, %i3, %g4        !7
  121                 
  122 lm_0x38:
      ! Cache the two bytes that end the current best match:
      ! %o2 = scan[best - 1], %o1 = scan[best].
  123 !7      ld        [%i0 + 0x4c], %l4
  124 !4      add       %i2, -0x1, %l3
  125 !7      ldn       [%i0 + 0x60], %l5
  126 !4      sra       %l3, 0x0, %o2
  127          add       %i2, -0x1, %o2       !4
  128         ldub      [%i4 + %o2], %o2
  129 !3      sra       %i2, 0x0, %l6
  130 !3      ldub      [%i4 + %l6], %o1
  131          ldub      [%i4 + %i2], %o1     !3
      ! The cmp sets the flags used by bcs below; the add and ld in between do
      ! not touch the condition codes. %l7 is reused for scan + 0x102 once its
      ! old value has been compared, and %i3 is reloaded with [s + 0xa4].
      ! mov 0x102 is in the bcs delay slot and runs on both paths.
  132         cmp       %i2, %l7
  133         add       %i4, 0x102, %l7
  134         ld        [%i0 + 0xa4], %i3
  135         bcs,pn    %icc, lm_0x6c
  136         mov       0x102, %l3
  137 
      ! Fall-through only when best >= [s + 0xbc] (unsigned): quarter the
      ! number of chain steps.
  138         srl       %g5, 0x2, %g5
  139                 
  140 lm_0x6c:
      ! Cap the early-exit length %o0 at [s + 0xa4] (unsigned).
  141         cmp       %o0, %i3
  142 !6      srl       %i1, 0x0, %l0
  143 !7       and       %i1, %l4, %l2        !6
  144         movgu     %icc, %i3, %o0
  145                 
  146 lm_0x78:
      ! Loop head, entered with %l2 = %i1 & %l4.
      ! %o3 = match pointer for candidate %i1; %o5 = match[best].
      ! %l2 is doubled to a byte offset into the halfword table and the line
      ! 0x40 bytes below that entry is prefetched.
      ! NOTE(review): the reason for the -0x40 bias is not visible in this
      ! file -- confirm before changing it.
      ! If match[best] equals scan[best] the candidate is examined further at
      ! lm_0x17c_neg; otherwise fall through to the next candidate.
  147 !6      and       %i1, %l4, %l2
  148 !6      add       %l0, %g1, %o3
  149          add       %i1, %g1, %o3        !6
  150 !3      ldub      [%o3 + %l6], %o5
  151          ldub      [%o3 + %i2], %o5             !3
  152 !1      srl       %l2, 0x0, %o4
  153 !1      sllx      %o4, 0x1, %l2
  154          sllx     %l2, 0x1, %l2                 !1
  155          add       %l2, %l5, %l1                !1
  156          prefetch  [%l1 - 0x40], #n_reads       !1
  157         cwbe     %o5, %o1, lm_0x17c_neg
  158 
  159 lm_0x17c:
      ! Next candidate: %i1 = halfword at %l5 + %l2. Leave the loop when the
      ! new candidate is <= limit (unsigned) or when the step counter reaches
      ! zero. The and in the bne delay slot prepares the masked index for the
      ! next pass; it also runs on fall-through, where %l2 is not read again.
  160         lduh      [%l5 + %l2], %i1
  161         cwbleu    %i1, %g4, lm_0x190
  162 
  163         addcc     %g5, -0x1, %g5
  164         bne,pt    %icc, lm_0x78
  165 !6      srl       %i1, 0x0, %l0
  166          and       %i1, %l4, %l2        !6
  167                 
  168 lm_0x190:
      ! Exit: best = unsigned min(best, [s + 0xa4]). return restores the
      ! caller's register window before its delay slot executes, so the srl
      ! below names the caller's registers: its %o2 is this routine's %i2
      ! (best), and the zero-extended result lands in the caller's %o0.
  169         cmp       %i2, %i3
  170         movgu     %icc, %i3, %i2
  171         return    %i7 + 0x8
  172         srl       %o2, 0x0, %o0
  173 
  174 lm_0x17c_neg:
      ! Reached when match[best] == scan[best]. Three more cheap checks before
      ! the long compare: match[best - 1] against %o2, then byte 0 and byte 1
      ! of match against byte 0 and byte 1 of scan. Any mismatch returns to
      ! lm_0x17c for the next candidate. %l1 is reused as a scratch register
      ! from here on.
  175 !3      add       %o3, %l6, %o7
  176          add       %o3, %i2, %o7                !3
  177         ldub      [%o7 - 0x1], %l1
  178         cwbne     %l1, %o2, lm_0x17c
  179 
  180 !6      ldub      [%g1 + %l0], %i5
  181          ldub      [%g1 + %i1], %i5     !6
  182         ldub      [%i4], %o5
  183         cwbne     %i5, %o5, lm_0x17c
  184 
  185         ldub      [%i4 + 0x1], %l1
  186         ldub      [%o3 + 0x1], %o4
  187         cwbne     %o4, %l1, lm_0x17c
  188 
      ! Bytes 0 and 1 match: %o3 = match + 2, %o4 = scan + 2.
  189         add       %o3, 0x2, %o3
  190 !1      add       %l2, %l5, %l1
  191         add       %i4, 0x2, %o4
  192 
  193 lm_0xc0:
      ! Byte compare loop, unrolled eight times. Each step advances the scan
      ! pointer %o4 by one and compares the byte it now points at with the
      ! match byte at a fixed offset (1..8) from %o3; %o3 itself moves by 8
      ! only in the last step. On a mismatch control goes to lm_0x14c with %o4
      ! pointing at the scan byte that differed.
  194         ldub      [%o4 + 0x1], %l0
  195         add       %o4, 0x1, %o4
  196         ldub      [%o3 + 0x1], %o7
  197         cwbne     %l0, %o7, lm_0x14c
  198 
  199         ldub      [%o4 + 0x1], %i5
  200         add       %o4, 0x1, %o4
  201         ldub      [%o3 + 0x2], %o5
  202         cwbne     %i5, %o5, lm_0x14c
  203 
  204         ldub      [%o4 + 0x1], %l0
  205         add       %o4, 0x1, %o4
  206         ldub      [%o3 + 0x3], %o7
  207         cwbne     %l0, %o7, lm_0x14c
  208 
  209         ldub      [%o4 + 0x1], %i5
  210         add       %o4, 0x1, %o4
  211         ldub      [%o3 + 0x4], %o5
  212         cwbne     %i5, %o5, lm_0x14c
  213 
  214         ldub      [%o4 + 0x1], %l0
  215         add       %o4, 0x1, %o4
  216         ldub      [%o3 + 0x5], %o7
  217         cwbne     %l0, %o7, lm_0x14c
  218 
  219         ldub      [%o4 + 0x1], %i5
  220         add       %o4, 0x1, %o4
  221         ldub      [%o3 + 0x6], %o5
  222         cwbne     %i5, %o5, lm_0x14c
  223 
  224         ldub      [%o4 + 0x1], %l0
  225         add       %o4, 0x1, %o4
  226         ldub      [%o3 + 0x7], %o7
  227         cwbne     %l0, %o7, lm_0x14c
  228 
  229         ldub      [%o4 + 0x1], %i5
  230         add       %o4, 0x1, %o4
  231         ldub      [%o3 + 0x8], %o5
  232         add       %o3, 0x8, %o3
  233         cwbne     %i5, %o5, lm_0x14c
  234 
      ! Repeat while %o4 < %l7 (64-bit unsigned compare).
      ! NOTE(review): the nop presumably keeps the two compare-and-branch
      ! instructions from being adjacent -- confirm against the T4
      ! documentation before removing it.
  235         nop
  236         cxbcs     %o4, %l7, lm_0xc0
  237                 
  238 lm_0x14c:
      ! %o7 = 0x102 - (%l7 - %o4), the length of this match. If it is not
      ! greater than best (signed compare), go on to the next candidate.
  239 !1      prefetch  [%l1 - 0x40], #n_reads
  240         sub       %l7, %o4, %l0
  241         sub       %l3, %l0, %o7
  242         cwble     %o7, %i2, lm_0x17c
  243 
      ! New best: store the candidate position to [s + 0xa0], update best, and
      ! stop searching if the length has reached %o0 (signed compare).
  244         st        %i1, [%i0 + 0xa0]
  245         mov       %o7, %i2
  246         cwbge     %o7, %o0, lm_0x190
  247 
      ! Refresh the cached end bytes for the new best length, then continue
      ! with the next candidate. The load of scan[best - 1] into %o2 sits in
      ! the ba delay slot.
  248 !2      sra       %o7, 0x0, %i1
  249 !3      sra       %o7, 0x0, %l6
  250 !2      add       %i4, %i1, %l1
  251          add       %i4, %o7, %l1        !2
  252 !2      ldub      [%i4 + %i1], %o1
  253          ldub      [%i4 + %o7], %o1     !2
  254          ba        lm_0x17c
  255         ldub      [%l1 - 0x1], %o2
  256         
  257 
  258 /* 0x0220          0 */         .type   longest_match,#function
  259 /* 0x0220          0 */         .size   longest_match,(.-longest_match)
 260                        
 261 
  262                         .L900000113:
      ! Label placed right after the function; nothing in the code shown in
      ! this file refers to it.
  263 
      ! 24 bytes reserved in .text at an 8-byte boundary, followed by a return
      ! to 4-byte alignment. No label in the code shown refers to this space.
      ! NOTE(review): presumably emitted by the compiler -- confirm before
      ! removing.
  264         .section        ".text",#alloc,#execinstr,#progbits
  265 /* 000000          0 */         .align  8
  266 /* 000000            */         .skip   24
  267 /* 0x0018            */         .align  4
  268 
  269 
  270                         .L900000286:
  271 
  272         .section        ".text",#alloc,#execinstr,#progbits
  273 
      ! Tool identification strings; the cg entry appears twice.
  274 ! Begin Disassembling Ident
  275         .ident  "cg: Sun Compiler Common 12.3 SunOS_sparc 2011/11/16"   ! (NO SOURCE LINE)
  276         .ident  "acomp: Sun C 5.12 SunOS_sparc 2011/11/16"      ! (/tmp/acomp.1329237379.172468.02.sd:24)
  277         .ident  "iropt: Sun Compiler Common 12.3 SunOS_sparc 2011/11/16"        ! (/tmp/acomp.1329237379.172468.02.sd:25)
  278         .ident  "cg: Sun Compiler Common 12.3 SunOS_sparc 2011/11/16"   ! (NO SOURCE LINE)
  279 ! End Disassembling Ident