!
! This file was generated by a compiler that is currently not part of the CBE
! (as the CBE compiler does not generate code for the T4 architecture), and
! then it was modified by hand to remove some unnecessary instructions that
! the compiler generated, and the main loop's branches were rearranged for
! fewer taken branches on the most frequent code path. These modifications
! were made in 7 steps. In each step, a few lines were removed from and added
! to the compiler generated code to produce an equivalent binary. The lines
! that were removed in step <i> are marked by "!<i>" at the beginning of the
! line, the lines added in this step are marked by the same string added at
! the end of the line. In other words, let C_i mean the code after step <i>
! (C_0 is the original, compiler generated code, C_7 is the code in this file).
! To reproduce C_i (0 <= i < 7) first take C_<i+1>, remove the lines that
! end in !<i+1>, and then remove the !<i+1> string from the beginning of those
! lines that start with it. Comparing C_i and C_<i+1> is a simple task, as
! only a few lines have changed.
! If a compiler (e.g. the Oracle Studio 12.3) becomes part of the CBE and
! it is able to generate code as efficient as the code in this file, the
! longest_match.o file can simply be compiled from longest_match.c .
!
! Because of the marker scheme described above, explanatory comments in this
! file are kept on lines of their own: no instruction line may gain or lose a
! trailing "!<i>" marker.
!

        .section        ".text",#alloc,#execinstr,#progbits
        .file   "deflate-t4.c"

! The sections below are opened and given an anchor label, but nothing is
! placed in them before the next .section directive.

        .section        ".bss",#alloc,#write,#nobits

Bbss.bss:

        .section        ".data",#alloc,#write,#progbits

Ddata.data:

        .section        ".rodata",#alloc,#progbits
!
! CONSTANT POOL
!

Drodata.rodata:

        .section        ".picdata",#alloc,#write

Dpicdata.picdata:

        .section        ".tbss",#alloc,#write,#tls,#nobits

Ttbss.bss:

        .section        ".tdata",#alloc,#write,#tls,#progbits

Ttdata.data:

        .section        ".rodata1",#alloc,#progbits
        .align  8
!
! CONSTANT POOL
!
! String constants emitted into .rodata1.
! NOTE(review): no instruction in the visible code references .L95, .L147 or
! .L153; they look like leftovers of the compiler-generated file -- confirm
! before removing, since dropping them changes the section contents.

.L95:
        .ascii  "invalid distance too far back\000"
        .align  8
!
! CONSTANT POOL
!

.L147:
        .ascii  "invalid distance code\000"
        .align  8
!
! CONSTANT POOL
!

.L153:
        .ascii  "invalid literal/length code\000"

        .section        ".text",#alloc,#execinstr,#progbits
/* 000000          0 */         .align  4
! FILE deflate-t4.c

!    1                !#include <sun_prefetch.h>
!    2                !#include "deflate.h"
!    3                !#define NIL 0
!    5                !uInt longest_match(s, cur_match)
!    6                !    deflate_state *s;
!    7                !    IPos cur_match;            /* current match */
!    8                !{

!
! SUBROUTINE longest_match
!
! OFFSET    SOURCE LINE LABEL   INSTRUCTION
!
! Walks a chain of candidate positions starting at cur_match, compares the
! bytes at each candidate against the bytes at the current scan position,
! records the longest run found, and returns that length clamped to the
! word loaded from [s + 0xa4].
!
! In (after "save"):
!   %i0 = s           (first argument of the C prototype quoted above)
!   %i1 = cur_match   (second argument; reloaded inside the loop)
! Out:
!   caller's %o0 = final value of %i2
! Memory written:
!   [%i0 + 0xa0] receives the candidate position whenever a longer run is found
!
! Register roles as used below.  Field names are presumed from zlib's
! deflate_state; this file only shows the numeric offsets -- TODO confirm
! against deflate.h:
!   %l4 = [s + 0x4c]  mask applied to cur_match           (presumably w_mask)
!   %l5 = [s + 0x60]  base of a table of 16-bit entries   (presumably prev)
!   %g1 = [s + 0x50]  base of the byte buffer             (presumably window)
!   %i4 = %g1 + [s + 0x9c]  scan pointer      (presumably window + strstart)
!   %i2 = [s + 0xa8]  best length so far            (presumably prev_length)
!   %g5 = [s + 0xac]  remaining chain steps    (presumably max_chain_length)
!   %o0 = [s + 0xc0]  length that ends the search early (presumably nice_match)
!   %g4 = lower limit for cur_match (0, or [s+0x9c] - ([s+0x44] - 0x106))
!   %o1 = byte at scan[best length], %o2 = byte at scan[best length - 1]
!   %l7 = first [s + 0xbc], later reused as %i4 + 0x102 (end of compare range)
!   %i3 = first [s + 0x44] - 0x106, later reloaded from [s + 0xa4]
!   %l3 = constant 0x102 (258)
!   %l2 = masked cur_match, doubled at lm_0x78 to form a byte offset into %l5
!

        .global longest_match


longest_match:

.L900000112:
        save    %sp, -0xb0, %sp
        ld      [%i0 + 0x4c], %l4               !7
        ldn     [%i0 + 0x60], %l5               !7
        and     %i1, %l4, %l2                   !7
! Early prefetch issued as soon as the masked index is available.
        prefetch        [%l5 + %l2], #n_reads   !7
!7      ld      [%i0 + 0x9c], %l4
        ld      [%i0 + 0x9c], %l1               !7
        ld      [%i0 + 0x44], %l6
! Limit defaults to 0; it is only replaced below when %l1 exceeds %i3.
        clr     %g4
        ldn     [%i0 + 0x50], %g1
        ld      [%i0 + 0xa8], %i2
        ld      [%i0 + 0xac], %g5
        ld      [%i0 + 0xc0], %o0
!5      srl     %l4, 0x0, %l5
        ld      [%i0 + 0xbc], %l7
! %i3 = [s + 0x44] - 262
        add     %l6, -0x106, %i3
!5      add     %g1, %l5, %i4
!7      add     %g1, %l4, %i4                   !5
        add     %g1, %l1, %i4                   !7
! Unsigned 32-bit compare-and-branch: keep limit = 0 when %l1 <= %i3.
!7      cwbleu  %l4, %i3, lm_0x38
        cwbleu  %l1, %i3, lm_0x38               !7
!7      sub     %l4, %i3, %g4
        sub     %l1, %i3, %g4                   !7

lm_0x38:
!7      ld      [%i0 + 0x4c], %l4
!4      add     %i2, -0x1, %l3
!7      ldn     [%i0 + 0x60], %l5
!4      sra     %l3, 0x0, %o2
! %o2 = scan[best - 1], %o1 = scan[best]: the two bytes checked first for
! every candidate.
        add     %i2, -0x1, %o2                  !4
        ldub    [%i4 + %o2], %o2
!3      sra     %i2, 0x0, %l6
!3      ldub    [%i4 + %l6], %o1
        ldub    [%i4 + %i2], %o1                !3
! Compare best length with [s + 0xbc] before %l7 is reused as the end pointer.
        cmp     %i2, %l7
        add     %i4, 0x102, %l7
        ld      [%i0 + 0xa4], %i3
! Skip the shift below when best length < [s + 0xbc] (unsigned).
        bcs,pn  %icc, lm_0x6c
! Delay slot, not annulled: executed on both paths.
        mov     0x102, %l3

! Quarter the chain budget.
        srl     %g5, 0x2, %g5

lm_0x6c:
! Clamp %o0 to [s + 0xa4] (unsigned).
        cmp     %o0, %i3
!6      srl     %i1, 0x0, %l0
!7      and     %i1, %l4, %l2                   !6
        movgu   %icc, %i3, %o0

! ---- top of the chain loop: one candidate position per iteration ----
lm_0x78:
!6      and     %i1, %l4, %l2
!6      add     %l0, %g1, %o3
! %o3 = candidate pointer = buffer base + cur_match
        add     %i1, %g1, %o3                   !6
!3      ldub    [%o3 + %l6], %o5
        ldub    [%o3 + %i2], %o5                !3
!1      srl     %l2, 0x0, %o4
!1      sllx    %o4, 0x1, %l2
! Turn the masked index into a byte offset (entries are 2 bytes wide, see the
! lduh below) and prefetch 0x40 bytes below the entry.
        sllx    %l2, 0x1, %l2                   !1
        add     %l2, %l5, %l1                   !1
        prefetch        [%l1 - 0x40], #n_reads  !1
! Only when candidate[best] matches scan[best] is the detailed comparison
! entered; the common mismatch case falls through.
        cwbe    %o5, %o1, lm_0x17c_neg

! ---- advance to the next candidate in the chain ----
lm_0x17c:
        lduh    [%l5 + %l2], %i1
! Stop when the next position is not above the limit (unsigned).
        cwbleu  %i1, %g4, lm_0x190

        addcc   %g5, -0x1, %g5
        bne,pt  %icc, lm_0x78
!6      srl     %i1, 0x0, %l0
! Delay slot, not annulled: masked index for the next iteration.
        and     %i1, %l4, %l2                   !6

! ---- exit: return min(best length, [s + 0xa4]) (unsigned) ----
lm_0x190:
        cmp     %i2, %i3
        movgu   %icc, %i3, %i2
        return  %i7 + 0x8
! Delay slot of "return": the register window has already been restored, so
! %o2 here is the caller's %o2, i.e. this function's %i2.
        srl     %o2, 0x0, %o0

! ---- detailed comparison of one candidate ----
lm_0x17c_neg:
!3      add     %o3, %l6, %o7
! candidate[best - 1] must equal scan[best - 1]
        add     %o3, %i2, %o7                   !3
        ldub    [%o7 - 0x1], %l1
        cwbne   %l1, %o2, lm_0x17c

! candidate[0] must equal scan[0]
!6      ldub    [%g1 + %l0], %i5
        ldub    [%g1 + %i1], %i5                !6
        ldub    [%i4], %o5
        cwbne   %i5, %o5, lm_0x17c

! candidate[1] must equal scan[1]
        ldub    [%i4 + 0x1], %l1
        ldub    [%o3 + 0x1], %o4
        cwbne   %o4, %l1, lm_0x17c

! Both pointers move past the two bytes already verified:
! %o3 = candidate + 2, %o4 = scan + 2.
        add     %o3, 0x2, %o3
!1      add     %l2, %l5, %l1
        add     %i4, 0x2, %o4

! ---- unrolled compare: 8 bytes per pass ----
! %o4 is advanced after every byte; %o3 is advanced once per pass (by 8), so
! the candidate loads use displacements 1..8.
lm_0xc0:
        ldub    [%o4 + 0x1], %l0
        add     %o4, 0x1, %o4
        ldub    [%o3 + 0x1], %o7
        cwbne   %l0, %o7, lm_0x14c

        ldub    [%o4 + 0x1], %i5
        add     %o4, 0x1, %o4
        ldub    [%o3 + 0x2], %o5
        cwbne   %i5, %o5, lm_0x14c

        ldub    [%o4 + 0x1], %l0
        add     %o4, 0x1, %o4
        ldub    [%o3 + 0x3], %o7
        cwbne   %l0, %o7, lm_0x14c

        ldub    [%o4 + 0x1], %i5
        add     %o4, 0x1, %o4
        ldub    [%o3 + 0x4], %o5
        cwbne   %i5, %o5, lm_0x14c

        ldub    [%o4 + 0x1], %l0
        add     %o4, 0x1, %o4
        ldub    [%o3 + 0x5], %o7
        cwbne   %l0, %o7, lm_0x14c

        ldub    [%o4 + 0x1], %i5
        add     %o4, 0x1, %o4
        ldub    [%o3 + 0x6], %o5
        cwbne   %i5, %o5, lm_0x14c

        ldub    [%o4 + 0x1], %l0
        add     %o4, 0x1, %o4
        ldub    [%o3 + 0x7], %o7
        cwbne   %l0, %o7, lm_0x14c

        ldub    [%o4 + 0x1], %i5
        add     %o4, 0x1, %o4
        ldub    [%o3 + 0x8], %o5
        add     %o3, 0x8, %o3
        cwbne   %i5, %o5, lm_0x14c

! NOTE(review): the reason for this nop is not stated in the file; presumably
! it separates the two adjacent compare-and-branch instructions -- confirm
! before removing.
        nop
! Next pass while scan pointer < end pointer (64-bit unsigned compare).
        cxbcs   %o4, %l7, lm_0xc0

! ---- run ended: compute its length ----
lm_0x14c:
!1      prefetch        [%l1 - 0x40], #n_reads
! %o7 = 0x102 - (end pointer - scan pointer)
        sub     %l7, %o4, %l0
        sub     %l3, %l0, %o7
! Not longer than the best so far (signed compare): try the next candidate.
        cwble   %o7, %i2, lm_0x17c

! New best: store the candidate position and the new length.
! (offset 0xa0 is presumably match_start -- TODO confirm)
        st      %i1, [%i0 + 0xa0]
        mov     %o7, %i2
! Long enough (>= %o0, signed): stop searching.
        cwbge   %o7, %o0, lm_0x190

!2      sra     %o7, 0x0, %i1
!3      sra     %o7, 0x0, %l6
!2      add     %i4, %i1, %l1
! Refresh the two quick-check bytes for the new best length.
        add     %i4, %o7, %l1                   !2
!2      ldub    [%i4 + %i1], %o1
        ldub    [%i4 + %o7], %o1                !2
        ba      lm_0x17c
! Delay slot: %o2 = scan[new best - 1]
        ldub    [%l1 - 0x1], %o2


/* 0x0220          0 */         .type   longest_match,#function
/* 0x0220          0 */         .size   longest_match,(.-longest_match)


.L900000113:

        .section        ".text",#alloc,#execinstr,#progbits
/* 000000          0 */         .align  8
/* 000000            */         .skip   24
/* 0x0018            */         .align  4


.L900000286:

        .section        ".text",#alloc,#execinstr,#progbits

! Begin Disassembling Ident
        .ident  "cg: Sun Compiler Common 12.3 SunOS_sparc 2011/11/16"   ! (NO SOURCE LINE)
        .ident  "acomp: Sun C 5.12 SunOS_sparc 2011/11/16"      ! (/tmp/acomp.1329237379.172468.02.sd:24)
        .ident  "iropt: Sun Compiler Common 12.3 SunOS_sparc 2011/11/16"        ! (/tmp/acomp.1329237379.172468.02.sd:25)
        .ident  "cg: Sun Compiler Common 12.3 SunOS_sparc 2011/11/16"   ! (NO SOURCE LINE)
! End Disassembling Ident