1 !
2 ! This file was generated by a compiler that is currently not part of the CBE
3 ! (as the CBE compiler does not generate code for the T4 architecture), and
4 ! then it was modified by hand to remove some unnecessary instructions that
5 ! the compiler generated, and the main loop's branches were rearranged for
6 ! fewer taken branches on the most frequent code path. These modifications
7 ! were made in 7 steps. In each step, a few lines were removed from and added
8 ! to the compiler generated code to produce an equivalent binary. The lines
9 ! that were removed in step <i> are marked by "!<i>" at the beginning of the
10 ! line; the lines added in that step are marked by the same string at the end of
11 ! the line. In other words, let C_i mean the code after step <i> (C_0 is
12 ! the original, compiler generated code, C_7 is the code in this file).
13 ! To reproduce C_i (0 <= i < 7) first take C_<i+1>, remove the lines that
14 ! end in !<i+1>, and then remove the !<i+1> string from the beginning of those
15 ! lines that start with it. Comparing C_i and C_<i+1> is a simple task, as
16 ! only a few lines have changed.
17 ! If a compiler (e.g. the Oracle Studio 12.3) becomes part of the CBE and
18 ! is able to generate code as efficient as the code in this file, the
19 ! longest_match.o file can simply be compiled from longest_match.c.
20 !
21
! Section scaffolding emitted by the compiler. Each section below is opened
! and given a local anchor label; of the data sections, only ".rodata1"
! actually receives any bytes in the visible part of this file.
22 .section ".text",#alloc,#execinstr,#progbits
23 .file "deflate-t4.c"
24
25 .section ".bss",#alloc,#write,#nobits
26
27 Bbss.bss:
28
29 .section ".data",#alloc,#write,#progbits
30
31 Ddata.data:
32
33 .section ".rodata",#alloc,#progbits
34 !
35 ! CONSTANT POOL
36 !
37
38 Drodata.rodata:
39
40 .section ".picdata",#alloc,#write
41
42 Dpicdata.picdata:
43
44 .section ".tbss",#alloc,#write,#tls,#nobits
45
46 Ttbss.bss:
47
48 .section ".tdata",#alloc,#write,#tls,#progbits
49
50 Ttdata.data:
51
! Three NUL-terminated message strings, each aligned to 8 bytes.
! NOTE(review): none of .L95, .L147 or .L153 is referenced by any instruction
! visible in this file; they look like leftovers of the compiler output.
! Confirm before removing them.
52 .section ".rodata1",#alloc,#progbits
53 .align 8
54 !
55 ! CONSTANT POOL
56 !
57
58 .L95:
59 .ascii "invalid distance too far back\000"
60 .align 8
61 !
62 ! CONSTANT POOL
63 !
64
65 .L147:
66 .ascii "invalid distance code\000"
67 .align 8
68 !
69 ! CONSTANT POOL
70 !
71
72 .L153:
73 .ascii "invalid literal/length code\000"
74
! Switch back to the code section; instructions are aligned to 4 bytes.
75 .section ".text",#alloc,#execinstr,#progbits
76 /* 000000 0 */ .align 4
77 ! FILE deflate-t4.c
78
79 ! 1 !#include <sun_prefetch.h>
80 ! 2 !#include "deflate.h"
81 ! 3 !#define NIL 0
82 ! 5 !uInt longest_match(s, cur_match)
83 ! 6 ! deflate_state *s;
84 ! 7 ! IPos cur_match; /* current match */
85 ! 8 !{
86
87 !
88 ! SUBROUTINE longest_match
89 !
90 ! OFFSET SOURCE LINE LABEL INSTRUCTION
91
! ----------------------------------------------------------------------------
! longest_match(s, cur_match) -- hand-tuned SPARC T4 implementation.
!
! Starting at candidate position cur_match, follows a table of 16-bit links
! from candidate to candidate and compares the bytes at each candidate with
! the bytes at the scan position. Whenever a candidate yields a longer run of
! equal bytes than the best so far, its position is stored to [s + 0xa0].
! The best length (initially the 32-bit value at [s + 0xa8]) is returned,
! clamped from above to the 32-bit value at [s + 0xa4].
!
! In:   %o0 = s, %o1 = cur_match   (seen as %i0 / %i1 after the save)
! Out:  %o0 = resulting length, zero-extended from 32 bits
!
! Structure offsets are hard-coded. The field names in parentheses are
! presumed from zlib's deflate_state and are NOT established by this file;
! TODO confirm against the deflate.h this file is built with.
!   [s+0x44] (w_size?)       [s+0x4c] (w_mask?)       [s+0x50] (window?)
!   [s+0x60] (prev?)         [s+0x9c] (strstart?)     [s+0xa0] (match_start?)
!   [s+0xa4] (lookahead?)    [s+0xa8] (prev_length?)  [s+0xac] (max_chain_length?)
!   [s+0xbc] (good_match?)   [s+0xc0] (nice_match?)
! The constants 0x106 (262) and 0x102 (258) are presumably zlib's
! MIN_LOOKAHEAD and MAX_MATCH -- confirm.
!
! Register roles inside the function:
!   %i0  s                          %i1  current candidate position
!   %i2  best length so far         %i4  scan pointer = [s+0x50] + [s+0x9c]
!   %i3  [s+0x44]-0x106 during setup, afterwards the value of [s+0xa4]
!   %g1  base pointer [s+0x50]      %g4  lower bound for candidate positions
!   %g5  remaining chain steps      %l3  constant 0x102
!   %l2  candidate & mask, then (after sllx) its byte offset in the link table
!   %l4  mask [s+0x4c]              %l5  link table base [s+0x60]
!   %l7  [s+0xbc] during setup, afterwards scan + 0x102
!   %o0  length at which the search stops early
!   %o1  byte scan[best length]     %o2  byte scan[best length - 1]
!   %o3  candidate pointer          %o4  running scan pointer in lm_0xc0
!
! The cwb*/cxb* instructions are T4 compare-and-branch instructions and have
! no delay slot. bcs, bne, ba and return do have one: the instruction that
! follows each of them is executed as its delay slot.
! ----------------------------------------------------------------------------
92 .global longest_match
93
94
95 longest_match:
96
97 .L900000112:
! New register window and a 0xb0-byte stack frame.
98 save %sp, -0xb0, %sp
! Load the mask and the link table base first so that a prefetch for the
! first candidate can be issued early. ldn is a pointer-sized load.
! NOTE(review): %l2 is the unscaled index here, whereas the table access in
! the loop uses index*2 (see the sllx in lm_0x78). Prefetch is only a hint,
! so results are unaffected, but confirm that this address is intended.
99 ld [%i0 + 0x4c], %l4 !7
100 ldn [%i0 + 0x60], %l5 !7
101 and %i1, %l4, %l2 !7
102 prefetch [%l5 + %l2], #n_reads !7
103 !7 ld [%i0 + 0x9c], %l4
! Load the remaining parameters from *s.
104 ld [%i0 + 0x9c], %l1 !7
105 ld [%i0 + 0x44], %l6
! Lower bound defaults to 0; it is raised below only if [s+0x9c] is large
! enough.
106 clr %g4
107 ldn [%i0 + 0x50], %g1
108 ld [%i0 + 0xa8], %i2
109 ld [%i0 + 0xac], %g5
110 ld [%i0 + 0xc0], %o0
111 !5 srl %l4, 0x0, %l5
112 ld [%i0 + 0xbc], %l7
! %i3 = [s+0x44] - 0x106
113 add %l6, -0x106, %i3
114 !5 add %g1, %l5, %i4
115 !7 add %g1, %l4, %i4 !5
! %i4 = scan pointer = base + [s+0x9c]
116 add %g1, %l1, %i4 !7
117 !7 cwbleu %l4, %i3, lm_0x38
! If [s+0x9c] <= %i3 (unsigned) keep the lower bound at 0, otherwise
! lower bound = [s+0x9c] - %i3.
118 cwbleu %l1, %i3, lm_0x38 !7
119 !7 sub %l4, %i3, %g4
120 sub %l1, %i3, %g4 !7
121
122 lm_0x38:
123 !7 ld [%i0 + 0x4c], %l4
124 !4 add %i2, -0x1, %l3
125 !7 ldn [%i0 + 0x60], %l5
126 !4 sra %l3, 0x0, %o2
! Cache the two scan bytes used for the quick rejection test:
! %o2 = scan[best - 1], %o1 = scan[best].
127 add %i2, -0x1, %o2 !4
128 ldub [%i4 + %o2], %o2
129 !3 sra %i2, 0x0, %l6
130 !3 ldub [%i4 + %l6], %o1
131 ldub [%i4 + %i2], %o1 !3
! Compare the initial best length with [s+0xbc]; the flags are consumed by
! the bcs below. %l7 and %i3 are then reused: %l7 = scan + 0x102 (end of
! the compare range), %i3 = [s+0xa4].
132 cmp %i2, %l7
133 add %i4, 0x102, %l7
134 ld [%i0 + 0xa4], %i3
! Skip the srl when best length < [s+0xbc] (unsigned). The mov in the delay
! slot sets %l3 = 0x102 on both paths.
135 bcs,pn %icc, lm_0x6c
136 mov 0x102, %l3
137
! Best length >= [s+0xbc]: search only a quarter of the chain.
138 srl %g5, 0x2, %g5
139
140 lm_0x6c:
! Clamp the early-exit length: %o0 = min(%o0, [s+0xa4]) (unsigned).
141 cmp %o0, %i3
142 !6 srl %i1, 0x0, %l0
143 !7 and %i1, %l4, %l2 !6
144 movgu %icc, %i3, %o0
145
! ---- Main loop: one iteration per candidate position in %i1. ----
! On entry %l2 = %i1 & %l4.
146 lm_0x78:
147 !6 and %i1, %l4, %l2
148 !6 add %l0, %g1, %o3
! %o3 = candidate pointer; %o5 = candidate[best].
149 add %i1, %g1, %o3 !6
150 !3 ldub [%o3 + %l6], %o5
151 ldub [%o3 + %i2], %o5 !3
152 !1 srl %l2, 0x0, %o4
153 !1 sllx %o4, 0x1, %l2
! Scale the masked index to a byte offset into the 16-bit link table and
! prefetch 0x40 bytes below this candidate's table entry.
154 sllx %l2, 0x1, %l2 !1
155 add %l2, %l5, %l1 !1
156 prefetch [%l1 - 0x40], #n_reads !1
! Quick test: only if candidate[best] == scan[best] is the detailed
! comparison at lm_0x17c_neg attempted. The common (mismatch) case falls
! through.
157 cwbe %o5, %o1, lm_0x17c_neg
158
! ---- Advance to the next candidate in the chain. ----
159 lm_0x17c:
! %i1 = 16-bit link stored for the current candidate.
160 lduh [%l5 + %l2], %i1
! Stop when the next candidate is <= the lower bound (unsigned).
161 cwbleu %i1, %g4, lm_0x190
162
! Count down the chain budget and loop while it is non-zero. The delay slot
! computes the masked index of the new candidate for the next iteration.
163 addcc %g5, -0x1, %g5
164 bne,pt %icc, lm_0x78
165 !6 srl %i1, 0x0, %l0
166 and %i1, %l4, %l2 !6
167
! ---- Exit: return min(best length, [s+0xa4]) (unsigned). ----
168 lm_0x190:
169 cmp %i2, %i3
170 movgu %icc, %i3, %i2
! return restores the caller's register window before its delay slot
! executes, so in the delay slot %o2 names this function's %i2 (the best
! length) and %o0 is the caller's result register. srl by 0 zero-extends the
! low 32 bits.
171 return %i7 + 0x8
172 srl %o2, 0x0, %o0
173
! ---- Detailed comparison of the candidate against scan. ----
! Any mismatch in the checks below goes back to lm_0x17c (next candidate).
174 lm_0x17c_neg:
175 !3 add %o3, %l6, %o7
! candidate[best - 1] must equal scan[best - 1].
176 add %o3, %i2, %o7 !3
177 ldub [%o7 - 0x1], %l1
178 cwbne %l1, %o2, lm_0x17c
179
180 !6 ldub [%g1 + %l0], %i5
! candidate[0] must equal scan[0].
181 ldub [%g1 + %i1], %i5 !6
182 ldub [%i4], %o5
183 cwbne %i5, %o5, lm_0x17c
184
! candidate[1] must equal scan[1].
185 ldub [%i4 + 0x1], %l1
186 ldub [%o3 + 0x1], %o4
187 cwbne %o4, %l1, lm_0x17c
188
! Position both pointers at offset 2. The compare loop below starts with
! offset 3, so the bytes at offset 2 are never compared.
! NOTE(review): presumably they are guaranteed equal by how candidates are
! chosen, as in zlib's C version -- confirm.
189 add %o3, 0x2, %o3
190 !1 add %l2, %l5, %l1
191 add %i4, 0x2, %o4
192
! ---- Byte compare loop, unrolled 8 times. ----
! Each step pre-increments the scan pointer %o4 by one byte and compares it
! with the candidate byte at a fixed displacement from %o3; %o3 itself is
! advanced by 8 once per pass. On the first mismatch %o4 points at the
! mismatching scan byte.
193 lm_0xc0:
194 ldub [%o4 + 0x1], %l0
195 add %o4, 0x1, %o4
196 ldub [%o3 + 0x1], %o7
197 cwbne %l0, %o7, lm_0x14c
198
199 ldub [%o4 + 0x1], %i5
200 add %o4, 0x1, %o4
201 ldub [%o3 + 0x2], %o5
202 cwbne %i5, %o5, lm_0x14c
203
204 ldub [%o4 + 0x1], %l0
205 add %o4, 0x1, %o4
206 ldub [%o3 + 0x3], %o7
207 cwbne %l0, %o7, lm_0x14c
208
209 ldub [%o4 + 0x1], %i5
210 add %o4, 0x1, %o4
211 ldub [%o3 + 0x4], %o5
212 cwbne %i5, %o5, lm_0x14c
213
214 ldub [%o4 + 0x1], %l0
215 add %o4, 0x1, %o4
216 ldub [%o3 + 0x5], %o7
217 cwbne %l0, %o7, lm_0x14c
218
219 ldub [%o4 + 0x1], %i5
220 add %o4, 0x1, %o4
221 ldub [%o3 + 0x6], %o5
222 cwbne %i5, %o5, lm_0x14c
223
224 ldub [%o4 + 0x1], %l0
225 add %o4, 0x1, %o4
226 ldub [%o3 + 0x7], %o7
227 cwbne %l0, %o7, lm_0x14c
228
229 ldub [%o4 + 0x1], %i5
230 add %o4, 0x1, %o4
231 ldub [%o3 + 0x8], %o5
232 add %o3, 0x8, %o3
233 cwbne %i5, %o5, lm_0x14c
234
! Another pass while the scan pointer is still below scan + 0x102
! (64-bit unsigned compare). The nop separates the two compare-and-branch
! instructions.
235 nop
236 cxbcs %o4, %l7, lm_0xc0
237
! ---- Compare finished: evaluate the length reached. ----
238 lm_0x14c:
239 !1 prefetch [%l1 - 0x40], #n_reads
! %o7 = 0x102 - ((scan + 0x102) - %o4), i.e. the offset of %o4 from scan.
240 sub %l7, %o4, %l0
241 sub %l3, %l0, %o7
! Not longer than the best so far (signed compare): try the next candidate.
242 cwble %o7, %i2, lm_0x17c
243
! New best: record the candidate position in [s+0xa0] and the new length.
244 st %i1, [%i0 + 0xa0]
245 mov %o7, %i2
! Long enough to stop searching (signed compare against %o0)?
246 cwbge %o7, %o0, lm_0x190
247
248 !2 sra %o7, 0x0, %i1
249 !3 sra %o7, 0x0, %l6
250 !2 add %i4, %i1, %l1
! Refresh the cached scan bytes for the new best length, then continue with
! the next candidate. %o2 = scan[best - 1] is loaded in the delay slot of ba.
251 add %i4, %o7, %l1 !2
252 !2 ldub [%i4 + %i1], %o1
253 ldub [%i4 + %o7], %o1 !2
254 ba lm_0x17c
255 ldub [%l1 - 0x1], %o2
256
257
258 /* 0x0220 0 */ .type longest_match,#function
259 /* 0x0220 0 */ .size longest_match,(.-longest_match)
260
261
! Compiler-emitted trailer: 24 reserved bytes in ".text" (started on an
! 8-byte boundary, followed by realignment to 4) and the tool-chain
! identification records. No instruction visible in this file references
! .L900000113 or .L900000286.
262 .L900000113:
263
264 .section ".text",#alloc,#execinstr,#progbits
265 /* 000000 0 */ .align 8
266 /* 000000 */ .skip 24
267 /* 0x0018 */ .align 4
268
269
270 .L900000286:
271
272 .section ".text",#alloc,#execinstr,#progbits
273
274 ! Begin Disassembling Ident
275 .ident "cg: Sun Compiler Common 12.3 SunOS_sparc 2011/11/16" ! (NO SOURCE LINE)
276 .ident "acomp: Sun C 5.12 SunOS_sparc 2011/11/16" ! (/tmp/acomp.1329237379.172468.02.sd:24)
277 .ident "iropt: Sun Compiler Common 12.3 SunOS_sparc 2011/11/16" ! (/tmp/acomp.1329237379.172468.02.sd:25)
278 .ident "cg: Sun Compiler Common 12.3 SunOS_sparc 2011/11/16" ! (NO SOURCE LINE)
279 ! End Disassembling Ident