1 /* inffas86.c is a hand tuned assembler version of 2 * 3 * inffast.c -- fast decoding 4 * Copyright (C) 1995-2003 Mark Adler 5 * For conditions of distribution and use, see copyright notice in zlib.h 6 * 7 * Copyright (C) 2003 Chris Anderson <christop@charm.net> 8 * Please use the copyright conditions above. 9 * 10 * Dec-29-2003 -- I added AMD64 inflate asm support. This version is also 11 * slightly quicker on x86 systems because, instead of using rep movsb to copy 12 * data, it uses rep movsw, which moves data in 2-byte chunks instead of single 13 * bytes. I've tested the AMD64 code on a Fedora Core 1 + the x86_64 updates 14 * from http://fedora.linux.duke.edu/fc1_x86_64 15 * which is running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with 16 * 1GB ram. The 64-bit version is about 4% faster than the 32-bit version, 17 * when decompressing mozilla-source-1.3.tar.gz. 18 * 19 * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from 20 * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at 21 * the moment. I have successfully compiled and tested this code with gcc2.96, 22 * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S 23 * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX 24 * enabled. I will attempt to merge the MMX code into this version. Newer 25 * versions of this and inffast.S can be found at 26 * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/ 27 */ 28 29 #include "zutil.h" 30 #include "inftrees.h" 31 #include "inflate.h" 32 #include "inffast.h" 33 34 /* Mark Adler's comments from inffast.c: */ 35 36 /* 37 Decode literal, length, and distance codes and write out the resulting 38 literal and match bytes until either not enough input or output is 39 available, an end-of-block is encountered, or a data error is encountered. 
40 When large enough input and output buffers are supplied to inflate(), for 41 example, a 16K input buffer and a 64K output buffer, more than 95% of the 42 inflate execution time is spent in this routine. 43 44 Entry assumptions: 45 46 state->mode == LEN 47 strm->avail_in >= 6 48 strm->avail_out >= 258 49 start >= strm->avail_out 50 state->bits < 8 51 52 On return, state->mode is one of: 53 54 LEN -- ran out of output space or available input 55 TYPE -- reached the end-of-block code; inflate() will interpret the next block 56 BAD -- error in block data 57 58 Notes: 59 60 - The maximum input bits used by a length/distance pair is 15 bits for the 61 length code, 5 bits for the length extra, 15 bits for the distance code, 62 and 13 bits for the distance extra. This totals 48 bits, or six bytes. 63 Therefore if strm->avail_in >= 6, then there is enough input to avoid 64 checking for available input while decoding. 65 66 - The maximum number of bytes that a single length/distance pair can output is 258 67 bytes, which is the maximum length that can be coded. inflate_fast() 68 requires strm->avail_out >= 258 for each loop to avoid checking for 69 output space. 
70 */ 71 void inflate_fast(strm, start) 72 z_streamp strm; 73 unsigned start; /* inflate()'s starting value for strm->avail_out */ 74 { 75 struct inflate_state FAR *state; 76 struct inffast_ar { 77 /* 64 32 x86 x86_64 */ 78 /* ar offset register */ 79 /* 0 0 */ void *esp; /* esp save */ 80 /* 8 4 */ void *ebp; /* ebp save */ 81 /* 16 8 */ unsigned char FAR *in; /* esi rsi local strm->next_in */ 82 /* 24 12 */ unsigned char FAR *last; /* r9 while in < last */ 83 /* 32 16 */ unsigned char FAR *out; /* edi rdi local strm->next_out */ 84 /* 40 20 */ unsigned char FAR *beg; /* inflate()'s init next_out */ 85 /* 48 24 */ unsigned char FAR *end; /* r10 while out < end */ 86 /* 56 28 */ unsigned char FAR *window;/* size of window, wsize!=0 */ 87 /* 64 32 */ code const FAR *lcode; /* ebp rbp local strm->lencode */ 88 /* 72 36 */ code const FAR *dcode; /* r11 local strm->distcode */ 89 /* 80 40 */ unsigned long hold; /* edx rdx local strm->hold */ 90 /* 88 44 */ unsigned bits; /* ebx rbx local strm->bits */ 91 /* 92 48 */ unsigned wsize; /* window size */ 92 /* 96 52 */ unsigned write; /* window write index */ 93 /*100 56 */ unsigned lmask; /* r12 mask for lcode */ 94 /*104 60 */ unsigned dmask; /* r13 mask for dcode */ 95 /*108 64 */ unsigned len; /* r14 match length */ 96 /*112 68 */ unsigned dist; /* r15 match distance */ 97 /*116 72 */ unsigned status; /* set when state chng*/ 98 } ar; 99 100 #if defined( __GNUC__ ) && defined( __amd64__ ) && ! 
defined( __i386 ) 101 #define PAD_AVAIL_IN 6 102 #define PAD_AVAIL_OUT 258 103 #else 104 #define PAD_AVAIL_IN 5 105 #define PAD_AVAIL_OUT 257 106 #endif 107 108 /* copy state to local variables */ 109 state = (struct inflate_state FAR *)strm->state; 110 ar.in = strm->next_in; 111 ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN); 112 ar.out = strm->next_out; 113 ar.beg = ar.out - (start - strm->avail_out); 114 ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT); 115 ar.wsize = state->wsize; 116 ar.write = state->wnext; 117 ar.window = state->window; 118 ar.hold = state->hold; 119 ar.bits = state->bits; 120 ar.lcode = state->lencode; 121 ar.dcode = state->distcode; 122 ar.lmask = (1U << state->lenbits) - 1; 123 ar.dmask = (1U << state->distbits) - 1; 124 125 /* decode literals and length/distances until end-of-block or not enough 126 input data or output space */ 127 128 /* align in on 1/2 hold size boundary */ 129 while (((unsigned long)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) { 130 ar.hold += (unsigned long)*ar.in++ << ar.bits; 131 ar.bits += 8; 132 } 133 134 #if defined( __GNUC__ ) && defined( __amd64__ ) && ! 
defined( __i386 ) 135 __asm__ __volatile__ ( 136 " leaq %0, %%rax\n" 137 " movq %%rbp, 8(%%rax)\n" /* save regs rbp and rsp */ 138 " movq %%rsp, (%%rax)\n" 139 " movq %%rax, %%rsp\n" /* make rsp point to &ar */ 140 " movq 16(%%rsp), %%rsi\n" /* rsi = in */ 141 " movq 32(%%rsp), %%rdi\n" /* rdi = out */ 142 " movq 24(%%rsp), %%r9\n" /* r9 = last */ 143 " movq 48(%%rsp), %%r10\n" /* r10 = end */ 144 " movq 64(%%rsp), %%rbp\n" /* rbp = lcode */ 145 " movq 72(%%rsp), %%r11\n" /* r11 = dcode */ 146 " movq 80(%%rsp), %%rdx\n" /* rdx = hold */ 147 " movl 88(%%rsp), %%ebx\n" /* ebx = bits */ 148 " movl 100(%%rsp), %%r12d\n" /* r12d = lmask */ 149 " movl 104(%%rsp), %%r13d\n" /* r13d = dmask */ 150 /* r14d = len */ 151 /* r15d = dist */ 152 " cld\n" 153 " cmpq %%rdi, %%r10\n" 154 " je .L_one_time\n" /* if only one decode left */ 155 " cmpq %%rsi, %%r9\n" 156 " je .L_one_time\n" 157 " jmp .L_do_loop\n" 158 159 ".L_one_time:\n" 160 " movq %%r12, %%r8\n" /* r8 = lmask */ 161 " cmpb $32, %%bl\n" 162 " ja .L_get_length_code_one_time\n" 163 164 " lodsl\n" /* eax = *(uint *)in++ */ 165 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ 166 " addb $32, %%bl\n" /* bits += 32 */ 167 " shlq %%cl, %%rax\n" 168 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */ 169 " jmp .L_get_length_code_one_time\n" 170 171 ".align 32,0x90\n" 172 ".L_while_test:\n" 173 " cmpq %%rdi, %%r10\n" 174 " jbe .L_break_loop\n" 175 " cmpq %%rsi, %%r9\n" 176 " jbe .L_break_loop\n" 177 178 ".L_do_loop:\n" 179 " movq %%r12, %%r8\n" /* r8 = lmask */ 180 " cmpb $32, %%bl\n" 181 " ja .L_get_length_code\n" /* if (32 < bits) */ 182 183 " lodsl\n" /* eax = *(uint *)in++ */ 184 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ 185 " addb $32, %%bl\n" /* bits += 32 */ 186 " shlq %%cl, %%rax\n" 187 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */ 188 189 ".L_get_length_code:\n" 190 " andq %%rdx, %%r8\n" /* r8 &= hold */ 191 " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] 
*/ 192 193 " movb %%ah, %%cl\n" /* cl = this.bits */ 194 " subb %%ah, %%bl\n" /* bits -= this.bits */ 195 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */ 196 197 " testb %%al, %%al\n" 198 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */ 199 200 " movq %%r12, %%r8\n" /* r8 = lmask */ 201 " shrl $16, %%eax\n" /* output this.val char */ 202 " stosb\n" 203 204 ".L_get_length_code_one_time:\n" 205 " andq %%rdx, %%r8\n" /* r8 &= hold */ 206 " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */ 207 208 ".L_dolen:\n" 209 " movb %%ah, %%cl\n" /* cl = this.bits */ 210 " subb %%ah, %%bl\n" /* bits -= this.bits */ 211 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */ 212 213 " testb %%al, %%al\n" 214 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */ 215 216 " shrl $16, %%eax\n" /* output this.val char */ 217 " stosb\n" 218 " jmp .L_while_test\n" 219 220 ".align 32,0x90\n" 221 ".L_test_for_length_base:\n" 222 " movl %%eax, %%r14d\n" /* len = this */ 223 " shrl $16, %%r14d\n" /* len = this.val */ 224 " movb %%al, %%cl\n" 225 226 " testb $16, %%al\n" 227 " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */ 228 " andb $15, %%cl\n" /* op &= 15 */ 229 " jz .L_decode_distance\n" /* if (!op) */ 230 231 ".L_add_bits_to_len:\n" 232 " subb %%cl, %%bl\n" 233 " xorl %%eax, %%eax\n" 234 " incl %%eax\n" 235 " shll %%cl, %%eax\n" 236 " decl %%eax\n" 237 " andl %%edx, %%eax\n" /* eax &= hold */ 238 " shrq %%cl, %%rdx\n" 239 " addl %%eax, %%r14d\n" /* len += hold & mask[op] */ 240 241 ".L_decode_distance:\n" 242 " movq %%r13, %%r8\n" /* r8 = dmask */ 243 " cmpb $32, %%bl\n" 244 " ja .L_get_distance_code\n" /* if (32 < bits) */ 245 246 " lodsl\n" /* eax = *(uint *)in++ */ 247 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ 248 " addb $32, %%bl\n" /* bits += 32 */ 249 " shlq %%cl, %%rax\n" 250 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */ 251 252 ".L_get_distance_code:\n" 253 " andq %%rdx, %%r8\n" /* r8 &= hold */ 254 " movl 
(%%r11,%%r8,4), %%eax\n" /* eax = dcode[hold & dmask] */ 255 256 ".L_dodist:\n" 257 " movl %%eax, %%r15d\n" /* dist = this */ 258 " shrl $16, %%r15d\n" /* dist = this.val */ 259 " movb %%ah, %%cl\n" 260 " subb %%ah, %%bl\n" /* bits -= this.bits */ 261 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */ 262 " movb %%al, %%cl\n" /* cl = this.op */ 263 264 " testb $16, %%al\n" /* if ((op & 16) == 0) */ 265 " jz .L_test_for_second_level_dist\n" 266 " andb $15, %%cl\n" /* op &= 15 */ 267 " jz .L_check_dist_one\n" 268 269 ".L_add_bits_to_dist:\n" 270 " subb %%cl, %%bl\n" 271 " xorl %%eax, %%eax\n" 272 " incl %%eax\n" 273 " shll %%cl, %%eax\n" 274 " decl %%eax\n" /* (1 << op) - 1 */ 275 " andl %%edx, %%eax\n" /* eax &= hold */ 276 " shrq %%cl, %%rdx\n" 277 " addl %%eax, %%r15d\n" /* dist += hold & ((1 << op) - 1) */ 278 279 ".L_check_window:\n" 280 " movq %%rsi, %%r8\n" /* save in so from can use it's reg */ 281 " movq %%rdi, %%rax\n" 282 " subq 40(%%rsp), %%rax\n" /* nbytes = out - beg */ 283 284 " cmpl %%r15d, %%eax\n" 285 " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */ 286 287 " movl %%r14d, %%ecx\n" /* ecx = len */ 288 " movq %%rdi, %%rsi\n" 289 " subq %%r15, %%rsi\n" /* from = out - dist */ 290 291 " sarl %%ecx\n" 292 " jnc .L_copy_two\n" /* if len % 2 == 0 */ 293 294 " rep movsw\n" 295 " movb (%%rsi), %%al\n" 296 " movb %%al, (%%rdi)\n" 297 " incq %%rdi\n" 298 299 " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */ 300 " jmp .L_while_test\n" 301 302 ".L_copy_two:\n" 303 " rep movsw\n" 304 " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */ 305 " jmp .L_while_test\n" 306 307 ".align 32,0x90\n" 308 ".L_check_dist_one:\n" 309 " cmpl $1, %%r15d\n" /* if dist 1, is a memset */ 310 " jne .L_check_window\n" 311 " cmpq %%rdi, 40(%%rsp)\n" /* if out == beg, outside window */ 312 " je .L_check_window\n" 313 314 " movl %%r14d, %%ecx\n" /* ecx = len */ 315 " movb -1(%%rdi), %%al\n" 316 " movb %%al, %%ah\n" 317 318 " sarl %%ecx\n" 319 " jnc .L_set_two\n" 320 " 
movb %%al, (%%rdi)\n" 321 " incq %%rdi\n" 322 323 ".L_set_two:\n" 324 " rep stosw\n" 325 " jmp .L_while_test\n" 326 327 ".align 32,0x90\n" 328 ".L_test_for_second_level_length:\n" 329 " testb $64, %%al\n" 330 " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */ 331 332 " xorl %%eax, %%eax\n" 333 " incl %%eax\n" 334 " shll %%cl, %%eax\n" 335 " decl %%eax\n" 336 " andl %%edx, %%eax\n" /* eax &= hold */ 337 " addl %%r14d, %%eax\n" /* eax += len */ 338 " movl (%%rbp,%%rax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/ 339 " jmp .L_dolen\n" 340 341 ".align 32,0x90\n" 342 ".L_test_for_second_level_dist:\n" 343 " testb $64, %%al\n" 344 " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */ 345 346 " xorl %%eax, %%eax\n" 347 " incl %%eax\n" 348 " shll %%cl, %%eax\n" 349 " decl %%eax\n" 350 " andl %%edx, %%eax\n" /* eax &= hold */ 351 " addl %%r15d, %%eax\n" /* eax += dist */ 352 " movl (%%r11,%%rax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/ 353 " jmp .L_dodist\n" 354 355 ".align 32,0x90\n" 356 ".L_clip_window:\n" 357 " movl %%eax, %%ecx\n" /* ecx = nbytes */ 358 " movl 92(%%rsp), %%eax\n" /* eax = wsize, prepare for dist cmp */ 359 " negl %%ecx\n" /* nbytes = -nbytes */ 360 361 " cmpl %%r15d, %%eax\n" 362 " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */ 363 364 " addl %%r15d, %%ecx\n" /* nbytes = dist - nbytes */ 365 " cmpl $0, 96(%%rsp)\n" 366 " jne .L_wrap_around_window\n" /* if (write != 0) */ 367 368 " movq 56(%%rsp), %%rsi\n" /* from = window */ 369 " subl %%ecx, %%eax\n" /* eax -= nbytes */ 370 " addq %%rax, %%rsi\n" /* from += wsize - nbytes */ 371 372 " movl %%r14d, %%eax\n" /* eax = len */ 373 " cmpl %%ecx, %%r14d\n" 374 " jbe .L_do_copy\n" /* if (nbytes >= len) */ 375 376 " subl %%ecx, %%eax\n" /* eax -= nbytes */ 377 " rep movsb\n" 378 " movq %%rdi, %%rsi\n" 379 " subq %%r15, %%rsi\n" /* from = &out[ -dist ] */ 380 " jmp .L_do_copy\n" 381 382 ".align 32,0x90\n" 383 ".L_wrap_around_window:\n" 384 " movl 96(%%rsp), %%eax\n" /* 
eax = write */ 385 " cmpl %%eax, %%ecx\n" 386 " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */ 387 388 " movl 92(%%rsp), %%esi\n" /* from = wsize */ 389 " addq 56(%%rsp), %%rsi\n" /* from += window */ 390 " addq %%rax, %%rsi\n" /* from += write */ 391 " subq %%rcx, %%rsi\n" /* from -= nbytes */ 392 " subl %%eax, %%ecx\n" /* nbytes -= write */ 393 394 " movl %%r14d, %%eax\n" /* eax = len */ 395 " cmpl %%ecx, %%eax\n" 396 " jbe .L_do_copy\n" /* if (nbytes >= len) */ 397 398 " subl %%ecx, %%eax\n" /* len -= nbytes */ 399 " rep movsb\n" 400 " movq 56(%%rsp), %%rsi\n" /* from = window */ 401 " movl 96(%%rsp), %%ecx\n" /* nbytes = write */ 402 " cmpl %%ecx, %%eax\n" 403 " jbe .L_do_copy\n" /* if (nbytes >= len) */ 404 405 " subl %%ecx, %%eax\n" /* len -= nbytes */ 406 " rep movsb\n" 407 " movq %%rdi, %%rsi\n" 408 " subq %%r15, %%rsi\n" /* from = out - dist */ 409 " jmp .L_do_copy\n" 410 411 ".align 32,0x90\n" 412 ".L_contiguous_in_window:\n" 413 " movq 56(%%rsp), %%rsi\n" /* rsi = window */ 414 " addq %%rax, %%rsi\n" 415 " subq %%rcx, %%rsi\n" /* from += write - nbytes */ 416 417 " movl %%r14d, %%eax\n" /* eax = len */ 418 " cmpl %%ecx, %%eax\n" 419 " jbe .L_do_copy\n" /* if (nbytes >= len) */ 420 421 " subl %%ecx, %%eax\n" /* len -= nbytes */ 422 " rep movsb\n" 423 " movq %%rdi, %%rsi\n" 424 " subq %%r15, %%rsi\n" /* from = out - dist */ 425 " jmp .L_do_copy\n" /* if (nbytes >= len) */ 426 427 ".align 32,0x90\n" 428 ".L_do_copy:\n" 429 " movl %%eax, %%ecx\n" /* ecx = len */ 430 " rep movsb\n" 431 432 " movq %%r8, %%rsi\n" /* move in back to %esi, toss from */ 433 " jmp .L_while_test\n" 434 435 ".L_test_for_end_of_block:\n" 436 " testb $32, %%al\n" 437 " jz .L_invalid_literal_length_code\n" 438 " movl $1, 116(%%rsp)\n" 439 " jmp .L_break_loop_with_status\n" 440 441 ".L_invalid_literal_length_code:\n" 442 " movl $2, 116(%%rsp)\n" 443 " jmp .L_break_loop_with_status\n" 444 445 ".L_invalid_distance_code:\n" 446 " movl $3, 116(%%rsp)\n" 447 " jmp 
.L_break_loop_with_status\n" 448 449 ".L_invalid_distance_too_far:\n" 450 " movl $4, 116(%%rsp)\n" 451 " jmp .L_break_loop_with_status\n" 452 453 ".L_break_loop:\n" 454 " movl $0, 116(%%rsp)\n" 455 456 ".L_break_loop_with_status:\n" 457 /* put in, out, bits, and hold back into ar and pop esp */ 458 " movq %%rsi, 16(%%rsp)\n" /* in */ 459 " movq %%rdi, 32(%%rsp)\n" /* out */ 460 " movl %%ebx, 88(%%rsp)\n" /* bits */ 461 " movq %%rdx, 80(%%rsp)\n" /* hold */ 462 " movq (%%rsp), %%rax\n" /* restore rbp and rsp */ 463 " movq 8(%%rsp), %%rbp\n" 464 " movq %%rax, %%rsp\n" 465 : 466 : "m" (ar) 467 : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi", 468 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" 469 ); 470 #elif ( defined( __GNUC__ ) || defined( __ICC ) ) && defined( __i386 ) 471 __asm__ __volatile__ ( 472 " leal %0, %%eax\n" 473 " movl %%esp, (%%eax)\n" /* save esp, ebp */ 474 " movl %%ebp, 4(%%eax)\n" 475 " movl %%eax, %%esp\n" 476 " movl 8(%%esp), %%esi\n" /* esi = in */ 477 " movl 16(%%esp), %%edi\n" /* edi = out */ 478 " movl 40(%%esp), %%edx\n" /* edx = hold */ 479 " movl 44(%%esp), %%ebx\n" /* ebx = bits */ 480 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */ 481 482 " cld\n" 483 " jmp .L_do_loop\n" 484 485 ".align 32,0x90\n" 486 ".L_while_test:\n" 487 " cmpl %%edi, 24(%%esp)\n" /* out < end */ 488 " jbe .L_break_loop\n" 489 " cmpl %%esi, 12(%%esp)\n" /* in < last */ 490 " jbe .L_break_loop\n" 491 492 ".L_do_loop:\n" 493 " cmpb $15, %%bl\n" 494 " ja .L_get_length_code\n" /* if (15 < bits) */ 495 496 " xorl %%eax, %%eax\n" 497 " lodsw\n" /* al = *(ushort *)in++ */ 498 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ 499 " addb $16, %%bl\n" /* bits += 16 */ 500 " shll %%cl, %%eax\n" 501 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */ 502 503 ".L_get_length_code:\n" 504 " movl 56(%%esp), %%eax\n" /* eax = lmask */ 505 " andl %%edx, %%eax\n" /* eax &= hold */ 506 " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[hold & lmask] 
*/ 507 508 ".L_dolen:\n" 509 " movb %%ah, %%cl\n" /* cl = this.bits */ 510 " subb %%ah, %%bl\n" /* bits -= this.bits */ 511 " shrl %%cl, %%edx\n" /* hold >>= this.bits */ 512 513 " testb %%al, %%al\n" 514 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */ 515 516 " shrl $16, %%eax\n" /* output this.val char */ 517 " stosb\n" 518 " jmp .L_while_test\n" 519 520 ".align 32,0x90\n" 521 ".L_test_for_length_base:\n" 522 " movl %%eax, %%ecx\n" /* len = this */ 523 " shrl $16, %%ecx\n" /* len = this.val */ 524 " movl %%ecx, 64(%%esp)\n" /* save len */ 525 " movb %%al, %%cl\n" 526 527 " testb $16, %%al\n" 528 " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */ 529 " andb $15, %%cl\n" /* op &= 15 */ 530 " jz .L_decode_distance\n" /* if (!op) */ 531 " cmpb %%cl, %%bl\n" 532 " jae .L_add_bits_to_len\n" /* if (op <= bits) */ 533 534 " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */ 535 " xorl %%eax, %%eax\n" 536 " lodsw\n" /* al = *(ushort *)in++ */ 537 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ 538 " addb $16, %%bl\n" /* bits += 16 */ 539 " shll %%cl, %%eax\n" 540 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */ 541 " movb %%ch, %%cl\n" /* move op back to ecx */ 542 543 ".L_add_bits_to_len:\n" 544 " subb %%cl, %%bl\n" 545 " xorl %%eax, %%eax\n" 546 " incl %%eax\n" 547 " shll %%cl, %%eax\n" 548 " decl %%eax\n" 549 " andl %%edx, %%eax\n" /* eax &= hold */ 550 " shrl %%cl, %%edx\n" 551 " addl %%eax, 64(%%esp)\n" /* len += hold & mask[op] */ 552 553 ".L_decode_distance:\n" 554 " cmpb $15, %%bl\n" 555 " ja .L_get_distance_code\n" /* if (15 < bits) */ 556 557 " xorl %%eax, %%eax\n" 558 " lodsw\n" /* al = *(ushort *)in++ */ 559 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ 560 " addb $16, %%bl\n" /* bits += 16 */ 561 " shll %%cl, %%eax\n" 562 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */ 563 564 ".L_get_distance_code:\n" 565 " movl 60(%%esp), %%eax\n" /* eax = dmask */ 566 " movl 36(%%esp), 
%%ecx\n" /* ecx = dcode */ 567 " andl %%edx, %%eax\n" /* eax &= hold */ 568 " movl (%%ecx,%%eax,4), %%eax\n"/* eax = dcode[hold & dmask] */ 569 570 ".L_dodist:\n" 571 " movl %%eax, %%ebp\n" /* dist = this */ 572 " shrl $16, %%ebp\n" /* dist = this.val */ 573 " movb %%ah, %%cl\n" 574 " subb %%ah, %%bl\n" /* bits -= this.bits */ 575 " shrl %%cl, %%edx\n" /* hold >>= this.bits */ 576 " movb %%al, %%cl\n" /* cl = this.op */ 577 578 " testb $16, %%al\n" /* if ((op & 16) == 0) */ 579 " jz .L_test_for_second_level_dist\n" 580 " andb $15, %%cl\n" /* op &= 15 */ 581 " jz .L_check_dist_one\n" 582 " cmpb %%cl, %%bl\n" 583 " jae .L_add_bits_to_dist\n" /* if (op <= bits) 97.6% */ 584 585 " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */ 586 " xorl %%eax, %%eax\n" 587 " lodsw\n" /* al = *(ushort *)in++ */ 588 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ 589 " addb $16, %%bl\n" /* bits += 16 */ 590 " shll %%cl, %%eax\n" 591 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */ 592 " movb %%ch, %%cl\n" /* move op back to ecx */ 593 594 ".L_add_bits_to_dist:\n" 595 " subb %%cl, %%bl\n" 596 " xorl %%eax, %%eax\n" 597 " incl %%eax\n" 598 " shll %%cl, %%eax\n" 599 " decl %%eax\n" /* (1 << op) - 1 */ 600 " andl %%edx, %%eax\n" /* eax &= hold */ 601 " shrl %%cl, %%edx\n" 602 " addl %%eax, %%ebp\n" /* dist += hold & ((1 << op) - 1) */ 603 604 ".L_check_window:\n" 605 " movl %%esi, 8(%%esp)\n" /* save in so from can use it's reg */ 606 " movl %%edi, %%eax\n" 607 " subl 20(%%esp), %%eax\n" /* nbytes = out - beg */ 608 609 " cmpl %%ebp, %%eax\n" 610 " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */ 611 612 " movl 64(%%esp), %%ecx\n" /* ecx = len */ 613 " movl %%edi, %%esi\n" 614 " subl %%ebp, %%esi\n" /* from = out - dist */ 615 616 " sarl %%ecx\n" 617 " jnc .L_copy_two\n" /* if len % 2 == 0 */ 618 619 " rep movsw\n" 620 " movb (%%esi), %%al\n" 621 " movb %%al, (%%edi)\n" 622 " incl %%edi\n" 623 624 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from 
*/ 625 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */ 626 " jmp .L_while_test\n" 627 628 ".L_copy_two:\n" 629 " rep movsw\n" 630 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */ 631 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */ 632 " jmp .L_while_test\n" 633 634 ".align 32,0x90\n" 635 ".L_check_dist_one:\n" 636 " cmpl $1, %%ebp\n" /* if dist 1, is a memset */ 637 " jne .L_check_window\n" 638 " cmpl %%edi, 20(%%esp)\n" 639 " je .L_check_window\n" /* out == beg, if outside window */ 640 641 " movl 64(%%esp), %%ecx\n" /* ecx = len */ 642 " movb -1(%%edi), %%al\n" 643 " movb %%al, %%ah\n" 644 645 " sarl %%ecx\n" 646 " jnc .L_set_two\n" 647 " movb %%al, (%%edi)\n" 648 " incl %%edi\n" 649 650 ".L_set_two:\n" 651 " rep stosw\n" 652 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */ 653 " jmp .L_while_test\n" 654 655 ".align 32,0x90\n" 656 ".L_test_for_second_level_length:\n" 657 " testb $64, %%al\n" 658 " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */ 659 660 " xorl %%eax, %%eax\n" 661 " incl %%eax\n" 662 " shll %%cl, %%eax\n" 663 " decl %%eax\n" 664 " andl %%edx, %%eax\n" /* eax &= hold */ 665 " addl 64(%%esp), %%eax\n" /* eax += len */ 666 " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/ 667 " jmp .L_dolen\n" 668 669 ".align 32,0x90\n" 670 ".L_test_for_second_level_dist:\n" 671 " testb $64, %%al\n" 672 " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */ 673 674 " xorl %%eax, %%eax\n" 675 " incl %%eax\n" 676 " shll %%cl, %%eax\n" 677 " decl %%eax\n" 678 " andl %%edx, %%eax\n" /* eax &= hold */ 679 " addl %%ebp, %%eax\n" /* eax += dist */ 680 " movl 36(%%esp), %%ecx\n" /* ecx = dcode */ 681 " movl (%%ecx,%%eax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/ 682 " jmp .L_dodist\n" 683 684 ".align 32,0x90\n" 685 ".L_clip_window:\n" 686 " movl %%eax, %%ecx\n" 687 " movl 48(%%esp), %%eax\n" /* eax = wsize */ 688 " negl %%ecx\n" /* nbytes = -nbytes */ 689 " movl 28(%%esp), %%esi\n" /* from = window */ 690 691 " cmpl %%ebp, 
%%eax\n" 692 " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */ 693 694 " addl %%ebp, %%ecx\n" /* nbytes = dist - nbytes */ 695 " cmpl $0, 52(%%esp)\n" 696 " jne .L_wrap_around_window\n" /* if (write != 0) */ 697 698 " subl %%ecx, %%eax\n" 699 " addl %%eax, %%esi\n" /* from += wsize - nbytes */ 700 701 " movl 64(%%esp), %%eax\n" /* eax = len */ 702 " cmpl %%ecx, %%eax\n" 703 " jbe .L_do_copy\n" /* if (nbytes >= len) */ 704 705 " subl %%ecx, %%eax\n" /* len -= nbytes */ 706 " rep movsb\n" 707 " movl %%edi, %%esi\n" 708 " subl %%ebp, %%esi\n" /* from = out - dist */ 709 " jmp .L_do_copy\n" 710 711 ".align 32,0x90\n" 712 ".L_wrap_around_window:\n" 713 " movl 52(%%esp), %%eax\n" /* eax = write */ 714 " cmpl %%eax, %%ecx\n" 715 " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */ 716 717 " addl 48(%%esp), %%esi\n" /* from += wsize */ 718 " addl %%eax, %%esi\n" /* from += write */ 719 " subl %%ecx, %%esi\n" /* from -= nbytes */ 720 " subl %%eax, %%ecx\n" /* nbytes -= write */ 721 722 " movl 64(%%esp), %%eax\n" /* eax = len */ 723 " cmpl %%ecx, %%eax\n" 724 " jbe .L_do_copy\n" /* if (nbytes >= len) */ 725 726 " subl %%ecx, %%eax\n" /* len -= nbytes */ 727 " rep movsb\n" 728 " movl 28(%%esp), %%esi\n" /* from = window */ 729 " movl 52(%%esp), %%ecx\n" /* nbytes = write */ 730 " cmpl %%ecx, %%eax\n" 731 " jbe .L_do_copy\n" /* if (nbytes >= len) */ 732 733 " subl %%ecx, %%eax\n" /* len -= nbytes */ 734 " rep movsb\n" 735 " movl %%edi, %%esi\n" 736 " subl %%ebp, %%esi\n" /* from = out - dist */ 737 " jmp .L_do_copy\n" 738 739 ".align 32,0x90\n" 740 ".L_contiguous_in_window:\n" 741 " addl %%eax, %%esi\n" 742 " subl %%ecx, %%esi\n" /* from += write - nbytes */ 743 744 " movl 64(%%esp), %%eax\n" /* eax = len */ 745 " cmpl %%ecx, %%eax\n" 746 " jbe .L_do_copy\n" /* if (nbytes >= len) */ 747 748 " subl %%ecx, %%eax\n" /* len -= nbytes */ 749 " rep movsb\n" 750 " movl %%edi, %%esi\n" 751 " subl %%ebp, %%esi\n" /* from = out - dist */ 752 " jmp .L_do_copy\n" /* if 
(nbytes >= len) */ 753 754 ".align 32,0x90\n" 755 ".L_do_copy:\n" 756 " movl %%eax, %%ecx\n" 757 " rep movsb\n" 758 759 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */ 760 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */ 761 " jmp .L_while_test\n" 762 763 ".L_test_for_end_of_block:\n" 764 " testb $32, %%al\n" 765 " jz .L_invalid_literal_length_code\n" 766 " movl $1, 72(%%esp)\n" 767 " jmp .L_break_loop_with_status\n" 768 769 ".L_invalid_literal_length_code:\n" 770 " movl $2, 72(%%esp)\n" 771 " jmp .L_break_loop_with_status\n" 772 773 ".L_invalid_distance_code:\n" 774 " movl $3, 72(%%esp)\n" 775 " jmp .L_break_loop_with_status\n" 776 777 ".L_invalid_distance_too_far:\n" 778 " movl 8(%%esp), %%esi\n" 779 " movl $4, 72(%%esp)\n" 780 " jmp .L_break_loop_with_status\n" 781 782 ".L_break_loop:\n" 783 " movl $0, 72(%%esp)\n" 784 785 ".L_break_loop_with_status:\n" 786 /* put in, out, bits, and hold back into ar and pop esp */ 787 " movl %%esi, 8(%%esp)\n" /* save in */ 788 " movl %%edi, 16(%%esp)\n" /* save out */ 789 " movl %%ebx, 44(%%esp)\n" /* save bits */ 790 " movl %%edx, 40(%%esp)\n" /* save hold */ 791 " movl 4(%%esp), %%ebp\n" /* restore esp, ebp */ 792 " movl (%%esp), %%esp\n" 793 : 794 : "m" (ar) 795 : "memory", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi" 796 ); 797 #elif defined( _MSC_VER ) && ! 
defined( _M_AMD64 ) 798 __asm { 799 lea eax, ar 800 mov [eax], esp /* save esp, ebp */ 801 mov [eax+4], ebp 802 mov esp, eax 803 mov esi, [esp+8] /* esi = in */ 804 mov edi, [esp+16] /* edi = out */ 805 mov edx, [esp+40] /* edx = hold */ 806 mov ebx, [esp+44] /* ebx = bits */ 807 mov ebp, [esp+32] /* ebp = lcode */ 808 809 cld 810 jmp L_do_loop 811 812 ALIGN 4 813 L_while_test: 814 cmp [esp+24], edi 815 jbe L_break_loop 816 cmp [esp+12], esi 817 jbe L_break_loop 818 819 L_do_loop: 820 cmp bl, 15 821 ja L_get_length_code /* if (15 < bits) */ 822 823 xor eax, eax 824 lodsw /* al = *(ushort *)in++ */ 825 mov cl, bl /* cl = bits, needs it for shifting */ 826 add bl, 16 /* bits += 16 */ 827 shl eax, cl 828 or edx, eax /* hold |= *((ushort *)in)++ << bits */ 829 830 L_get_length_code: 831 mov eax, [esp+56] /* eax = lmask */ 832 and eax, edx /* eax &= hold */ 833 mov eax, [ebp+eax*4] /* eax = lcode[hold & lmask] */ 834 835 L_dolen: 836 mov cl, ah /* cl = this.bits */ 837 sub bl, ah /* bits -= this.bits */ 838 shr edx, cl /* hold >>= this.bits */ 839 840 test al, al 841 jnz L_test_for_length_base /* if (op != 0) 45.7% */ 842 843 shr eax, 16 /* output this.val char */ 844 stosb 845 jmp L_while_test 846 847 ALIGN 4 848 L_test_for_length_base: 849 mov ecx, eax /* len = this */ 850 shr ecx, 16 /* len = this.val */ 851 mov [esp+64], ecx /* save len */ 852 mov cl, al 853 854 test al, 16 855 jz L_test_for_second_level_length /* if ((op & 16) == 0) 8% */ 856 and cl, 15 /* op &= 15 */ 857 jz L_decode_distance /* if (!op) */ 858 cmp bl, cl 859 jae L_add_bits_to_len /* if (op <= bits) */ 860 861 mov ch, cl /* stash op in ch, freeing cl */ 862 xor eax, eax 863 lodsw /* al = *(ushort *)in++ */ 864 mov cl, bl /* cl = bits, needs it for shifting */ 865 add bl, 16 /* bits += 16 */ 866 shl eax, cl 867 or edx, eax /* hold |= *((ushort *)in)++ << bits */ 868 mov cl, ch /* move op back to ecx */ 869 870 L_add_bits_to_len: 871 sub bl, cl 872 xor eax, eax 873 inc eax 874 shl eax, cl 875 dec eax 
876 and eax, edx /* eax &= hold */ 877 shr edx, cl 878 add [esp+64], eax /* len += hold & mask[op] */ 879 880 L_decode_distance: 881 cmp bl, 15 882 ja L_get_distance_code /* if (15 < bits) */ 883 884 xor eax, eax 885 lodsw /* al = *(ushort *)in++ */ 886 mov cl, bl /* cl = bits, needs it for shifting */ 887 add bl, 16 /* bits += 16 */ 888 shl eax, cl 889 or edx, eax /* hold |= *((ushort *)in)++ << bits */ 890 891 L_get_distance_code: 892 mov eax, [esp+60] /* eax = dmask */ 893 mov ecx, [esp+36] /* ecx = dcode */ 894 and eax, edx /* eax &= hold */ 895 mov eax, [ecx+eax*4]/* eax = dcode[hold & dmask] */ 896 897 L_dodist: 898 mov ebp, eax /* dist = this */ 899 shr ebp, 16 /* dist = this.val */ 900 mov cl, ah 901 sub bl, ah /* bits -= this.bits */ 902 shr edx, cl /* hold >>= this.bits */ 903 mov cl, al /* cl = this.op */ 904 905 test al, 16 /* if ((op & 16) == 0) */ 906 jz L_test_for_second_level_dist 907 and cl, 15 /* op &= 15 */ 908 jz L_check_dist_one 909 cmp bl, cl 910 jae L_add_bits_to_dist /* if (op <= bits) 97.6% */ 911 912 mov ch, cl /* stash op in ch, freeing cl */ 913 xor eax, eax 914 lodsw /* al = *(ushort *)in++ */ 915 mov cl, bl /* cl = bits, needs it for shifting */ 916 add bl, 16 /* bits += 16 */ 917 shl eax, cl 918 or edx, eax /* hold |= *((ushort *)in)++ << bits */ 919 mov cl, ch /* move op back to ecx */ 920 921 L_add_bits_to_dist: 922 sub bl, cl 923 xor eax, eax 924 inc eax 925 shl eax, cl 926 dec eax /* (1 << op) - 1 */ 927 and eax, edx /* eax &= hold */ 928 shr edx, cl 929 add ebp, eax /* dist += hold & ((1 << op) - 1) */ 930 931 L_check_window: 932 mov [esp+8], esi /* save in so from can use it's reg */ 933 mov eax, edi 934 sub eax, [esp+20] /* nbytes = out - beg */ 935 936 cmp eax, ebp 937 jb L_clip_window /* if (dist > nbytes) 4.2% */ 938 939 mov ecx, [esp+64] /* ecx = len */ 940 mov esi, edi 941 sub esi, ebp /* from = out - dist */ 942 943 sar ecx, 1 944 jnc L_copy_two 945 946 rep movsw 947 mov al, [esi] 948 mov [edi], al 949 inc edi 950 951 mov 
esi, [esp+8] /* move in back to %esi, toss from */ 952 mov ebp, [esp+32] /* ebp = lcode */ 953 jmp L_while_test 954 955 L_copy_two: 956 rep movsw 957 mov esi, [esp+8] /* move in back to %esi, toss from */ 958 mov ebp, [esp+32] /* ebp = lcode */ 959 jmp L_while_test 960 961 ALIGN 4 962 L_check_dist_one: 963 cmp ebp, 1 /* if dist 1, is a memset */ 964 jne L_check_window 965 cmp [esp+20], edi 966 je L_check_window /* out == beg, if outside window */ 967 968 mov ecx, [esp+64] /* ecx = len */ 969 mov al, [edi-1] 970 mov ah, al 971 972 sar ecx, 1 973 jnc L_set_two 974 mov [edi], al /* memset out with from[-1] */ 975 inc edi 976 977 L_set_two: 978 rep stosw 979 mov ebp, [esp+32] /* ebp = lcode */ 980 jmp L_while_test 981 982 ALIGN 4 983 L_test_for_second_level_length: 984 test al, 64 985 jnz L_test_for_end_of_block /* if ((op & 64) != 0) */ 986 987 xor eax, eax 988 inc eax 989 shl eax, cl 990 dec eax 991 and eax, edx /* eax &= hold */ 992 add eax, [esp+64] /* eax += len */ 993 mov eax, [ebp+eax*4] /* eax = lcode[val+(hold&mask[op])]*/ 994 jmp L_dolen 995 996 ALIGN 4 997 L_test_for_second_level_dist: 998 test al, 64 999 jnz L_invalid_distance_code /* if ((op & 64) != 0) */ 1000 1001 xor eax, eax 1002 inc eax 1003 shl eax, cl 1004 dec eax 1005 and eax, edx /* eax &= hold */ 1006 add eax, ebp /* eax += dist */ 1007 mov ecx, [esp+36] /* ecx = dcode */ 1008 mov eax, [ecx+eax*4] /* eax = dcode[val+(hold&mask[op])]*/ 1009 jmp L_dodist 1010 1011 ALIGN 4 1012 L_clip_window: 1013 mov ecx, eax 1014 mov eax, [esp+48] /* eax = wsize */ 1015 neg ecx /* nbytes = -nbytes */ 1016 mov esi, [esp+28] /* from = window */ 1017 1018 cmp eax, ebp 1019 jb L_invalid_distance_too_far /* if (dist > wsize) */ 1020 1021 add ecx, ebp /* nbytes = dist - nbytes */ 1022 cmp dword ptr [esp+52], 0 1023 jne L_wrap_around_window /* if (write != 0) */ 1024 1025 sub eax, ecx 1026 add esi, eax /* from += wsize - nbytes */ 1027 1028 mov eax, [esp+64] /* eax = len */ 1029 cmp eax, ecx 1030 jbe L_do_copy /* if 
(nbytes >= len) */ 1031 1032 sub eax, ecx /* len -= nbytes */ 1033 rep movsb 1034 mov esi, edi 1035 sub esi, ebp /* from = out - dist */ 1036 jmp L_do_copy 1037 1038 ALIGN 4 1039 L_wrap_around_window: 1040 mov eax, [esp+52] /* eax = write */ 1041 cmp ecx, eax 1042 jbe L_contiguous_in_window /* if (write >= nbytes) */ 1043 1044 add esi, [esp+48] /* from += wsize */ 1045 add esi, eax /* from += write */ 1046 sub esi, ecx /* from -= nbytes */ 1047 sub ecx, eax /* nbytes -= write */ 1048 1049 mov eax, [esp+64] /* eax = len */ 1050 cmp eax, ecx 1051 jbe L_do_copy /* if (nbytes >= len) */ 1052 1053 sub eax, ecx /* len -= nbytes */ 1054 rep movsb 1055 mov esi, [esp+28] /* from = window */ 1056 mov ecx, [esp+52] /* nbytes = write */ 1057 cmp eax, ecx 1058 jbe L_do_copy /* if (nbytes >= len) */ 1059 1060 sub eax, ecx /* len -= nbytes */ 1061 rep movsb 1062 mov esi, edi 1063 sub esi, ebp /* from = out - dist */ 1064 jmp L_do_copy 1065 1066 ALIGN 4 1067 L_contiguous_in_window: 1068 add esi, eax 1069 sub esi, ecx /* from += write - nbytes */ 1070 1071 mov eax, [esp+64] /* eax = len */ 1072 cmp eax, ecx 1073 jbe L_do_copy /* if (nbytes >= len) */ 1074 1075 sub eax, ecx /* len -= nbytes */ 1076 rep movsb 1077 mov esi, edi 1078 sub esi, ebp /* from = out - dist */ 1079 jmp L_do_copy 1080 1081 ALIGN 4 1082 L_do_copy: 1083 mov ecx, eax 1084 rep movsb 1085 1086 mov esi, [esp+8] /* move in back to %esi, toss from */ 1087 mov ebp, [esp+32] /* ebp = lcode */ 1088 jmp L_while_test 1089 1090 L_test_for_end_of_block: 1091 test al, 32 1092 jz L_invalid_literal_length_code 1093 mov dword ptr [esp+72], 1 1094 jmp L_break_loop_with_status 1095 1096 L_invalid_literal_length_code: 1097 mov dword ptr [esp+72], 2 1098 jmp L_break_loop_with_status 1099 1100 L_invalid_distance_code: 1101 mov dword ptr [esp+72], 3 1102 jmp L_break_loop_with_status 1103 1104 L_invalid_distance_too_far: 1105 mov esi, [esp+4] 1106 mov dword ptr [esp+72], 4 1107 jmp L_break_loop_with_status 1108 1109 L_break_loop: 1110 
mov dword ptr [esp+72], 0 1111 1112 L_break_loop_with_status: 1113 /* put in, out, bits, and hold back into ar and pop esp */ 1114 mov [esp+8], esi /* save in */ 1115 mov [esp+16], edi /* save out */ 1116 mov [esp+44], ebx /* save bits */ 1117 mov [esp+40], edx /* save hold */ 1118 mov ebp, [esp+4] /* restore esp, ebp */ 1119 mov esp, [esp] 1120 } 1121 #else 1122 #error "x86 architecture not defined" 1123 #endif 1124 1125 if (ar.status > 1) { 1126 if (ar.status == 2) 1127 strm->msg = "invalid literal/length code"; 1128 else if (ar.status == 3) 1129 strm->msg = "invalid distance code"; 1130 else 1131 strm->msg = "invalid distance too far back"; 1132 state->mode = BAD; 1133 } 1134 else if ( ar.status == 1 ) { 1135 state->mode = TYPE; 1136 } 1137 1138 /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ 1139 ar.len = ar.bits >> 3; 1140 ar.in -= ar.len; 1141 ar.bits -= ar.len << 3; 1142 ar.hold &= (1U << ar.bits) - 1; 1143 1144 /* update state and return */ 1145 strm->next_in = ar.in; 1146 strm->next_out = ar.out; 1147 strm->avail_in = (unsigned)(ar.in < ar.last ? 1148 PAD_AVAIL_IN + (ar.last - ar.in) : 1149 PAD_AVAIL_IN - (ar.in - ar.last)); 1150 strm->avail_out = (unsigned)(ar.out < ar.end ? 1151 PAD_AVAIL_OUT + (ar.end - ar.out) : 1152 PAD_AVAIL_OUT - (ar.out - ar.end)); 1153 state->hold = ar.hold; 1154 state->bits = ar.bits; 1155 return; 1156 } 1157