;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
; *
; * inffas32.asm is derived from inffas86.c, with translation of assembly code
; *
; * Copyright (C) 1995-2003 Mark Adler
; * For conditions of distribution and use, see copyright notice in zlib.h
; *
; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
; * Please use the copyright conditions above.
; *
; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
; * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
; * the moment.  I have successfully compiled and tested this code with gcc2.96,
; * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
; * enabled.  I will attempt to merge the MMX code into this version.  Newer
; * versions of this and inffast.S can be found at
; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
; *
; * 2005 : modification by Gilles Vollant
; */
; For Visual C++ 4.x and higher and ML 6.x and higher
;   ml.exe is in directory \MASM611C of Win95 DDK
;   ml.exe is also distributed in http://www.masm32.com/masmdl.htm
;    and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
;
;
;   compile with command line option
;   ml  /coff /Zi /c /Flinffas32.lst inffas32.asm

;   if you define NO_GUNZIP (see inflate.h), compile with
;   ml  /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm


; zlib1222sup is 0 for zlib 1.2.2.1 and lower
; zlib1222sup is 8 for zlib 1.2.2.2 and more (with addition of dmax and head
;        in inflate_state in inflate.h)
zlib1222sup      equ    8


; Values stored into state->mode on exit.  They differ depending on whether
; the gzip-related modes exist in the build (GUNZIP / NO_GUNZIP).
IFDEF GUNZIP
  INFLATE_MODE_TYPE    equ 11
  INFLATE_MODE_BAD     equ 26
ELSE
  IFNDEF NO_GUNZIP
    INFLATE_MODE_TYPE    equ 11
    INFLATE_MODE_BAD     equ 26
  ELSE
    INFLATE_MODE_TYPE    equ 3
    INFLATE_MODE_BAD     equ 17
  ENDIF
ENDIF


        .586p
        .mmx

        name    inflate_fast_x86
        .MODEL  FLAT

_DATA   segment
; Decoder selection, cached across calls:
;   1 = not probed yet, 2 = use the MMX loop, 3 = use the integer loop.
inflate_fast_use_mmx:
        dd      1
_DATA   ends


_TEXT   segment


ALIGN 4
        db      'Fast decoding Code from Chris Anderson'
        db      0

ALIGN 4
invalid_literal_length_code_msg:
        db      'invalid literal/length code'
        db      0

ALIGN 4
invalid_distance_code_msg:
        db      'invalid distance code'
        db      0

ALIGN 4
invalid_distance_too_far_msg:
        db      'invalid distance too far back'
        db      0


; inflate_fast_mask[n] = (1 << n) - 1 for n = 0..32; used by the MMX loop to
; extract n extra bits from the low dword of the bit accumulator.
ALIGN 4
inflate_fast_mask:
        dd      0
        dd      1
        dd      3
        dd      7
        dd      15
        dd      31
        dd      63
        dd      127
        dd      255
        dd      511
        dd      1023
        dd      2047
        dd      4095
        dd      8191
        dd      16383
        dd      32767
        dd      65535
        dd      131071
        dd      262143
        dd      524287
        dd      1048575
        dd      2097151
        dd      4194303
        dd      8388607
        dd      16777215
        dd      33554431
        dd      67108863
        dd      134217727
        dd      268435455
        dd      536870911
        dd      1073741823
        dd      2147483647
        dd      4294967295


; Byte offsets of the fields read/written in the inflate state structure.
mode_state       equ    0                       ;/* state->mode     */
wsize_state      equ    (32+zlib1222sup)        ;/* state->wsize    */
write_state      equ    (36+4+zlib1222sup)      ;/* state->write    */
window_state     equ    (40+4+zlib1222sup)      ;/* state->window   */
hold_state       equ    (44+4+zlib1222sup)      ;/* state->hold     */
bits_state       equ    (48+4+zlib1222sup)      ;/* state->bits     */
lencode_state    equ    (64+4+zlib1222sup)      ;/* state->lencode  */
distcode_state   equ    (68+4+zlib1222sup)      ;/* state->distcode */
lenbits_state    equ    (72+4+zlib1222sup)      ;/* state->lenbits  */
distbits_state   equ    (76+4+zlib1222sup)      ;/* state->distbits */


;-----------------------------------------------------------------------
; _inflate_fast -- fast inner decoding loop (assembler form of inffast.c)
;
; ABI:   32-bit, two dword stack arguments, caller cleans up (plain `ret`).
; In:    arg1 = pointer to the stream structure ("strm" below)
;        arg2 = dword used only to compute the start-of-output pointer
;               (presumably `start` of inflate_fast() in inffast.c -- confirm)
; Out:   no return value is set up; results are stored through strm and
;        through the state structure pointed to by [strm+28].
; Saves: edi, esi, ebp, ebx and EFLAGS are pushed on entry and popped on
;        exit; eax, ecx, edx (and mm0-mm7 on the MMX path) are clobbered.
;
; Stream structure offsets used here.  The field names are NOT given in this
; file; they are assumed from the z_stream layout in zlib.h -- confirm:
;        [strm+0]  input pointer            (presumably next_in)
;        [strm+4]  input bytes available    (presumably avail_in)
;        [strm+12] output pointer           (presumably next_out)
;        [strm+16] output bytes available   (presumably avail_out)
;        [strm+24] receives an error-message pointer (presumably msg)
;        [strm+28] pointer to the inflate state (the *_state offsets above)
;
; Stack frame after the prologue (5 pushes + 64 bytes of locals, so the
; arguments are found at [esp+88] and [esp+92]):
;        [esp+0]   lmask  = (1 << state->lenbits)  - 1
;        [esp+4]   dmask  = (1 << state->distbits) - 1
;        [esp+8]   state->lencode  (length/literal table)
;        [esp+12]  state->distcode (distance table)
;        [esp+16]  end    = out + avail_out - 257
;        [esp+20]  last   = in  + avail_in  - 11
;        [esp+24]  len    (current match length, integer loop only)
;        [esp+28]  12-byte scratch input buffer (used when input is short)
;        [esp+40]  beg    = out - (arg2 - avail_out)
;        [esp+44]  saved input pointer
;        [esp+48]  state->write
;        [esp+52]  state->wsize
;        [esp+56]  state->window
;        [esp+60]  initial output pointer
;
; Register roles, integer loop:  esi = in, edi = out, ebp = hold, ebx = bits.
; Register roles, MMX loop:      esi = in, edi = out, mm0 = hold,
;        ebp = bits, mm1 = pending shift count, ebx = lencode table,
;        mm4/mm3 = lmask (working/master), mm5/mm2 = dmask (working/master).
;
; Table entries are dwords: al = op, ah = bit count, upper 16 bits = value.
;
; NOTE(review): the "too far back" test compares the distance with wsize
; only; it looks like newer inffast.c checks against the number of valid
; window bytes instead -- confirm before using with newer zlib sources.
;-----------------------------------------------------------------------
ALIGN 4
_inflate_fast proc near
.FPO (16, 4, 0, 0, 1, 0)
        push    edi
        push    esi
        push    ebp
        push    ebx
        pushfd
        sub     esp,64
        cld                             ; string ops must run forward

        mov     esi, [esp+88]           ; esi = strm
        mov     edi, [esi+28]           ; edi = state

        mov     edx, [esi+4]
        mov     eax, [esi+0]

        add     edx,eax
        sub     edx,11                  ; last = in + avail_in - 11

        mov     [esp+44],eax
        mov     [esp+20],edx

        mov     ebp, [esp+92]
        mov     ecx, [esi+16]
        mov     ebx, [esi+12]

        sub     ebp,ecx
        neg     ebp
        add     ebp,ebx                 ; beg = out - (arg2 - avail_out)

        sub     ecx,257
        add     ecx,ebx                 ; end = out + avail_out - 257

        mov     [esp+60],ebx
        mov     [esp+40],ebp
        mov     [esp+16],ecx

        mov     eax, [edi+lencode_state]
        mov     ecx, [edi+distcode_state]

        mov     [esp+8],eax
        mov     [esp+12],ecx

        mov     eax,1
        mov     ecx, [edi+lenbits_state]
        shl     eax,cl
        dec     eax
        mov     [esp+0],eax             ; lmask

        mov     eax,1
        mov     ecx, [edi+distbits_state]
        shl     eax,cl
        dec     eax
        mov     [esp+4],eax             ; dmask

        mov     eax, [edi+wsize_state]
        mov     ecx, [edi+write_state]
        mov     edx, [edi+window_state]

        mov     [esp+52],eax
        mov     [esp+48],ecx
        mov     [esp+56],edx

        mov     ebp, [edi+hold_state]
        mov     ebx, [edi+bits_state]

        mov     esi, [esp+44]
        mov     ecx, [esp+20]
        cmp     ecx,esi
        ja      L_align_long            ; more than 11 input bytes available

        ; Short input: copy the remaining bytes into the 12-byte scratch
        ; buffer, zero-fill the rest, and decode from there so that the
        ; word/dword input reads below never run past the caller's buffer.
        add     ecx,11
        sub     ecx,esi                 ; ecx = bytes available
        mov     eax,12
        sub     eax,ecx                 ; eax = padding bytes
        lea     edi, [esp+28]
        rep movsb
        mov     ecx,eax
        xor     eax,eax
        rep stosb
        lea     esi, [esp+28]
        mov     [esp+20],esi            ; last = buf (also marks "buf in use")
        jmp     L_is_aligned


L_align_long:
        ; Feed single bytes into hold until the input pointer is 4-aligned.
        test    esi,3
        jz      L_is_aligned
        xor     eax,eax
        mov     al, [esi]
        inc     esi
        mov     ecx,ebx
        add     ebx,8
        shl     eax,cl
        or      ebp,eax
        jmp     L_align_long

L_is_aligned:
        mov     edi, [esp+60]           ; edi = out

L_check_mmx:
        cmp     dword ptr [inflate_fast_use_mmx],2
        je      L_init_mmx
        ja      L_do_loop

        ; First call: probe the CPU once and cache the decision.
        push    eax
        push    ebx
        push    ecx
        push    edx
        pushfd
        mov     eax, [esp]
        xor     dword ptr [esp],0200000h ; try to toggle EFLAGS.ID (bit 21)
        popfd
        pushfd
        pop     edx
        xor     edx,eax
        jz      L_dont_use_mmx          ; ID bit did not change: no CPUID
        xor     eax,eax
        cpuid
        cmp     ebx,0756e6547h          ; "Genu"
        jne     L_dont_use_mmx
        cmp     ecx,06c65746eh          ; "ntel"
        jne     L_dont_use_mmx
        cmp     edx,049656e69h          ; "ineI"
        jne     L_dont_use_mmx
        mov     eax,1
        cpuid
        shr     eax,8
        and     eax,15                  ; CPU family
        cmp     eax,6
        jne     L_dont_use_mmx
        test    edx,0800000h            ; feature bit 23 = MMX
        jz      L_dont_use_mmx
        mov     dword ptr [inflate_fast_use_mmx],2
        jmp     L_check_mmx_pop
L_dont_use_mmx:
        mov     dword ptr [inflate_fast_use_mmx],3
L_check_mmx_pop:
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        jmp     L_check_mmx


;----------------------------------------------------------------------
; Integer decoding loop
;----------------------------------------------------------------------
ALIGN 4
L_do_loop:
        cmp     bl,15
        ja      L_get_length_code       ; more than 15 bits already in hold

        xor     eax,eax
        lodsw                           ; pull 16 more input bits
        mov     cl,bl
        add     bl,16
        shl     eax,cl
        or      ebp,eax

L_get_length_code:
        mov     edx, [esp+0]
        mov     ecx, [esp+8]
        and     edx,ebp
        mov     eax, [ecx+edx*4]        ; eax = lencode[hold & lmask]

L_dolen:
        mov     cl,ah
        sub     bl,ah
        shr     ebp,cl                  ; drop the code's bits from hold

        test    al,al
        jnz     L_test_for_length_base

        shr     eax,16                  ; op == 0: literal byte
        stosb

L_while_test:
        cmp     [esp+16],edi
        jbe     L_break_loop            ; out reached end

        cmp     [esp+20],esi
        ja      L_do_loop               ; in still below last
        jmp     L_break_loop

L_test_for_length_base:
        mov     edx,eax
        shr     edx,16                  ; edx = base length
        mov     cl,al

        test    al,16
        jz      L_test_for_second_level_length
        and     cl,15                   ; cl = number of extra bits
        jz      L_save_len
        cmp     bl,cl
        jae     L_add_bits_to_len

        mov     ch,cl                   ; keep extra-bit count across refill
        xor     eax,eax
        lodsw
        mov     cl,bl
        add     bl,16
        shl     eax,cl
        or      ebp,eax
        mov     cl,ch

L_add_bits_to_len:
        mov     eax,1
        shl     eax,cl
        dec     eax
        sub     bl,cl
        and     eax,ebp
        shr     ebp,cl
        add     edx,eax                 ; len = base + extra bits

L_save_len:
        mov     [esp+24],edx


L_decode_distance:
        cmp     bl,15
        ja      L_get_distance_code

        xor     eax,eax
        lodsw
        mov     cl,bl
        add     bl,16
        shl     eax,cl
        or      ebp,eax

L_get_distance_code:
        mov     edx, [esp+4]
        mov     ecx, [esp+12]
        and     edx,ebp
        mov     eax, [ecx+edx*4]        ; eax = distcode[hold & dmask]


L_dodist:
        mov     edx,eax
        shr     edx,16                  ; edx = base distance
        mov     cl,ah
        sub     bl,ah
        shr     ebp,cl

        mov     cl,al

        test    al,16
        jz      L_test_for_second_level_dist
        and     cl,15
        jz      L_check_dist_one
        cmp     bl,cl
        jae     L_add_bits_to_dist

        mov     ch,cl
        xor     eax,eax
        lodsw
        mov     cl,bl
        add     bl,16
        shl     eax,cl
        or      ebp,eax
        mov     cl,ch

L_add_bits_to_dist:
        mov     eax,1
        shl     eax,cl
        dec     eax
        sub     bl,cl
        and     eax,ebp
        shr     ebp,cl
        add     edx,eax                 ; dist = base + extra bits
                                        ; falls through to L_check_window

L_check_window:
        mov     [esp+44],esi            ; esi is reused as the copy source
        mov     eax,edi
        sub     eax, [esp+40]           ; eax = out - beg

        cmp     eax,edx
        jb      L_clip_window           ; source starts before beg

        mov     ecx, [esp+24]
        mov     esi,edi
        sub     esi,edx                 ; from = out - dist

        ; Copy three bytes by hand, then the remaining len-3 with rep movsb.
        sub     ecx,3
        mov     al, [esi]
        mov     [edi],al
        mov     al, [esi+1]
        mov     dl, [esi+2]
        add     esi,3
        mov     [edi+1],al
        mov     [edi+2],dl
        add     edi,3
        rep movsb

        mov     esi, [esp+44]
        jmp     L_while_test

ALIGN 4
L_check_dist_one:
        cmp     edx,1
        jne     L_check_window
        cmp     [esp+40],edi
        je      L_check_window          ; nothing written yet: general path

        ; dist == 1: replicate the previous output byte len times.
        dec     edi
        mov     ecx, [esp+24]
        mov     al, [edi]
        sub     ecx,3

        mov     [edi+1],al
        mov     [edi+2],al
        mov     [edi+3],al
        add     edi,4
        rep stosb

        jmp     L_while_test

ALIGN 4
L_test_for_second_level_length:
        test    al,64
        jnz     L_test_for_end_of_block

        ; Second-level table: index = value + (hold & ((1 << op) - 1)).
        mov     eax,1
        shl     eax,cl
        dec     eax
        and     eax,ebp
        add     eax,edx
        mov     edx, [esp+8]
        mov     eax, [edx+eax*4]
        jmp     L_dolen

ALIGN 4
L_test_for_second_level_dist:
        test    al,64
        jnz     L_invalid_distance_code

        mov     eax,1
        shl     eax,cl
        dec     eax
        and     eax,ebp
        add     eax,edx
        mov     edx, [esp+12]
        mov     eax, [edx+eax*4]
        jmp     L_dodist

ALIGN 4
L_clip_window:
        ; Part (or all) of the match source lies in the sliding window.
        mov     ecx,eax
        mov     eax, [esp+52]           ; eax = wsize
        neg     ecx
        mov     esi, [esp+56]           ; from = window

        cmp     eax,edx
        jb      L_invalid_distance_too_far ; dist > wsize

        add     ecx,edx                 ; ecx = dist - (out - beg)
        cmp     dword ptr [esp+48],0
        jne     L_wrap_around_window

        sub     eax,ecx
        add     esi,eax                 ; from = window + wsize - ecx

        mov     eax, [esp+24]
        cmp     eax,ecx
        jbe     L_do_copy1

        sub     eax,ecx
        rep movsb                       ; window part
        mov     esi,edi
        sub     esi,edx                 ; remainder comes from the output
        jmp     L_do_copy1

L_wrap_around_window:
        mov     eax, [esp+48]           ; eax = write
        cmp     ecx,eax
        jbe     L_contiguous_in_window

        add     esi, [esp+52]
        add     esi,eax
        sub     esi,ecx                 ; from = window + wsize + write - ecx
        sub     ecx,eax                 ; bytes up to the end of the window

        mov     eax, [esp+24]
        cmp     eax,ecx
        jbe     L_do_copy1

        sub     eax,ecx
        rep movsb                       ; tail of the window
        mov     esi, [esp+56]
        mov     ecx, [esp+48]
        cmp     eax,ecx
        jbe     L_do_copy1

        sub     eax,ecx
        rep movsb                       ; head of the window
        mov     esi,edi
        sub     esi,edx
        jmp     L_do_copy1

L_contiguous_in_window:
        add     esi,eax
        sub     esi,ecx                 ; from = window + write - ecx

        mov     eax, [esp+24]
        cmp     eax,ecx
        jbe     L_do_copy1

        sub     eax,ecx
        rep movsb
        mov     esi,edi
        sub     esi,edx

L_do_copy1:
        mov     ecx,eax
        rep movsb

        mov     esi, [esp+44]           ; restore the input pointer
        jmp     L_while_test


;----------------------------------------------------------------------
; MMX decoding loop
;----------------------------------------------------------------------
ALIGN 4
L_init_mmx:
        emms

        movd    mm0,ebp                 ; mm0 = hold
        mov     ebp,ebx                 ; ebp = bits

        movd    mm4,dword ptr [esp+0]
        movq    mm3,mm4                 ; mm3 = lmask master copy
        movd    mm5,dword ptr [esp+4]
        movq    mm2,mm5                 ; mm2 = dmask master copy
        pxor    mm1,mm1                 ; no pending shift
        mov     ebx, [esp+8]            ; ebx = lencode table
        jmp     L_do_loop_mmx

ALIGN 4
L_do_loop_mmx:
        psrlq   mm0,mm1                 ; apply the pending shift

        cmp     ebp,32
        ja      L_get_length_code_mmx

        movd    mm6,ebp
        movd    mm7,dword ptr [esi]     ; pull 32 more input bits
        add     esi,4
        psllq   mm7,mm6
        add     ebp,32
        por     mm0,mm7

L_get_length_code_mmx:
        pand    mm4,mm0
        movd    eax,mm4
        movq    mm4,mm3                 ; reload the working mask
        mov     eax, [ebx+eax*4]

L_dolen_mmx:
        movzx   ecx,ah
        movd    mm1,ecx                 ; shift is deferred to the next psrlq
        sub     ebp,ecx

        test    al,al
        jnz     L_test_for_length_base_mmx

        shr     eax,16                  ; op == 0: literal byte
        stosb

L_while_test_mmx:
        cmp     [esp+16],edi
        jbe     L_break_loop

        cmp     [esp+20],esi
        ja      L_do_loop_mmx
        jmp     L_break_loop

L_test_for_length_base_mmx:
        mov     edx,eax
        shr     edx,16                  ; edx = base length

        test    al,16
        jz      L_test_for_second_level_length_mmx
        and     eax,15
        jz      L_decode_distance_mmx

        psrlq   mm0,mm1
        movd    mm1,eax
        movd    ecx,mm0
        sub     ebp,eax
        and     ecx, [inflate_fast_mask+eax*4]
        add     edx,ecx                 ; len = base + extra bits

L_decode_distance_mmx:
        psrlq   mm0,mm1

        cmp     ebp,32
        ja      L_get_dist_code_mmx

        movd    mm6,ebp
        movd    mm7,dword ptr [esi]
        add     esi,4
        psllq   mm7,mm6
        add     ebp,32
        por     mm0,mm7

L_get_dist_code_mmx:
        mov     ebx, [esp+12]           ; ebx temporarily = distcode table
        pand    mm5,mm0
        movd    eax,mm5
        movq    mm5,mm2
        mov     eax, [ebx+eax*4]

L_dodist_mmx:
        movzx   ecx,ah
        mov     ebx,eax
        shr     ebx,16                  ; ebx = base distance
        sub     ebp,ecx
        movd    mm1,ecx

        test    al,16
        jz      L_test_for_second_level_dist_mmx
        and     eax,15
        jz      L_check_dist_one_mmx

L_add_bits_to_dist_mmx:
        psrlq   mm0,mm1
        movd    mm1,eax
        movd    ecx,mm0
        sub     ebp,eax
        and     ecx, [inflate_fast_mask+eax*4]
        add     ebx,ecx                 ; dist = base + extra bits

L_check_window_mmx:
        mov     [esp+44],esi
        mov     eax,edi
        sub     eax, [esp+40]           ; eax = out - beg

        cmp     eax,ebx
        jb      L_clip_window_mmx

        mov     ecx,edx
        mov     esi,edi
        sub     esi,ebx                 ; from = out - dist

        sub     ecx,3
        mov     al, [esi]
        mov     [edi],al
        mov     al, [esi+1]
        mov     dl, [esi+2]
        add     esi,3
        mov     [edi+1],al
        mov     [edi+2],dl
        add     edi,3
        rep movsb

        mov     esi, [esp+44]
        mov     ebx, [esp+8]            ; ebx = lencode table again
        jmp     L_while_test_mmx

ALIGN 4
L_check_dist_one_mmx:
        cmp     ebx,1
        jne     L_check_window_mmx
        cmp     [esp+40],edi
        je      L_check_window_mmx

        ; dist == 1: replicate the previous output byte len times.
        dec     edi
        mov     ecx,edx
        mov     al, [edi]
        sub     ecx,3

        mov     [edi+1],al
        mov     [edi+2],al
        mov     [edi+3],al
        add     edi,4
        rep stosb

        mov     ebx, [esp+8]
        jmp     L_while_test_mmx

ALIGN 4
L_test_for_second_level_length_mmx:
        test    al,64
        jnz     L_test_for_end_of_block

        and     eax,15
        psrlq   mm0,mm1
        movd    ecx,mm0
        and     ecx, [inflate_fast_mask+eax*4]
        add     ecx,edx
        mov     eax, [ebx+ecx*4]
        jmp     L_dolen_mmx

ALIGN 4
L_test_for_second_level_dist_mmx:
        test    al,64
        jnz     L_invalid_distance_code

        and     eax,15
        psrlq   mm0,mm1
        movd    ecx,mm0
        and     ecx, [inflate_fast_mask+eax*4]
        mov     eax, [esp+12]
        add     ecx,ebx
        mov     eax, [eax+ecx*4]
        jmp     L_dodist_mmx

ALIGN 4
L_clip_window_mmx:
        mov     ecx,eax
        mov     eax, [esp+52]           ; eax = wsize
        neg     ecx
        mov     esi, [esp+56]           ; from = window

        cmp     eax,ebx
        jb      L_invalid_distance_too_far ; dist > wsize

        add     ecx,ebx                 ; ecx = dist - (out - beg)
        cmp     dword ptr [esp+48],0
        jne     L_wrap_around_window_mmx

        sub     eax,ecx
        add     esi,eax

        cmp     edx,ecx
        jbe     L_do_copy1_mmx

        sub     edx,ecx
        rep movsb
        mov     esi,edi
        sub     esi,ebx
        jmp     L_do_copy1_mmx

L_wrap_around_window_mmx:
        mov     eax, [esp+48]           ; eax = write
        cmp     ecx,eax
        jbe     L_contiguous_in_window_mmx

        add     esi, [esp+52]
        add     esi,eax
        sub     esi,ecx
        sub     ecx,eax

        cmp     edx,ecx
        jbe     L_do_copy1_mmx

        sub     edx,ecx
        rep movsb
        mov     esi, [esp+56]
        mov     ecx, [esp+48]
        cmp     edx,ecx
        jbe     L_do_copy1_mmx

        sub     edx,ecx
        rep movsb
        mov     esi,edi
        sub     esi,ebx
        jmp     L_do_copy1_mmx

L_contiguous_in_window_mmx:
        add     esi,eax
        sub     esi,ecx

        cmp     edx,ecx
        jbe     L_do_copy1_mmx

        sub     edx,ecx
        rep movsb
        mov     esi,edi
        sub     esi,ebx

L_do_copy1_mmx:
        mov     ecx,edx
        rep movsb

        mov     esi, [esp+44]
        mov     ebx, [esp+8]
        jmp     L_while_test_mmx


;----------------------------------------------------------------------
; Exit paths, shared by both loops.
; ecx = message pointer (0 = leave the message field alone),
; edx = value to store in state->mode.
;----------------------------------------------------------------------
L_invalid_distance_code:
        mov     ecx, invalid_distance_code_msg
        mov     edx,INFLATE_MODE_BAD
        jmp     L_update_stream_state

L_test_for_end_of_block:
        test    al,32
        jz      L_invalid_literal_length_code

        mov     ecx,0                   ; end of block: no message
        mov     edx,INFLATE_MODE_TYPE
        jmp     L_update_stream_state

L_invalid_literal_length_code:
        mov     ecx, invalid_literal_length_code_msg
        mov     edx,INFLATE_MODE_BAD
        jmp     L_update_stream_state

L_invalid_distance_too_far:
        mov     esi, [esp+44]           ; esi was overwritten with window
        mov     ecx, invalid_distance_too_far_msg
        mov     edx,INFLATE_MODE_BAD
                                        ; falls through

L_update_stream_state:
        mov     eax, [esp+88]
        test    ecx,ecx
        jz      L_skip_msg
        mov     [eax+24],ecx
L_skip_msg:
        mov     eax, [eax+28]
        mov     [eax+mode_state],edx
        jmp     L_break_loop

ALIGN 4
L_break_loop:
        cmp     dword ptr [inflate_fast_use_mmx],2
        jne     L_update_next_in

        mov     ebx,ebp                 ; MMX loop keeps the bit count in ebp

L_update_next_in:
        ; Give back whole unused bytes: in -= bits >> 3, bits &= 7.
        mov     eax, [esp+88]
        mov     ecx,ebx
        mov     edx, [eax+28]
        shr     ecx,3
        sub     esi,ecx
        shl     ecx,3
        sub     ebx,ecx
        mov     [eax+12],edi            ; store the output pointer
        mov     [edx+bits_state],ebx
        mov     ecx,ebx

        lea     ebx, [esp+28]
        cmp     [esp+20],ebx
        jne     L_buf_not_used

        ; The scratch buffer was the input: translate esi back into the
        ; caller's buffer and recompute last from the stream fields.
        sub     esi,ebx
        mov     ebx, [eax+0]
        mov     [esp+20],ebx
        add     esi,ebx
        mov     ebx, [eax+4]
        sub     ebx,11
        add     [esp+20],ebx

L_buf_not_used:
        mov     [eax+0],esi             ; store the input pointer

        mov     ebx,1
        shl     ebx,cl
        dec     ebx                     ; ebx = (1 << bits) - 1

        cmp     dword ptr [inflate_fast_use_mmx],2
        jne     L_update_hold

        psrlq   mm0,mm1                 ; apply the last pending shift
        movd    ebp,mm0

        emms

L_update_hold:
        and     ebp,ebx                 ; keep only the valid low bits
        mov     [edx+hold_state],ebp

        ; input available = 11 + (last - in), computed without going
        ; negative in either direction
        mov     ebx, [esp+20]
        cmp     ebx,esi
        jbe     L_last_is_smaller

        sub     ebx,esi
        add     ebx,11
        mov     [eax+4],ebx
        jmp     L_fixup_out
L_last_is_smaller:
        sub     esi,ebx
        neg     esi
        add     esi,11
        mov     [eax+4],esi

L_fixup_out:
        ; output available = 257 + (end - out)
        mov     ebx, [esp+16]
        cmp     ebx,edi
        jbe     L_end_is_smaller

        sub     ebx,edi
        add     ebx,257
        mov     [eax+16],ebx
        jmp     L_done
L_end_is_smaller:
        sub     edi,ebx
        neg     edi
        add     edi,257
        mov     [eax+16],edi

L_done:
        add     esp,64
        popfd
        pop     ebx
        pop     ebp
        pop     esi
        pop     edi
        ret
_inflate_fast endp

_TEXT   ends
end