1 ;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
   2 ; *
; * inffas32.asm is derived from inffas86.c, with translation of assembly code
   4 ; *
   5 ; * Copyright (C) 1995-2003 Mark Adler
   6 ; * For conditions of distribution and use, see copyright notice in zlib.h
   7 ; *
   8 ; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
   9 ; * Please use the copyright conditions above.
  10 ; *
  11 ; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
  12 ; * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
  13 ; * the moment.  I have successfully compiled and tested this code with gcc2.96,
  14 ; * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
  15 ; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
  16 ; * enabled.  I will attempt to merge the MMX code into this version.  Newer
  17 ; * versions of this and inffast.S can be found at
  18 ; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
  19 ; *
  20 ; * 2005 : modification by Gilles Vollant
  21 ; */
  22 ; For Visual C++ 4.x and higher and ML 6.x and higher
  23 ;   ml.exe is in directory \MASM611C of Win95 DDK
  24 ;   ml.exe is also distributed in http://www.masm32.com/masmdl.htm
  25 ;    and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
  26 ;
  27 ;
  28 ;   compile with command line option
  29 ;   ml  /coff /Zi /c /Flinffas32.lst inffas32.asm
  30 
;   if you define NO_GUNZIP (see inflate.h), compile with
  32 ;   ml  /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
  33 
  34 
; zlib1222sup is 0 for zlib 1.2.2.1 and lower
; zlib1222sup is 8 for zlib 1.2.2.2 and higher (with the addition of dmax and head
;        in inflate_state in inflate.h)
  38 zlib1222sup      equ    8
  39 
  40 
  41 IFDEF GUNZIP
  42   INFLATE_MODE_TYPE    equ 11
  43   INFLATE_MODE_BAD     equ 26
  44 ELSE
  45   IFNDEF NO_GUNZIP
  46     INFLATE_MODE_TYPE    equ 11
  47     INFLATE_MODE_BAD     equ 26
  48   ELSE
  49     INFLATE_MODE_TYPE    equ 3
  50     INFLATE_MODE_BAD     equ 17
  51   ENDIF
  52 ENDIF
  53 
  54 
  55 ; 75 "inffast.S"
  56 ;FILE "inffast.S"
  57 
  58 ;;;GLOBAL _inflate_fast
  59 
  60 ;;;SECTION .text
  61 
  62 
  63 
  64         .586p
  65         .mmx
  66 
  67         name    inflate_fast_x86
  68         .MODEL  FLAT
  69 
  70 _DATA                   segment
  71 inflate_fast_use_mmx:
  72         dd      1
  73 
  74 
  75 _TEXT                   segment
  76 
  77 
  78 
  79 ALIGN 4
  80         db      'Fast decoding Code from Chris Anderson'
  81         db      0
  82 
  83 ALIGN 4
  84 invalid_literal_length_code_msg:
  85         db      'invalid literal/length code'
  86         db      0
  87 
  88 ALIGN 4
  89 invalid_distance_code_msg:
  90         db      'invalid distance code'
  91         db      0
  92 
  93 ALIGN 4
  94 invalid_distance_too_far_msg:
  95         db      'invalid distance too far back'
  96         db      0
  97 
  98 
  99 ALIGN 4
 100 inflate_fast_mask:
 101 dd      0
 102 dd      1
 103 dd      3
 104 dd      7
 105 dd      15
 106 dd      31
 107 dd      63
 108 dd      127
 109 dd      255
 110 dd      511
 111 dd      1023
 112 dd      2047
 113 dd      4095
 114 dd      8191
 115 dd      16383
 116 dd      32767
 117 dd      65535
 118 dd      131071
 119 dd      262143
 120 dd      524287
 121 dd      1048575
 122 dd      2097151
 123 dd      4194303
 124 dd      8388607
 125 dd      16777215
 126 dd      33554431
 127 dd      67108863
 128 dd      134217727
 129 dd      268435455
 130 dd      536870911
 131 dd      1073741823
 132 dd      2147483647
 133 dd      4294967295
 134 
 135 
 136 mode_state       equ    0       ;/* state->mode      */
 137 wsize_state      equ    (32+zlib1222sup)        ;/* state->wsize */
 138 write_state      equ    (36+4+zlib1222sup)      ;/* state->write */
 139 window_state     equ    (40+4+zlib1222sup)      ;/* state->window */
 140 hold_state       equ    (44+4+zlib1222sup)      ;/* state->hold      */
 141 bits_state       equ    (48+4+zlib1222sup)      ;/* state->bits      */
 142 lencode_state    equ    (64+4+zlib1222sup)      ;/* state->lencode */
 143 distcode_state   equ    (68+4+zlib1222sup)      ;/* state->distcode */
 144 lenbits_state    equ    (72+4+zlib1222sup)      ;/* state->lenbits */
 145 distbits_state   equ    (76+4+zlib1222sup)      ;/* state->distbits */
 146 
 147 
 148 ;;SECTION .text
 149 ; 205 "inffast.S"
 150 ;GLOBAL inflate_fast_use_mmx
 151 
 152 ;SECTION .data
 153 
 154 
 155 ; GLOBAL inflate_fast_use_mmx:object
 156 ;.size inflate_fast_use_mmx, 4
 157 ; 226 "inffast.S"
 158 ;SECTION .text
 159 
 160 ALIGN 4
 161 _inflate_fast proc near
 162 .FPO (16, 4, 0, 0, 1, 0)
 163         push  edi
 164         push  esi
 165         push  ebp
 166         push  ebx
 167         pushfd
 168         sub  esp,64
 169         cld
 170 
 171 
 172 
 173 
 174         mov  esi, [esp+88]
 175         mov  edi, [esi+28]
 176 
 177 
 178 
 179 
 180 
 181 
 182 
 183         mov  edx, [esi+4]
 184         mov  eax, [esi+0]
 185 
 186         add  edx,eax
 187         sub  edx,11
 188 
 189         mov  [esp+44],eax
 190         mov  [esp+20],edx
 191 
 192         mov  ebp, [esp+92]
 193         mov  ecx, [esi+16]
 194         mov  ebx, [esi+12]
 195 
 196         sub  ebp,ecx
 197         neg  ebp
 198         add  ebp,ebx
 199 
 200         sub  ecx,257
 201         add  ecx,ebx
 202 
 203         mov  [esp+60],ebx
 204         mov  [esp+40],ebp
 205         mov  [esp+16],ecx
 206 ; 285 "inffast.S"
 207         mov  eax, [edi+lencode_state]
 208         mov  ecx, [edi+distcode_state]
 209 
 210         mov  [esp+8],eax
 211         mov  [esp+12],ecx
 212 
 213         mov  eax,1
 214         mov  ecx, [edi+lenbits_state]
 215         shl  eax,cl
 216         dec  eax
 217         mov  [esp+0],eax
 218 
 219         mov  eax,1
 220         mov  ecx, [edi+distbits_state]
 221         shl  eax,cl
 222         dec  eax
 223         mov  [esp+4],eax
 224 
 225         mov  eax, [edi+wsize_state]
 226         mov  ecx, [edi+write_state]
 227         mov  edx, [edi+window_state]
 228 
 229         mov  [esp+52],eax
 230         mov  [esp+48],ecx
 231         mov  [esp+56],edx
 232 
 233         mov  ebp, [edi+hold_state]
 234         mov  ebx, [edi+bits_state]
 235 ; 321 "inffast.S"
 236         mov  esi, [esp+44]
 237         mov  ecx, [esp+20]
 238         cmp  ecx,esi
 239         ja   L_align_long
 240 
 241         add  ecx,11
 242         sub  ecx,esi
 243         mov  eax,12
 244         sub  eax,ecx
 245         lea  edi, [esp+28]
 246         rep movsb
 247         mov  ecx,eax
 248         xor  eax,eax
 249         rep stosb
 250         lea  esi, [esp+28]
 251         mov  [esp+20],esi
 252         jmp  L_is_aligned
 253 
 254 
 255 L_align_long:
 256         test  esi,3
 257         jz   L_is_aligned
 258         xor  eax,eax
 259         mov  al, [esi]
 260         inc  esi
 261         mov  ecx,ebx
 262         add  ebx,8
 263         shl  eax,cl
 264         or  ebp,eax
 265         jmp L_align_long
 266 
 267 L_is_aligned:
 268         mov  edi, [esp+60]
 269 ; 366 "inffast.S"
 270 L_check_mmx:
 271         cmp  dword ptr [inflate_fast_use_mmx],2
 272         je   L_init_mmx
 273         ja   L_do_loop
 274 
 275         push  eax
 276         push  ebx
 277         push  ecx
 278         push  edx
 279         pushfd
 280         mov  eax, [esp]
 281         xor  dword ptr [esp],0200000h
 282 
 283 
 284 
 285 
 286         popfd
 287         pushfd
 288         pop  edx
 289         xor  edx,eax
 290         jz   L_dont_use_mmx
 291         xor  eax,eax
 292         cpuid
 293         cmp  ebx,0756e6547h
 294         jne  L_dont_use_mmx
 295         cmp  ecx,06c65746eh
 296         jne  L_dont_use_mmx
 297         cmp  edx,049656e69h
 298         jne  L_dont_use_mmx
 299         mov  eax,1
 300         cpuid
 301         shr  eax,8
 302         and  eax,15
 303         cmp  eax,6
 304         jne  L_dont_use_mmx
 305         test  edx,0800000h
 306         jnz  L_use_mmx
 307         jmp  L_dont_use_mmx
 308 L_use_mmx:
 309         mov  dword ptr [inflate_fast_use_mmx],2
 310         jmp  L_check_mmx_pop
 311 L_dont_use_mmx:
 312         mov  dword ptr [inflate_fast_use_mmx],3
 313 L_check_mmx_pop:
 314         pop  edx
 315         pop  ecx
 316         pop  ebx
 317         pop  eax
 318         jmp  L_check_mmx
 319 ; 426 "inffast.S"
 320 ALIGN 4
 321 L_do_loop:
 322 ; 437 "inffast.S"
 323         cmp  bl,15
 324         ja   L_get_length_code
 325 
 326         xor  eax,eax
 327         lodsw
 328         mov  cl,bl
 329         add  bl,16
 330         shl  eax,cl
 331         or  ebp,eax
 332 
 333 L_get_length_code:
 334         mov  edx, [esp+0]
 335         mov  ecx, [esp+8]
 336         and  edx,ebp
 337         mov  eax, [ecx+edx*4]
 338 
 339 L_dolen:
 340 
 341 
 342 
 343 
 344 
 345 
 346         mov  cl,ah
 347         sub  bl,ah
 348         shr  ebp,cl
 349 
 350 
 351 
 352 
 353 
 354 
 355         test  al,al
 356         jnz   L_test_for_length_base
 357 
 358         shr  eax,16
 359         stosb
 360 
 361 L_while_test:
 362 
 363 
 364         cmp  [esp+16],edi
 365         jbe  L_break_loop
 366 
 367         cmp  [esp+20],esi
 368         ja   L_do_loop
 369         jmp  L_break_loop
 370 
 371 L_test_for_length_base:
 372 ; 502 "inffast.S"
 373         mov  edx,eax
 374         shr  edx,16
 375         mov  cl,al
 376 
 377         test  al,16
 378         jz   L_test_for_second_level_length
 379         and  cl,15
 380         jz   L_save_len
 381         cmp  bl,cl
 382         jae  L_add_bits_to_len
 383 
 384         mov  ch,cl
 385         xor  eax,eax
 386         lodsw
 387         mov  cl,bl
 388         add  bl,16
 389         shl  eax,cl
 390         or  ebp,eax
 391         mov  cl,ch
 392 
 393 L_add_bits_to_len:
 394         mov  eax,1
 395         shl  eax,cl
 396         dec  eax
 397         sub  bl,cl
 398         and  eax,ebp
 399         shr  ebp,cl
 400         add  edx,eax
 401 
 402 L_save_len:
 403         mov  [esp+24],edx
 404 
 405 
 406 L_decode_distance:
 407 ; 549 "inffast.S"
 408         cmp  bl,15
 409         ja   L_get_distance_code
 410 
 411         xor  eax,eax
 412         lodsw
 413         mov  cl,bl
 414         add  bl,16
 415         shl  eax,cl
 416         or  ebp,eax
 417 
 418 L_get_distance_code:
 419         mov  edx, [esp+4]
 420         mov  ecx, [esp+12]
 421         and  edx,ebp
 422         mov  eax, [ecx+edx*4]
 423 
 424 
 425 L_dodist:
 426         mov  edx,eax
 427         shr  edx,16
 428         mov  cl,ah
 429         sub  bl,ah
 430         shr  ebp,cl
 431 ; 584 "inffast.S"
 432         mov  cl,al
 433 
 434         test  al,16
 435         jz  L_test_for_second_level_dist
 436         and  cl,15
 437         jz  L_check_dist_one
 438         cmp  bl,cl
 439         jae  L_add_bits_to_dist
 440 
 441         mov  ch,cl
 442         xor  eax,eax
 443         lodsw
 444         mov  cl,bl
 445         add  bl,16
 446         shl  eax,cl
 447         or  ebp,eax
 448         mov  cl,ch
 449 
 450 L_add_bits_to_dist:
 451         mov  eax,1
 452         shl  eax,cl
 453         dec  eax
 454         sub  bl,cl
 455         and  eax,ebp
 456         shr  ebp,cl
 457         add  edx,eax
 458         jmp  L_check_window
 459 
 460 L_check_window:
 461 ; 625 "inffast.S"
 462         mov  [esp+44],esi
 463         mov  eax,edi
 464         sub  eax, [esp+40]
 465 
 466         cmp  eax,edx
 467         jb   L_clip_window
 468 
 469         mov  ecx, [esp+24]
 470         mov  esi,edi
 471         sub  esi,edx
 472 
 473         sub  ecx,3
 474         mov  al, [esi]
 475         mov  [edi],al
 476         mov  al, [esi+1]
 477         mov  dl, [esi+2]
 478         add  esi,3
 479         mov  [edi+1],al
 480         mov  [edi+2],dl
 481         add  edi,3
 482         rep movsb
 483 
 484         mov  esi, [esp+44]
 485         jmp  L_while_test
 486 
 487 ALIGN 4
 488 L_check_dist_one:
 489         cmp  edx,1
 490         jne  L_check_window
 491         cmp  [esp+40],edi
 492         je  L_check_window
 493 
 494         dec  edi
 495         mov  ecx, [esp+24]
 496         mov  al, [edi]
 497         sub  ecx,3
 498 
 499         mov  [edi+1],al
 500         mov  [edi+2],al
 501         mov  [edi+3],al
 502         add  edi,4
 503         rep stosb
 504 
 505         jmp  L_while_test
 506 
 507 ALIGN 4
 508 L_test_for_second_level_length:
 509 
 510 
 511 
 512 
 513         test  al,64
 514         jnz   L_test_for_end_of_block
 515 
 516         mov  eax,1
 517         shl  eax,cl
 518         dec  eax
 519         and  eax,ebp
 520         add  eax,edx
 521         mov  edx, [esp+8]
 522         mov  eax, [edx+eax*4]
 523         jmp  L_dolen
 524 
 525 ALIGN 4
 526 L_test_for_second_level_dist:
 527 
 528 
 529 
 530 
 531         test  al,64
 532         jnz   L_invalid_distance_code
 533 
 534         mov  eax,1
 535         shl  eax,cl
 536         dec  eax
 537         and  eax,ebp
 538         add  eax,edx
 539         mov  edx, [esp+12]
 540         mov  eax, [edx+eax*4]
 541         jmp  L_dodist
 542 
 543 ALIGN 4
 544 L_clip_window:
 545 ; 721 "inffast.S"
 546         mov  ecx,eax
 547         mov  eax, [esp+52]
 548         neg  ecx
 549         mov  esi, [esp+56]
 550 
 551         cmp  eax,edx
 552         jb   L_invalid_distance_too_far
 553 
 554         add  ecx,edx
 555         cmp  dword ptr [esp+48],0
 556         jne  L_wrap_around_window
 557 
 558         sub  eax,ecx
 559         add  esi,eax
 560 ; 749 "inffast.S"
 561         mov  eax, [esp+24]
 562         cmp  eax,ecx
 563         jbe  L_do_copy1
 564 
 565         sub  eax,ecx
 566         rep movsb
 567         mov  esi,edi
 568         sub  esi,edx
 569         jmp  L_do_copy1
 570 
 571         cmp  eax,ecx
 572         jbe  L_do_copy1
 573 
 574         sub  eax,ecx
 575         rep movsb
 576         mov  esi,edi
 577         sub  esi,edx
 578         jmp  L_do_copy1
 579 
 580 L_wrap_around_window:
 581 ; 793 "inffast.S"
 582         mov  eax, [esp+48]
 583         cmp  ecx,eax
 584         jbe  L_contiguous_in_window
 585 
 586         add  esi, [esp+52]
 587         add  esi,eax
 588         sub  esi,ecx
 589         sub  ecx,eax
 590 
 591 
 592         mov  eax, [esp+24]
 593         cmp  eax,ecx
 594         jbe  L_do_copy1
 595 
 596         sub  eax,ecx
 597         rep movsb
 598         mov  esi, [esp+56]
 599         mov  ecx, [esp+48]
 600         cmp  eax,ecx
 601         jbe  L_do_copy1
 602 
 603         sub  eax,ecx
 604         rep movsb
 605         mov  esi,edi
 606         sub  esi,edx
 607         jmp  L_do_copy1
 608 
 609 L_contiguous_in_window:
 610 ; 836 "inffast.S"
 611         add  esi,eax
 612         sub  esi,ecx
 613 
 614 
 615         mov  eax, [esp+24]
 616         cmp  eax,ecx
 617         jbe  L_do_copy1
 618 
 619         sub  eax,ecx
 620         rep movsb
 621         mov  esi,edi
 622         sub  esi,edx
 623 
 624 L_do_copy1:
 625 ; 862 "inffast.S"
 626         mov  ecx,eax
 627         rep movsb
 628 
 629         mov  esi, [esp+44]
 630         jmp  L_while_test
 631 ; 878 "inffast.S"
 632 ALIGN 4
 633 L_init_mmx:
 634         emms
 635 
 636 
 637 
 638 
 639 
 640         movd mm0,ebp
 641         mov  ebp,ebx
 642 ; 896 "inffast.S"
 643         movd mm4,dword ptr [esp+0]
 644         movq mm3,mm4
 645         movd mm5,dword ptr [esp+4]
 646         movq mm2,mm5
 647         pxor mm1,mm1
 648         mov  ebx, [esp+8]
 649         jmp  L_do_loop_mmx
 650 
 651 ALIGN 4
 652 L_do_loop_mmx:
 653         psrlq mm0,mm1
 654 
 655         cmp  ebp,32
 656         ja  L_get_length_code_mmx
 657 
 658         movd mm6,ebp
 659         movd mm7,dword ptr [esi]
 660         add  esi,4
 661         psllq mm7,mm6
 662         add  ebp,32
 663         por mm0,mm7
 664 
 665 L_get_length_code_mmx:
 666         pand mm4,mm0
 667         movd eax,mm4
 668         movq mm4,mm3
 669         mov  eax, [ebx+eax*4]
 670 
 671 L_dolen_mmx:
 672         movzx  ecx,ah
 673         movd mm1,ecx
 674         sub  ebp,ecx
 675 
 676         test  al,al
 677         jnz L_test_for_length_base_mmx
 678 
 679         shr  eax,16
 680         stosb
 681 
 682 L_while_test_mmx:
 683 
 684 
 685         cmp  [esp+16],edi
 686         jbe L_break_loop
 687 
 688         cmp  [esp+20],esi
 689         ja L_do_loop_mmx
 690         jmp L_break_loop
 691 
 692 L_test_for_length_base_mmx:
 693 
 694         mov  edx,eax
 695         shr  edx,16
 696 
 697         test  al,16
 698         jz  L_test_for_second_level_length_mmx
 699         and  eax,15
 700         jz L_decode_distance_mmx
 701 
 702         psrlq mm0,mm1
 703         movd mm1,eax
 704         movd ecx,mm0
 705         sub  ebp,eax
 706         and  ecx, [inflate_fast_mask+eax*4]
 707         add  edx,ecx
 708 
 709 L_decode_distance_mmx:
 710         psrlq mm0,mm1
 711 
 712         cmp  ebp,32
 713         ja L_get_dist_code_mmx
 714 
 715         movd mm6,ebp
 716         movd mm7,dword ptr [esi]
 717         add  esi,4
 718         psllq mm7,mm6
 719         add  ebp,32
 720         por mm0,mm7
 721 
 722 L_get_dist_code_mmx:
 723         mov  ebx, [esp+12]
 724         pand mm5,mm0
 725         movd eax,mm5
 726         movq mm5,mm2
 727         mov  eax, [ebx+eax*4]
 728 
 729 L_dodist_mmx:
 730 
 731         movzx  ecx,ah
 732         mov  ebx,eax
 733         shr  ebx,16
 734         sub  ebp,ecx
 735         movd mm1,ecx
 736 
 737         test  al,16
 738         jz L_test_for_second_level_dist_mmx
 739         and  eax,15
 740         jz L_check_dist_one_mmx
 741 
 742 L_add_bits_to_dist_mmx:
 743         psrlq mm0,mm1
 744         movd mm1,eax
 745         movd ecx,mm0
 746         sub  ebp,eax
 747         and  ecx, [inflate_fast_mask+eax*4]
 748         add  ebx,ecx
 749 
 750 L_check_window_mmx:
 751         mov  [esp+44],esi
 752         mov  eax,edi
 753         sub  eax, [esp+40]
 754 
 755         cmp  eax,ebx
 756         jb L_clip_window_mmx
 757 
 758         mov  ecx,edx
 759         mov  esi,edi
 760         sub  esi,ebx
 761 
 762         sub  ecx,3
 763         mov  al, [esi]
 764         mov  [edi],al
 765         mov  al, [esi+1]
 766         mov  dl, [esi+2]
 767         add  esi,3
 768         mov  [edi+1],al
 769         mov  [edi+2],dl
 770         add  edi,3
 771         rep movsb
 772 
 773         mov  esi, [esp+44]
 774         mov  ebx, [esp+8]
 775         jmp  L_while_test_mmx
 776 
 777 ALIGN 4
 778 L_check_dist_one_mmx:
 779         cmp  ebx,1
 780         jne  L_check_window_mmx
 781         cmp  [esp+40],edi
 782         je   L_check_window_mmx
 783 
 784         dec  edi
 785         mov  ecx,edx
 786         mov  al, [edi]
 787         sub  ecx,3
 788 
 789         mov  [edi+1],al
 790         mov  [edi+2],al
 791         mov  [edi+3],al
 792         add  edi,4
 793         rep stosb
 794 
 795         mov  ebx, [esp+8]
 796         jmp  L_while_test_mmx
 797 
 798 ALIGN 4
 799 L_test_for_second_level_length_mmx:
 800         test  al,64
 801         jnz L_test_for_end_of_block
 802 
 803         and  eax,15
 804         psrlq mm0,mm1
 805         movd ecx,mm0
 806         and  ecx, [inflate_fast_mask+eax*4]
 807         add  ecx,edx
 808         mov  eax, [ebx+ecx*4]
 809         jmp L_dolen_mmx
 810 
 811 ALIGN 4
 812 L_test_for_second_level_dist_mmx:
 813         test  al,64
 814         jnz L_invalid_distance_code
 815 
 816         and  eax,15
 817         psrlq mm0,mm1
 818         movd ecx,mm0
 819         and  ecx, [inflate_fast_mask+eax*4]
 820         mov  eax, [esp+12]
 821         add  ecx,ebx
 822         mov  eax, [eax+ecx*4]
 823         jmp  L_dodist_mmx
 824 
 825 ALIGN 4
 826 L_clip_window_mmx:
 827 
 828         mov  ecx,eax
 829         mov  eax, [esp+52]
 830         neg  ecx
 831         mov  esi, [esp+56]
 832 
 833         cmp  eax,ebx
 834         jb  L_invalid_distance_too_far
 835 
 836         add  ecx,ebx
 837         cmp  dword ptr [esp+48],0
 838         jne  L_wrap_around_window_mmx
 839 
 840         sub  eax,ecx
 841         add  esi,eax
 842 
 843         cmp  edx,ecx
 844         jbe  L_do_copy1_mmx
 845 
 846         sub  edx,ecx
 847         rep movsb
 848         mov  esi,edi
 849         sub  esi,ebx
 850         jmp  L_do_copy1_mmx
 851 
 852         cmp  edx,ecx
 853         jbe  L_do_copy1_mmx
 854 
 855         sub  edx,ecx
 856         rep movsb
 857         mov  esi,edi
 858         sub  esi,ebx
 859         jmp  L_do_copy1_mmx
 860 
 861 L_wrap_around_window_mmx:
 862 
 863         mov  eax, [esp+48]
 864         cmp  ecx,eax
 865         jbe  L_contiguous_in_window_mmx
 866 
 867         add  esi, [esp+52]
 868         add  esi,eax
 869         sub  esi,ecx
 870         sub  ecx,eax
 871 
 872 
 873         cmp  edx,ecx
 874         jbe  L_do_copy1_mmx
 875 
 876         sub  edx,ecx
 877         rep movsb
 878         mov  esi, [esp+56]
 879         mov  ecx, [esp+48]
 880         cmp  edx,ecx
 881         jbe  L_do_copy1_mmx
 882 
 883         sub  edx,ecx
 884         rep movsb
 885         mov  esi,edi
 886         sub  esi,ebx
 887         jmp  L_do_copy1_mmx
 888 
 889 L_contiguous_in_window_mmx:
 890 
 891         add  esi,eax
 892         sub  esi,ecx
 893 
 894 
 895         cmp  edx,ecx
 896         jbe  L_do_copy1_mmx
 897 
 898         sub  edx,ecx
 899         rep movsb
 900         mov  esi,edi
 901         sub  esi,ebx
 902 
 903 L_do_copy1_mmx:
 904 
 905 
 906         mov  ecx,edx
 907         rep movsb
 908 
 909         mov  esi, [esp+44]
 910         mov  ebx, [esp+8]
 911         jmp  L_while_test_mmx
 912 ; 1174 "inffast.S"
 913 L_invalid_distance_code:
 914 
 915 
 916 
 917 
 918 
 919         mov  ecx, invalid_distance_code_msg
 920         mov  edx,INFLATE_MODE_BAD
 921         jmp  L_update_stream_state
 922 
 923 L_test_for_end_of_block:
 924 
 925 
 926 
 927 
 928 
 929         test  al,32
 930         jz  L_invalid_literal_length_code
 931 
 932         mov  ecx,0
 933         mov  edx,INFLATE_MODE_TYPE
 934         jmp  L_update_stream_state
 935 
 936 L_invalid_literal_length_code:
 937 
 938 
 939 
 940 
 941 
 942         mov  ecx, invalid_literal_length_code_msg
 943         mov  edx,INFLATE_MODE_BAD
 944         jmp  L_update_stream_state
 945 
 946 L_invalid_distance_too_far:
 947 
 948 
 949 
 950         mov  esi, [esp+44]
 951         mov  ecx, invalid_distance_too_far_msg
 952         mov  edx,INFLATE_MODE_BAD
 953         jmp  L_update_stream_state
 954 
 955 L_update_stream_state:
 956 
 957         mov  eax, [esp+88]
 958         test  ecx,ecx
 959         jz  L_skip_msg
 960         mov  [eax+24],ecx
 961 L_skip_msg:
 962         mov  eax, [eax+28]
 963         mov  [eax+mode_state],edx
 964         jmp  L_break_loop
 965 
 966 ALIGN 4
 967 L_break_loop:
 968 ; 1243 "inffast.S"
 969         cmp  dword ptr [inflate_fast_use_mmx],2
 970         jne  L_update_next_in
 971 
 972 
 973 
 974         mov  ebx,ebp
 975 
 976 L_update_next_in:
 977 ; 1266 "inffast.S"
 978         mov  eax, [esp+88]
 979         mov  ecx,ebx
 980         mov  edx, [eax+28]
 981         shr  ecx,3
 982         sub  esi,ecx
 983         shl  ecx,3
 984         sub  ebx,ecx
 985         mov  [eax+12],edi
 986         mov  [edx+bits_state],ebx
 987         mov  ecx,ebx
 988 
 989         lea  ebx, [esp+28]
 990         cmp  [esp+20],ebx
 991         jne  L_buf_not_used
 992 
 993         sub  esi,ebx
 994         mov  ebx, [eax+0]
 995         mov  [esp+20],ebx
 996         add  esi,ebx
 997         mov  ebx, [eax+4]
 998         sub  ebx,11
 999         add  [esp+20],ebx
1000 
1001 L_buf_not_used:
1002         mov  [eax+0],esi
1003 
1004         mov  ebx,1
1005         shl  ebx,cl
1006         dec  ebx
1007 
1008 
1009 
1010 
1011 
1012         cmp  dword ptr [inflate_fast_use_mmx],2
1013         jne  L_update_hold
1014 
1015 
1016 
1017         psrlq mm0,mm1
1018         movd ebp,mm0
1019 
1020         emms
1021 
1022 L_update_hold:
1023 
1024 
1025 
1026         and  ebp,ebx
1027         mov  [edx+hold_state],ebp
1028 
1029 
1030 
1031 
1032         mov  ebx, [esp+20]
1033         cmp  ebx,esi
1034         jbe  L_last_is_smaller
1035 
1036         sub  ebx,esi
1037         add  ebx,11
1038         mov  [eax+4],ebx
1039         jmp  L_fixup_out
1040 L_last_is_smaller:
1041         sub  esi,ebx
1042         neg  esi
1043         add  esi,11
1044         mov  [eax+4],esi
1045 
1046 
1047 
1048 
1049 L_fixup_out:
1050 
1051         mov  ebx, [esp+16]
1052         cmp  ebx,edi
1053         jbe  L_end_is_smaller
1054 
1055         sub  ebx,edi
1056         add  ebx,257
1057         mov  [eax+16],ebx
1058         jmp  L_done
1059 L_end_is_smaller:
1060         sub  edi,ebx
1061         neg  edi
1062         add  edi,257
1063         mov  [eax+16],edi
1064 
1065 
1066 
1067 
1068 
1069 L_done:
1070         add  esp,64
1071         popfd
1072         pop  ebx
1073         pop  ebp
1074         pop  esi
1075         pop  edi
1076         ret
1077 _inflate_fast endp
1078 
1079 _TEXT   ends
1080 end