1 #!/usr/local/bin/perl
   2 #
   3 # The inner loop instruction sequence and the IP/FP modifications are from
   4 # Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk>
   5 #
   6 
   7 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
   8 push(@INC,"${dir}","${dir}../../perlasm");
   9 require "x86asm.pl";
  10 require "cbc.pl";
  11 require "desboth.pl";
  12 
# base code is in Microsoft
  14 # op dest, source
  15 # format.
  16 #
  17 
  18 &asm_init($ARGV[0],"des-586.pl");
  19 
  20 $L="edi";
  21 $R="esi";
  22 $trans="ebp";
  23 $small_footprint=1 if (grep(/\-DOPENSSL_SMALL_FOOTPRINT/,@ARGV));
  24 # one can discuss setting this variable to 1 unconditionally, as
  25 # the folded loop is only 3% slower than unrolled, but >7 times smaller
  26 
  27 &public_label("DES_SPtrans");
  28 
  29 &DES_encrypt_internal();
  30 &DES_decrypt_internal();
  31 &DES_encrypt("DES_encrypt1",1);
  32 &DES_encrypt("DES_encrypt2",0);
  33 &DES_encrypt3("DES_encrypt3",1);
  34 &DES_encrypt3("DES_decrypt3",0);
  35 &cbc("DES_ncbc_encrypt","DES_encrypt1","DES_encrypt1",0,4,5,3,5,-1);
  36 &cbc("DES_ede3_cbc_encrypt","DES_encrypt3","DES_decrypt3",0,6,7,3,4,5);
  37 &DES_SPtrans();
  38 
  39 &asm_finish();
  40 
  41 sub DES_encrypt_internal()
  42         {
  43         &function_begin_B("_x86_DES_encrypt");
  44 
  45         if ($small_footprint)
  46             {
  47             &lea("edx",&DWP(128,"ecx"));
  48             &push("edx");
  49             &push("ecx");
  50             &set_label("eloop");
  51                 &D_ENCRYPT(0,$L,$R,0,$trans,"eax","ebx","ecx","edx",&swtmp(0));
  52                 &comment("");
  53                 &D_ENCRYPT(1,$R,$L,2,$trans,"eax","ebx","ecx","edx",&swtmp(0));
  54                 &comment("");
  55                 &add("ecx",16);
  56                 &cmp("ecx",&swtmp(1));
  57                 &mov(&swtmp(0),"ecx");
  58                 &jb(&label("eloop"));
  59             &add("esp",8);
  60             }
  61         else
  62             {
  63             &push("ecx");
  64             for ($i=0; $i<16; $i+=2)
  65                 {
  66                 &comment("Round $i");
  67                 &D_ENCRYPT($i,$L,$R,$i*2,$trans,"eax","ebx","ecx","edx",&swtmp(0));
  68                 &comment("Round ".sprintf("%d",$i+1));
  69                 &D_ENCRYPT($i+1,$R,$L,($i+1)*2,$trans,"eax","ebx","ecx","edx",&swtmp(0));
  70                 }
  71             &add("esp",4);
  72         }
  73         &ret();
  74 
  75         &function_end_B("_x86_DES_encrypt");
  76         }
  77 
  78 sub DES_decrypt_internal()
  79         {
  80         &function_begin_B("_x86_DES_decrypt");
  81 
  82         if ($small_footprint)
  83             {
  84             &push("ecx");
  85             &lea("ecx",&DWP(128,"ecx"));
  86             &push("ecx");
  87             &set_label("dloop");
  88                 &D_ENCRYPT(0,$L,$R,-2,$trans,"eax","ebx","ecx","edx",&swtmp(0));
  89                 &comment("");
  90                 &D_ENCRYPT(1,$R,$L,-4,$trans,"eax","ebx","ecx","edx",&swtmp(0));
  91                 &comment("");
  92                 &sub("ecx",16);
  93                 &cmp("ecx",&swtmp(1));
  94                 &mov(&swtmp(0),"ecx");
  95                 &ja(&label("dloop"));
  96             &add("esp",8);
  97             }
  98         else
  99             {
 100             &push("ecx");
 101             for ($i=15; $i>0; $i-=2)
 102                 {
 103                 &comment("Round $i");
 104                 &D_ENCRYPT(15-$i,$L,$R,$i*2,$trans,"eax","ebx","ecx","edx",&swtmp(0));
 105                 &comment("Round ".sprintf("%d",$i-1));
 106                 &D_ENCRYPT(15-$i+1,$R,$L,($i-1)*2,$trans,"eax","ebx","ecx","edx",&swtmp(0));
 107                 }
 108             &add("esp",4);
 109             }
 110         &ret();
 111 
 112         &function_end_B("_x86_DES_decrypt");
 113         }
 114 
 115 sub DES_encrypt
 116         {
 117         local($name,$do_ip)=@_;
 118 
 119         &function_begin_B($name);
 120 
 121         &push("esi");
 122         &push("edi");
 123 
 124         &comment("");
 125         &comment("Load the 2 words");
 126 
 127         if ($do_ip)
 128                 {
 129                 &mov($R,&wparam(0));
 130                  &xor(      "ecx",          "ecx"           );
 131 
 132                 &push("ebx");
 133                 &push("ebp");
 134 
 135                 &mov("eax",&DWP(0,$R,"",0));
 136                  &mov("ebx",&wparam(2));        # get encrypt flag
 137                 &mov($L,&DWP(4,$R,"",0));
 138                 &comment("");
 139                 &comment("IP");
 140                 &IP_new("eax",$L,$R,3);
 141                 }
 142         else
 143                 {
 144                 &mov("eax",&wparam(0));
 145                  &xor(      "ecx",          "ecx"           );
 146 
 147                 &push("ebx");
 148                 &push("ebp");
 149 
 150                 &mov($R,&DWP(0,"eax","",0));
 151                  &mov("ebx",&wparam(2));        # get encrypt flag
 152                 &rotl($R,3);
 153                 &mov($L,&DWP(4,"eax","",0));
 154                 &rotl($L,3);
 155                 }
 156 
 157         # PIC-ification:-)
 158         &call       (&label("pic_point"));
 159         &set_label("pic_point");
 160         &blindpop($trans);
 161         &lea        ($trans,&DWP(&label("DES_SPtrans")."-".&label("pic_point"),$trans));
 162 
 163         &mov(       "ecx",  &wparam(1)  );
 164 
 165         &cmp("ebx","0");
 166         &je(&label("decrypt"));
 167         &call("_x86_DES_encrypt");
 168         &jmp(&label("done"));
 169         &set_label("decrypt");
 170         &call("_x86_DES_decrypt");
 171         &set_label("done");
 172 
 173         if ($do_ip)
 174                 {
 175                 &comment("");
 176                 &comment("FP");
 177                 &mov("edx",&wparam(0));
 178                 &FP_new($L,$R,"eax",3);
 179 
 180                 &mov(&DWP(0,"edx","",0),"eax");
 181                 &mov(&DWP(4,"edx","",0),$R);
 182                 }
 183         else
 184                 {
 185                 &comment("");
 186                 &comment("Fixup");
 187                 &rotr($L,3);                # r
 188                  &mov("eax",&wparam(0));
 189                 &rotr($R,3);                # l
 190                  &mov(&DWP(0,"eax","",0),$L);
 191                  &mov(&DWP(4,"eax","",0),$R);
 192                 }
 193 
 194         &pop("ebp");
 195         &pop("ebx");
 196         &pop("edi");
 197         &pop("esi");
 198         &ret();
 199 
 200         &function_end_B($name);
 201         }
 202 
 203 sub D_ENCRYPT
 204         {
 205         local($r,$L,$R,$S,$trans,$u,$tmp1,$tmp2,$t,$wp1)=@_;
 206 
 207          &mov(      $u,             &DWP(&n2a($S*4),$tmp2,"",0));
 208         &xor(       $tmp1,          $tmp1);
 209          &mov(      $t,             &DWP(&n2a(($S+1)*4),$tmp2,"",0));
 210         &xor(       $u,             $R);
 211         &xor(       $tmp2,          $tmp2);
 212          &xor(      $t,             $R);
 213         &and(       $u,             "0xfcfcfcfc"    );
 214          &and(      $t,             "0xcfcfcfcf"    );
 215         &movb(      &LB($tmp1), &LB($u)     );
 216          &movb(     &LB($tmp2), &HB($u)     );
 217         &rotr(      $t,             4               );
 218         &xor(       $L,             &DWP("     ",$trans,$tmp1,0));
 219          &movb(     &LB($tmp1), &LB($t)     );
 220          &xor(      $L,             &DWP("0x200",$trans,$tmp2,0));
 221          &movb(     &LB($tmp2), &HB($t)     );
 222         &shr(       $u,             16);
 223          &xor(      $L,             &DWP("0x100",$trans,$tmp1,0));
 224          &movb(     &LB($tmp1), &HB($u)     );
 225         &shr(       $t,             16);
 226          &xor(      $L,             &DWP("0x300",$trans,$tmp2,0));
 227         &movb(      &LB($tmp2), &HB($t)     );
 228          &and(      $u,             "0xff"  );
 229         &and(       $t,             "0xff"  );
 230          &xor(      $L,             &DWP("0x600",$trans,$tmp1,0));
 231          &xor(      $L,             &DWP("0x700",$trans,$tmp2,0));
 232         &mov(       $tmp2,          $wp1    );
 233          &xor(      $L,             &DWP("0x400",$trans,$u,0));
 234          &xor(      $L,             &DWP("0x500",$trans,$t,0));
 235         }
 236 
 237 sub n2a
 238         {
 239         sprintf("%d",$_[0]);
 240         }
 241 
# now has a side effect of rotating $a by $shift
 243 sub R_PERM_OP
 244         {
 245         local($a,$b,$tt,$shift,$mask,$last)=@_;
 246 
 247         &rotl(      $a,             $shift          ) if ($shift != 0);
 248         &mov(       $tt,            $a              );
 249         &xor(       $a,             $b              );
 250         &and(       $a,             $mask           );
 251         # This can never succeed, and besides it is difficult to see what the
 252         # idea was - Ben 13 Feb 99
 253         if (!$last eq $b)
 254                 {
 255                 &xor(       $b,             $a              );
 256                 &xor(       $tt,            $a              );
 257                 }
 258         else
 259                 {
 260                 &xor(       $tt,            $a              );
 261                 &xor(       $b,             $a              );
 262                 }
 263         &comment("");
 264         }
 265 
 266 sub IP_new
 267         {
 268         local($l,$r,$tt,$lr)=@_;
 269 
 270         &R_PERM_OP($l,$r,$tt, 4,"0xf0f0f0f0",$l);
 271         &R_PERM_OP($r,$tt,$l,20,"0xfff0000f",$l);
 272         &R_PERM_OP($l,$tt,$r,14,"0x33333333",$r);
 273         &R_PERM_OP($tt,$r,$l,22,"0x03fc03fc",$r);
 274         &R_PERM_OP($l,$r,$tt, 9,"0xaaaaaaaa",$r);
 275 
 276         if ($lr != 3)
 277                 {
 278                 if (($lr-3) < 0)
 279                         { &rotr($tt,        3-$lr); }
 280                 else    { &rotl($tt,        $lr-3); }
 281                 }
 282         if ($lr != 2)
 283                 {
 284                 if (($lr-2) < 0)
 285                         { &rotr($r, 2-$lr); }
 286                 else    { &rotl($r, $lr-2); }
 287                 }
 288         }
 289 
 290 sub FP_new
 291         {
 292         local($l,$r,$tt,$lr)=@_;
 293 
 294         if ($lr != 2)
 295                 {
 296                 if (($lr-2) < 0)
 297                         { &rotl($r, 2-$lr); }
 298                 else    { &rotr($r, $lr-2); }
 299                 }
 300         if ($lr != 3)
 301                 {
 302                 if (($lr-3) < 0)
 303                         { &rotl($l, 3-$lr); }
 304                 else    { &rotr($l, $lr-3); }
 305                 }
 306 
 307         &R_PERM_OP($l,$r,$tt, 0,"0xaaaaaaaa",$r);
 308         &R_PERM_OP($tt,$r,$l,23,"0x03fc03fc",$r);
 309         &R_PERM_OP($l,$r,$tt,10,"0x33333333",$l);
 310         &R_PERM_OP($r,$tt,$l,18,"0xfff0000f",$l);
 311         &R_PERM_OP($l,$tt,$r,12,"0xf0f0f0f0",$r);
 312         &rotr($tt   , 4);
 313         }
 314 
 315 sub DES_SPtrans
 316         {
 317         &set_label("DES_SPtrans",64);
 318         &data_word(0x02080800, 0x00080000, 0x02000002, 0x02080802);
 319         &data_word(0x02000000, 0x00080802, 0x00080002, 0x02000002);
 320         &data_word(0x00080802, 0x02080800, 0x02080000, 0x00000802);
 321         &data_word(0x02000802, 0x02000000, 0x00000000, 0x00080002);
 322         &data_word(0x00080000, 0x00000002, 0x02000800, 0x00080800);
 323         &data_word(0x02080802, 0x02080000, 0x00000802, 0x02000800);
 324         &data_word(0x00000002, 0x00000800, 0x00080800, 0x02080002);
 325         &data_word(0x00000800, 0x02000802, 0x02080002, 0x00000000);
 326         &data_word(0x00000000, 0x02080802, 0x02000800, 0x00080002);
 327         &data_word(0x02080800, 0x00080000, 0x00000802, 0x02000800);
 328         &data_word(0x02080002, 0x00000800, 0x00080800, 0x02000002);
 329         &data_word(0x00080802, 0x00000002, 0x02000002, 0x02080000);
 330         &data_word(0x02080802, 0x00080800, 0x02080000, 0x02000802);
 331         &data_word(0x02000000, 0x00000802, 0x00080002, 0x00000000);
 332         &data_word(0x00080000, 0x02000000, 0x02000802, 0x02080800);
 333         &data_word(0x00000002, 0x02080002, 0x00000800, 0x00080802);
 334         # nibble 1
 335         &data_word(0x40108010, 0x00000000, 0x00108000, 0x40100000);
 336         &data_word(0x40000010, 0x00008010, 0x40008000, 0x00108000);
 337         &data_word(0x00008000, 0x40100010, 0x00000010, 0x40008000);
 338         &data_word(0x00100010, 0x40108000, 0x40100000, 0x00000010);
 339         &data_word(0x00100000, 0x40008010, 0x40100010, 0x00008000);
 340         &data_word(0x00108010, 0x40000000, 0x00000000, 0x00100010);
 341         &data_word(0x40008010, 0x00108010, 0x40108000, 0x40000010);
 342         &data_word(0x40000000, 0x00100000, 0x00008010, 0x40108010);
 343         &data_word(0x00100010, 0x40108000, 0x40008000, 0x00108010);
 344         &data_word(0x40108010, 0x00100010, 0x40000010, 0x00000000);
 345         &data_word(0x40000000, 0x00008010, 0x00100000, 0x40100010);
 346         &data_word(0x00008000, 0x40000000, 0x00108010, 0x40008010);
 347         &data_word(0x40108000, 0x00008000, 0x00000000, 0x40000010);
 348         &data_word(0x00000010, 0x40108010, 0x00108000, 0x40100000);
 349         &data_word(0x40100010, 0x00100000, 0x00008010, 0x40008000);
 350         &data_word(0x40008010, 0x00000010, 0x40100000, 0x00108000);
 351         # nibble 2
 352         &data_word(0x04000001, 0x04040100, 0x00000100, 0x04000101);
 353         &data_word(0x00040001, 0x04000000, 0x04000101, 0x00040100);
 354         &data_word(0x04000100, 0x00040000, 0x04040000, 0x00000001);
 355         &data_word(0x04040101, 0x00000101, 0x00000001, 0x04040001);
 356         &data_word(0x00000000, 0x00040001, 0x04040100, 0x00000100);
 357         &data_word(0x00000101, 0x04040101, 0x00040000, 0x04000001);
 358         &data_word(0x04040001, 0x04000100, 0x00040101, 0x04040000);
 359         &data_word(0x00040100, 0x00000000, 0x04000000, 0x00040101);
 360         &data_word(0x04040100, 0x00000100, 0x00000001, 0x00040000);
 361         &data_word(0x00000101, 0x00040001, 0x04040000, 0x04000101);
 362         &data_word(0x00000000, 0x04040100, 0x00040100, 0x04040001);
 363         &data_word(0x00040001, 0x04000000, 0x04040101, 0x00000001);
 364         &data_word(0x00040101, 0x04000001, 0x04000000, 0x04040101);
 365         &data_word(0x00040000, 0x04000100, 0x04000101, 0x00040100);
 366         &data_word(0x04000100, 0x00000000, 0x04040001, 0x00000101);
 367         &data_word(0x04000001, 0x00040101, 0x00000100, 0x04040000);
 368         # nibble 3
 369         &data_word(0x00401008, 0x10001000, 0x00000008, 0x10401008);
 370         &data_word(0x00000000, 0x10400000, 0x10001008, 0x00400008);
 371         &data_word(0x10401000, 0x10000008, 0x10000000, 0x00001008);
 372         &data_word(0x10000008, 0x00401008, 0x00400000, 0x10000000);
 373         &data_word(0x10400008, 0x00401000, 0x00001000, 0x00000008);
 374         &data_word(0x00401000, 0x10001008, 0x10400000, 0x00001000);
 375         &data_word(0x00001008, 0x00000000, 0x00400008, 0x10401000);
 376         &data_word(0x10001000, 0x10400008, 0x10401008, 0x00400000);
 377         &data_word(0x10400008, 0x00001008, 0x00400000, 0x10000008);
 378         &data_word(0x00401000, 0x10001000, 0x00000008, 0x10400000);
 379         &data_word(0x10001008, 0x00000000, 0x00001000, 0x00400008);
 380         &data_word(0x00000000, 0x10400008, 0x10401000, 0x00001000);
 381         &data_word(0x10000000, 0x10401008, 0x00401008, 0x00400000);
 382         &data_word(0x10401008, 0x00000008, 0x10001000, 0x00401008);
 383         &data_word(0x00400008, 0x00401000, 0x10400000, 0x10001008);
 384         &data_word(0x00001008, 0x10000000, 0x10000008, 0x10401000);
 385         # nibble 4
 386         &data_word(0x08000000, 0x00010000, 0x00000400, 0x08010420);
 387         &data_word(0x08010020, 0x08000400, 0x00010420, 0x08010000);
 388         &data_word(0x00010000, 0x00000020, 0x08000020, 0x00010400);
 389         &data_word(0x08000420, 0x08010020, 0x08010400, 0x00000000);
 390         &data_word(0x00010400, 0x08000000, 0x00010020, 0x00000420);
 391         &data_word(0x08000400, 0x00010420, 0x00000000, 0x08000020);
 392         &data_word(0x00000020, 0x08000420, 0x08010420, 0x00010020);
 393         &data_word(0x08010000, 0x00000400, 0x00000420, 0x08010400);
 394         &data_word(0x08010400, 0x08000420, 0x00010020, 0x08010000);
 395         &data_word(0x00010000, 0x00000020, 0x08000020, 0x08000400);
 396         &data_word(0x08000000, 0x00010400, 0x08010420, 0x00000000);
 397         &data_word(0x00010420, 0x08000000, 0x00000400, 0x00010020);
 398         &data_word(0x08000420, 0x00000400, 0x00000000, 0x08010420);
 399         &data_word(0x08010020, 0x08010400, 0x00000420, 0x00010000);
 400         &data_word(0x00010400, 0x08010020, 0x08000400, 0x00000420);
 401         &data_word(0x00000020, 0x00010420, 0x08010000, 0x08000020);
 402         # nibble 5
 403         &data_word(0x80000040, 0x00200040, 0x00000000, 0x80202000);
 404         &data_word(0x00200040, 0x00002000, 0x80002040, 0x00200000);
 405         &data_word(0x00002040, 0x80202040, 0x00202000, 0x80000000);
 406         &data_word(0x80002000, 0x80000040, 0x80200000, 0x00202040);
 407         &data_word(0x00200000, 0x80002040, 0x80200040, 0x00000000);
 408         &data_word(0x00002000, 0x00000040, 0x80202000, 0x80200040);
 409         &data_word(0x80202040, 0x80200000, 0x80000000, 0x00002040);
 410         &data_word(0x00000040, 0x00202000, 0x00202040, 0x80002000);
 411         &data_word(0x00002040, 0x80000000, 0x80002000, 0x00202040);
 412         &data_word(0x80202000, 0x00200040, 0x00000000, 0x80002000);
 413         &data_word(0x80000000, 0x00002000, 0x80200040, 0x00200000);
 414         &data_word(0x00200040, 0x80202040, 0x00202000, 0x00000040);
 415         &data_word(0x80202040, 0x00202000, 0x00200000, 0x80002040);
 416         &data_word(0x80000040, 0x80200000, 0x00202040, 0x00000000);
 417         &data_word(0x00002000, 0x80000040, 0x80002040, 0x80202000);
 418         &data_word(0x80200000, 0x00002040, 0x00000040, 0x80200040);
 419         # nibble 6
 420         &data_word(0x00004000, 0x00000200, 0x01000200, 0x01000004);
 421         &data_word(0x01004204, 0x00004004, 0x00004200, 0x00000000);
 422         &data_word(0x01000000, 0x01000204, 0x00000204, 0x01004000);
 423         &data_word(0x00000004, 0x01004200, 0x01004000, 0x00000204);
 424         &data_word(0x01000204, 0x00004000, 0x00004004, 0x01004204);
 425         &data_word(0x00000000, 0x01000200, 0x01000004, 0x00004200);
 426         &data_word(0x01004004, 0x00004204, 0x01004200, 0x00000004);
 427         &data_word(0x00004204, 0x01004004, 0x00000200, 0x01000000);
 428         &data_word(0x00004204, 0x01004000, 0x01004004, 0x00000204);
 429         &data_word(0x00004000, 0x00000200, 0x01000000, 0x01004004);
 430         &data_word(0x01000204, 0x00004204, 0x00004200, 0x00000000);
 431         &data_word(0x00000200, 0x01000004, 0x00000004, 0x01000200);
 432         &data_word(0x00000000, 0x01000204, 0x01000200, 0x00004200);
 433         &data_word(0x00000204, 0x00004000, 0x01004204, 0x01000000);
 434         &data_word(0x01004200, 0x00000004, 0x00004004, 0x01004204);
 435         &data_word(0x01000004, 0x01004200, 0x01004000, 0x00004004);
 436         # nibble 7
 437         &data_word(0x20800080, 0x20820000, 0x00020080, 0x00000000);
 438         &data_word(0x20020000, 0x00800080, 0x20800000, 0x20820080);
 439         &data_word(0x00000080, 0x20000000, 0x00820000, 0x00020080);
 440         &data_word(0x00820080, 0x20020080, 0x20000080, 0x20800000);
 441         &data_word(0x00020000, 0x00820080, 0x00800080, 0x20020000);
 442         &data_word(0x20820080, 0x20000080, 0x00000000, 0x00820000);
 443         &data_word(0x20000000, 0x00800000, 0x20020080, 0x20800080);
 444         &data_word(0x00800000, 0x00020000, 0x20820000, 0x00000080);
 445         &data_word(0x00800000, 0x00020000, 0x20000080, 0x20820080);
 446         &data_word(0x00020080, 0x20000000, 0x00000000, 0x00820000);
 447         &data_word(0x20800080, 0x20020080, 0x20020000, 0x00800080);
 448         &data_word(0x20820000, 0x00000080, 0x00800080, 0x20020000);
 449         &data_word(0x20820080, 0x00800000, 0x20800000, 0x20000080);
 450         &data_word(0x00820000, 0x00020080, 0x20020080, 0x20800000);
 451         &data_word(0x00000080, 0x20820000, 0x00820080, 0x00000000);
 452         &data_word(0x20000000, 0x20800080, 0x00020000, 0x00820080);
 453         }