1 #!/usr/local/bin/perl
   2 
   3 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
   4 push(@INC,"${dir}","${dir}../../perlasm");
   5 require "x86asm.pl";
   6 
   7 &asm_init($ARGV[0],$0);
   8 
   9 &bn_mul_comba("bn_mul_comba8",8);
  10 &bn_mul_comba("bn_mul_comba4",4);
  11 &bn_sqr_comba("bn_sqr_comba8",8);
  12 &bn_sqr_comba("bn_sqr_comba4",4);
  13 
  14 &asm_finish();
  15 
  16 sub mul_add_c
  17         {
  18         local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
  19 
  20         # pos == -1 if eax and edx are pre-loaded, 0 to load from next
  21         # words, and 1 if load return value
  22 
  23         &comment("mul a[$ai]*b[$bi]");
  24 
  25         # "eax" and "edx" will always be pre-loaded.
  26         # &mov("eax",&DWP($ai*4,$a,"",0)) ;
  27         # &mov("edx",&DWP($bi*4,$b,"",0));
  28 
  29         &mul("edx");
  30         &add($c0,"eax");
  31          &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0;        # laod next a
  32          &mov("eax",&wparam(0)) if $pos > 0;                 # load r[]
  33          ###
  34         &adc($c1,"edx");
  35          &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0;        # laod next b
  36          &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1;        # laod next b
  37          ###
  38         &adc($c2,0);
  39          # is pos > 1, it means it is the last loop
  40          &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0;                # save r[];
  41         &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;         # laod next a
  42         }
  43 
  44 sub sqr_add_c
  45         {
  46         local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
  47 
  48         # pos == -1 if eax and edx are pre-loaded, 0 to load from next
  49         # words, and 1 if load return value
  50 
  51         &comment("sqr a[$ai]*a[$bi]");
  52 
  53         # "eax" and "edx" will always be pre-loaded.
  54         # &mov("eax",&DWP($ai*4,$a,"",0)) ;
  55         # &mov("edx",&DWP($bi*4,$b,"",0));
  56 
  57         if ($ai == $bi)
  58                 { &mul("eax");}
  59         else
  60                 { &mul("edx");}
  61         &add($c0,"eax");
  62          &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0;        # load next a
  63          ###
  64         &adc($c1,"edx");
  65          &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb);
  66          ###
  67         &adc($c2,0);
  68          # is pos > 1, it means it is the last loop
  69          &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0;           # save r[];
  70         &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;         # load next b
  71         }
  72 
  73 sub sqr_add_c2
  74         {
  75         local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
  76 
  77         # pos == -1 if eax and edx are pre-loaded, 0 to load from next
  78         # words, and 1 if load return value
  79 
  80         &comment("sqr a[$ai]*a[$bi]");
  81 
  82         # "eax" and "edx" will always be pre-loaded.
  83         # &mov("eax",&DWP($ai*4,$a,"",0)) ;
  84         # &mov("edx",&DWP($bi*4,$a,"",0));
  85 
  86         if ($ai == $bi)
  87                 { &mul("eax");}
  88         else
  89                 { &mul("edx");}
  90         &add("eax","eax");
  91          ###
  92         &adc("edx","edx");
  93          ###
  94         &adc($c2,0);
  95          &add($c0,"eax");
  96         &adc($c1,"edx");
  97          &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0;        # load next a
  98          &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;        # load next b
  99         &adc($c2,0);
 100         &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0;            # save r[];
 101          &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb);
 102          ###
 103         }
 104 
 105 sub bn_mul_comba
 106         {
 107         local($name,$num)=@_;
 108         local($a,$b,$c0,$c1,$c2);
 109         local($i,$as,$ae,$bs,$be,$ai,$bi);
 110         local($tot,$end);
 111 
 112         &function_begin_B($name,"");
 113 
 114         $c0="ebx";
 115         $c1="ecx";
 116         $c2="ebp";
 117         $a="esi";
 118         $b="edi";
 119 
 120         $as=0;
 121         $ae=0;
 122         $bs=0;
 123         $be=0;
 124         $tot=$num+$num-1;
 125 
 126         &push("esi");
 127          &mov($a,&wparam(1));
 128         &push("edi");
 129          &mov($b,&wparam(2));
 130         &push("ebp");
 131          &push("ebx");
 132 
 133         &xor($c0,$c0);
 134          &mov("eax",&DWP(0,$a,"",0));   # load the first word
 135         &xor($c1,$c1);
 136          &mov("edx",&DWP(0,$b,"",0));   # load the first second
 137 
 138         for ($i=0; $i<$tot; $i++)
 139                 {
 140                 $ai=$as;
 141                 $bi=$bs;
 142                 $end=$be+1;
 143 
 144                 &comment("################## Calculate word $i");
 145 
 146                 for ($j=$bs; $j<$end; $j++)
 147                         {
 148                         &xor($c2,$c2) if ($j == $bs);
 149                         if (($j+1) == $end)
 150                                 {
 151                                 $v=1;
 152                                 $v=2 if (($i+1) == $tot);
 153                                 }
 154                         else
 155                                 { $v=0; }
 156                         if (($j+1) != $end)
 157                                 {
 158                                 $na=($ai-1);
 159                                 $nb=($bi+1);
 160                                 }
 161                         else
 162                                 {
 163                                 $na=$as+($i < ($num-1));
 164                                 $nb=$bs+($i >= ($num-1));
 165                                 }
 166 #printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
 167                         &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
 168                         if ($v)
 169                                 {
 170                                 &comment("saved r[$i]");
 171                                 # &mov("eax",&wparam(0));
 172                                 # &mov(&DWP($i*4,"eax","",0),$c0);
 173                                 ($c0,$c1,$c2)=($c1,$c2,$c0);
 174                                 }
 175                         $ai--;
 176                         $bi++;
 177                         }
 178                 $as++ if ($i < ($num-1));
 179                 $ae++ if ($i >= ($num-1));
 180 
 181                 $bs++ if ($i >= ($num-1));
 182                 $be++ if ($i < ($num-1));
 183                 }
 184         &comment("save r[$i]");
 185         # &mov("eax",&wparam(0));
 186         &mov(&DWP($i*4,"eax","",0),$c0);
 187 
 188         &pop("ebx");
 189         &pop("ebp");
 190         &pop("edi");
 191         &pop("esi");
 192         &ret();
 193         &function_end_B($name);
 194         }
 195 
 196 sub bn_sqr_comba
 197         {
 198         local($name,$num)=@_;
 199         local($r,$a,$c0,$c1,$c2)=@_;
 200         local($i,$as,$ae,$bs,$be,$ai,$bi);
 201         local($b,$tot,$end,$half);
 202 
 203         &function_begin_B($name,"");
 204 
 205         $c0="ebx";
 206         $c1="ecx";
 207         $c2="ebp";
 208         $a="esi";
 209         $r="edi";
 210 
 211         &push("esi");
 212          &push("edi");
 213         &push("ebp");
 214          &push("ebx");
 215         &mov($r,&wparam(0));
 216          &mov($a,&wparam(1));
 217         &xor($c0,$c0);
 218          &xor($c1,$c1);
 219         &mov("eax",&DWP(0,$a,"",0)); # load the first word
 220 
 221         $as=0;
 222         $ae=0;
 223         $bs=0;
 224         $be=0;
 225         $tot=$num+$num-1;
 226 
 227         for ($i=0; $i<$tot; $i++)
 228                 {
 229                 $ai=$as;
 230                 $bi=$bs;
 231                 $end=$be+1;
 232 
 233                 &comment("############### Calculate word $i");
 234                 for ($j=$bs; $j<$end; $j++)
 235                         {
 236                         &xor($c2,$c2) if ($j == $bs);
 237                         if (($ai-1) < ($bi+1))
 238                                 {
 239                                 $v=1;
 240                                 $v=2 if ($i+1) == $tot;
 241                                 }
 242                         else
 243                                 { $v=0; }
 244                         if (!$v)
 245                                 {
 246                                 $na=$ai-1;
 247                                 $nb=$bi+1;
 248                                 }
 249                         else
 250                                 {
 251                                 $na=$as+($i < ($num-1));
 252                                 $nb=$bs+($i >= ($num-1));
 253                                 }
 254                         if ($ai == $bi)
 255                                 {
 256                                 &sqr_add_c($r,$a,$ai,$bi,
 257                                         $c0,$c1,$c2,$v,$i,$na,$nb);
 258                                 }
 259                         else
 260                                 {
 261                                 &sqr_add_c2($r,$a,$ai,$bi,
 262                                         $c0,$c1,$c2,$v,$i,$na,$nb);
 263                                 }
 264                         if ($v)
 265                                 {
 266                                 &comment("saved r[$i]");
 267                                 #&mov(&DWP($i*4,$r,"",0),$c0);
 268                                 ($c0,$c1,$c2)=($c1,$c2,$c0);
 269                                 last;
 270                                 }
 271                         $ai--;
 272                         $bi++;
 273                         }
 274                 $as++ if ($i < ($num-1));
 275                 $ae++ if ($i >= ($num-1));
 276 
 277                 $bs++ if ($i >= ($num-1));
 278                 $be++ if ($i < ($num-1));
 279                 }
 280         &mov(&DWP($i*4,$r,"",0),$c0);
 281         &pop("ebx");
 282         &pop("ebp");
 283         &pop("edi");
 284         &pop("esi");
 285         &ret();
 286         &function_end_B($name);
 287         }