1 /* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===
   2  *
   3  *                     The LLVM Compiler Infrastructure
   4  *
   5  * This file is dual licensed under the MIT and the University of Illinois Open
   6  * Source Licenses. See LICENSE.TXT for details.
   7  *
   8  * ===----------------------------------------------------------------------===
   9  *
  10  * This file implements __udivmoddi4 for the compiler_rt library.
  11  *
  12  * ===----------------------------------------------------------------------===
  13  */
  14 
  15 #include "int_lib.h"
  16 
  17 /* Effects: if rem != 0, *rem = a % b
  18  * Returns: a / b
  19  */
  20 
  21 /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
  22 
  23 COMPILER_RT_ABI du_int
  24 __udivmoddi4(du_int a, du_int b, du_int* rem)
  25 {
  26     const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
  27     const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
  28     udwords n;
  29     n.all = a;
  30     udwords d;
  31     d.all = b;
  32     udwords q;
  33     udwords r;
  34     unsigned sr;
  35     /* special cases, X is unknown, K != 0 */
  36     if (n.s.high == 0)
  37     {
  38         if (d.s.high == 0)
  39         {
  40             /* 0 X
  41              * ---
  42              * 0 X
  43              */
  44             if (rem)
  45                 *rem = n.s.low % d.s.low;
  46             return n.s.low / d.s.low;
  47         }
  48         /* 0 X
  49          * ---
  50          * K X
  51          */
  52         if (rem)
  53             *rem = n.s.low;
  54         return 0;
  55     }
  56     /* n.s.high != 0 */
  57     if (d.s.low == 0)
  58     {
  59         if (d.s.high == 0)
  60         {
  61             /* K X
  62              * ---
  63              * 0 0
  64              */ 
  65             if (rem)
  66                 *rem = n.s.high % d.s.low;
  67             return n.s.high / d.s.low;
  68         }
  69         /* d.s.high != 0 */
  70         if (n.s.low == 0)
  71         {
  72             /* K 0
  73              * ---
  74              * K 0
  75              */
  76             if (rem)
  77             {
  78                 r.s.high = n.s.high % d.s.high;
  79                 r.s.low = 0;
  80                 *rem = r.all;
  81             }
  82             return n.s.high / d.s.high;
  83         }
  84         /* K K
  85          * ---
  86          * K 0
  87          */
  88         if ((d.s.high & (d.s.high - 1)) == 0)     /* if d is a power of 2 */
  89         {
  90             if (rem)
  91             {
  92                 r.s.low = n.s.low;
  93                 r.s.high = n.s.high & (d.s.high - 1);
  94                 *rem = r.all;
  95             }
  96             return n.s.high >> __builtin_ctz(d.s.high);
  97         }
  98         /* K K
  99          * ---
 100          * K 0
 101          */
 102         sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
 103         /* 0 <= sr <= n_uword_bits - 2 or sr large */
 104         if (sr > n_uword_bits - 2)
 105         {
 106            if (rem)
 107                 *rem = n.all;
 108             return 0;
 109         }
 110         ++sr;
 111         /* 1 <= sr <= n_uword_bits - 1 */
 112         /* q.all = n.all << (n_udword_bits - sr); */
 113         q.s.low = 0;
 114         q.s.high = n.s.low << (n_uword_bits - sr);
 115         /* r.all = n.all >> sr; */
 116         r.s.high = n.s.high >> sr;
 117         r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
 118     }
 119     else  /* d.s.low != 0 */
 120     {
 121         if (d.s.high == 0)
 122         {
 123             /* K X
 124              * ---
 125              * 0 K
 126              */
 127             if ((d.s.low & (d.s.low - 1)) == 0)     /* if d is a power of 2 */
 128             {
 129                 if (rem)
 130                     *rem = n.s.low & (d.s.low - 1);
 131                 if (d.s.low == 1)
 132                     return n.all;
 133                 sr = __builtin_ctz(d.s.low);
 134                 q.s.high = n.s.high >> sr;
 135                 q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
 136                 return q.all;
 137             }
 138             /* K X
 139              * ---
 140              * 0 K
 141              */
 142             sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);
 143             /* 2 <= sr <= n_udword_bits - 1
 144              * q.all = n.all << (n_udword_bits - sr);
 145              * r.all = n.all >> sr;
 146              */
 147             if (sr == n_uword_bits)
 148             {
 149                 q.s.low = 0;
 150                 q.s.high = n.s.low;
 151                 r.s.high = 0;
 152                 r.s.low = n.s.high;
 153             }
 154             else if (sr < n_uword_bits)  // 2 <= sr <= n_uword_bits - 1
 155             {
 156                 q.s.low = 0;
 157                 q.s.high = n.s.low << (n_uword_bits - sr);
 158                 r.s.high = n.s.high >> sr;
 159                 r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
 160             }
 161             else              // n_uword_bits + 1 <= sr <= n_udword_bits - 1
 162             {
 163                 q.s.low = n.s.low << (n_udword_bits - sr);
 164                 q.s.high = (n.s.high << (n_udword_bits - sr)) |
 165                            (n.s.low >> (sr - n_uword_bits));
 166                 r.s.high = 0;
 167                 r.s.low = n.s.high >> (sr - n_uword_bits);
 168             }
 169         }
 170         else
 171         {
 172             /* K X
 173              * ---
 174              * K K
 175              */
 176             sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
 177             /* 0 <= sr <= n_uword_bits - 1 or sr large */
 178             if (sr > n_uword_bits - 1)
 179             {
 180                 if (rem)
 181                     *rem = n.all;
 182                 return 0;
 183             }
 184             ++sr;
 185             /* 1 <= sr <= n_uword_bits */
 186             /*  q.all = n.all << (n_udword_bits - sr); */
 187             q.s.low = 0;
 188             if (sr == n_uword_bits)
 189             {
 190                 q.s.high = n.s.low;
 191                 r.s.high = 0;
 192                 r.s.low = n.s.high;
 193             }
 194             else
 195             {
 196                 q.s.high = n.s.low << (n_uword_bits - sr);
 197                 r.s.high = n.s.high >> sr;
 198                 r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
 199             }
 200         }
 201     }
 202     /* Not a special case
 203      * q and r are initialized with:
 204      * q.all = n.all << (n_udword_bits - sr);
 205      * r.all = n.all >> sr;
 206      * 1 <= sr <= n_udword_bits - 1
 207      */
 208     su_int carry = 0;
 209     for (; sr > 0; --sr)
 210     {
 211         /* r:q = ((r:q)  << 1) | carry */
 212         r.s.high = (r.s.high << 1) | (r.s.low  >> (n_uword_bits - 1));
 213         r.s.low  = (r.s.low  << 1) | (q.s.high >> (n_uword_bits - 1));
 214         q.s.high = (q.s.high << 1) | (q.s.low  >> (n_uword_bits - 1));
 215         q.s.low  = (q.s.low  << 1) | carry;
 216         /* carry = 0;
 217          * if (r.all >= d.all)
 218          * {
 219          *      r.all -= d.all;
 220          *      carry = 1;
 221          * }
 222          */
 223         const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
 224         carry = s & 1;
 225         r.all -= d.all & s;
 226     }
 227     q.all = (q.all << 1) | carry;
 228     if (rem)
 229         *rem = r.all;
 230     return q.all;
 231 }