1  | 
     | 
     | 
    //===-- lib/divsf3.c - Single-precision division ------------------*- C -*-===//  | 
    
    
    2  | 
     | 
     | 
    //  | 
    
    
    3  | 
     | 
     | 
    //                     The LLVM Compiler Infrastructure  | 
    
    
    4  | 
     | 
     | 
    //  | 
    
    
    5  | 
     | 
     | 
    // This file is dual licensed under the MIT and the University of Illinois Open  | 
    
    
    6  | 
     | 
     | 
    // Source Licenses. See LICENSE.TXT for details.  | 
    
    
    7  | 
     | 
     | 
    //  | 
    
    
    8  | 
     | 
     | 
    //===----------------------------------------------------------------------===//  | 
    
    
    9  | 
     | 
     | 
    //  | 
    
    
    10  | 
     | 
     | 
    // This file implements single-precision soft-float division  | 
    
    
    11  | 
     | 
     | 
    // with the IEEE-754 default rounding (to nearest, ties to even).  | 
    
    
    12  | 
     | 
     | 
    //  | 
    
    
    13  | 
     | 
     | 
    // For simplicity, this implementation currently flushes denormals to zero.  | 
    
    
    14  | 
     | 
     | 
    // It should be a fairly straightforward exercise to implement gradual  | 
    
    
    15  | 
     | 
     | 
    // underflow with correct rounding.  | 
    
    
    16  | 
     | 
     | 
    //  | 
    
    
    17  | 
     | 
     | 
    //===----------------------------------------------------------------------===//  | 
    
    
    18  | 
     | 
     | 
     | 
    
    
    19  | 
     | 
     | 
    #define SINGLE_PRECISION  | 
    
    
    20  | 
     | 
     | 
    #include "fp_lib.h"  | 
    
    
    21  | 
     | 
     | 
     | 
    
    
    22  | 
     | 
     | 
    ARM_EABI_FNALIAS(fdiv, divsf3)  | 
    
    
    23  | 
     | 
     | 
     | 
    
    
    24  | 
     | 
     | 
    COMPILER_RT_ABI fp_t  | 
    
    
    25  | 
     | 
     | 
    __divsf3(fp_t a, fp_t b) { | 
    
    
    26  | 
     | 
     | 
     | 
    
    
    27  | 
     | 
     | 
        const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;  | 
    
    
    28  | 
     | 
     | 
        const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;  | 
    
    
    29  | 
     | 
     | 
        const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit;  | 
    
    
    30  | 
     | 
     | 
     | 
    
    
    31  | 
     | 
     | 
        rep_t aSignificand = toRep(a) & significandMask;  | 
    
    
    32  | 
     | 
     | 
        rep_t bSignificand = toRep(b) & significandMask;  | 
    
    
    33  | 
     | 
     | 
        int scale = 0;  | 
    
    
    34  | 
     | 
     | 
     | 
    
    
    35  | 
     | 
     | 
        // Detect if a or b is zero, denormal, infinity, or NaN.  | 
    
    
    36  | 
     | 
     | 
        if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { | 
    
    
    37  | 
     | 
     | 
     | 
    
    
    38  | 
     | 
     | 
            const rep_t aAbs = toRep(a) & absMask;  | 
    
    
    39  | 
     | 
     | 
            const rep_t bAbs = toRep(b) & absMask;  | 
    
    
    40  | 
     | 
     | 
     | 
    
    
    41  | 
     | 
     | 
            // NaN / anything = qNaN  | 
    
    
    42  | 
     | 
     | 
            if (aAbs > infRep) return fromRep(toRep(a) | quietBit);  | 
    
    
    43  | 
     | 
     | 
            // anything / NaN = qNaN  | 
    
    
    44  | 
     | 
     | 
            if (bAbs > infRep) return fromRep(toRep(b) | quietBit);  | 
    
    
    45  | 
     | 
     | 
     | 
    
    
    46  | 
     | 
     | 
            if (aAbs == infRep) { | 
    
    
    47  | 
     | 
     | 
                // infinity / infinity = NaN  | 
    
    
    48  | 
     | 
     | 
                if (bAbs == infRep) return fromRep(qnanRep);  | 
    
    
    49  | 
     | 
     | 
                // infinity / anything else = +/- infinity  | 
    
    
    50  | 
     | 
     | 
                else return fromRep(aAbs | quotientSign);  | 
    
    
    51  | 
     | 
     | 
            }  | 
    
    
    52  | 
     | 
     | 
     | 
    
    
    53  | 
     | 
     | 
            // anything else / infinity = +/- 0  | 
    
    
    54  | 
     | 
     | 
            if (bAbs == infRep) return fromRep(quotientSign);  | 
    
    
    55  | 
     | 
     | 
     | 
    
    
    56  | 
     | 
     | 
            if (!aAbs) { | 
    
    
    57  | 
     | 
     | 
                // zero / zero = NaN  | 
    
    
    58  | 
     | 
     | 
                if (!bAbs) return fromRep(qnanRep);  | 
    
    
    59  | 
     | 
     | 
                // zero / anything else = +/- zero  | 
    
    
    60  | 
     | 
     | 
                else return fromRep(quotientSign);  | 
    
    
    61  | 
     | 
     | 
            }  | 
    
    
    62  | 
     | 
     | 
            // anything else / zero = +/- infinity  | 
    
    
    63  | 
     | 
     | 
            if (!bAbs) return fromRep(infRep | quotientSign);  | 
    
    
    64  | 
     | 
     | 
     | 
    
    
    65  | 
     | 
     | 
            // one or both of a or b is denormal, the other (if applicable) is a  | 
    
    
    66  | 
     | 
     | 
            // normal number.  Renormalize one or both of a and b, and set scale to  | 
    
    
    67  | 
     | 
     | 
            // include the necessary exponent adjustment.  | 
    
    
    68  | 
     | 
     | 
            if (aAbs < implicitBit) scale += normalize(&aSignificand);  | 
    
    
    69  | 
     | 
     | 
            if (bAbs < implicitBit) scale -= normalize(&bSignificand);  | 
    
    
    70  | 
     | 
     | 
        }  | 
    
    
    71  | 
     | 
     | 
     | 
    
    
    72  | 
     | 
     | 
        // Or in the implicit significand bit.  (If we fell through from the  | 
    
    
    73  | 
     | 
     | 
        // denormal path it was already set by normalize( ), but setting it twice  | 
    
    
    74  | 
     | 
     | 
        // won't hurt anything.)  | 
    
    
    75  | 
     | 
     | 
        aSignificand |= implicitBit;  | 
    
    
    76  | 
     | 
     | 
        bSignificand |= implicitBit;  | 
    
    
    77  | 
     | 
     | 
        int quotientExponent = aExponent - bExponent + scale;  | 
    
    
    78  | 
     | 
     | 
     | 
    
    
    79  | 
     | 
     | 
        // Align the significand of b as a Q31 fixed-point number in the range  | 
    
    
    80  | 
     | 
     | 
        // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax  | 
    
    
    81  | 
     | 
     | 
        // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2.  This  | 
    
    
    82  | 
     | 
     | 
        // is accurate to about 3.5 binary digits.  | 
    
    
    83  | 
     | 
     | 
        uint32_t q31b = bSignificand << 8;  | 
    
    
    84  | 
     | 
     | 
        uint32_t reciprocal = UINT32_C(0x7504f333) - q31b;  | 
    
    
    85  | 
     | 
     | 
     | 
    
    
    86  | 
     | 
     | 
        // Now refine the reciprocal estimate using a Newton-Raphson iteration:  | 
    
    
    87  | 
     | 
     | 
        //  | 
    
    
    88  | 
     | 
     | 
        //     x1 = x0 * (2 - x0 * b)  | 
    
    
    89  | 
     | 
     | 
        //  | 
    
    
    90  | 
     | 
     | 
        // This doubles the number of correct binary digits in the approximation  | 
    
    
    91  | 
     | 
     | 
        // with each iteration, so after three iterations, we have about 28 binary  | 
    
    
    92  | 
     | 
     | 
        // digits of accuracy.  | 
    
    
    93  | 
     | 
     | 
        uint32_t correction;  | 
    
    
    94  | 
     | 
     | 
        correction = -((uint64_t)reciprocal * q31b >> 32);  | 
    
    
    95  | 
     | 
     | 
        reciprocal = (uint64_t)reciprocal * correction >> 31;  | 
    
    
    96  | 
     | 
     | 
        correction = -((uint64_t)reciprocal * q31b >> 32);  | 
    
    
    97  | 
     | 
     | 
        reciprocal = (uint64_t)reciprocal * correction >> 31;  | 
    
    
    98  | 
     | 
     | 
        correction = -((uint64_t)reciprocal * q31b >> 32);  | 
    
    
    99  | 
     | 
     | 
        reciprocal = (uint64_t)reciprocal * correction >> 31;  | 
    
    
    100  | 
     | 
     | 
     | 
    
    
    101  | 
     | 
     | 
        // Exhaustive testing shows that the error in reciprocal after three steps  | 
    
    
    102  | 
     | 
     | 
        // is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our  | 
    
    
    103  | 
     | 
     | 
        // expectations.  We bump the reciprocal by a tiny value to force the error  | 
    
    
    104  | 
     | 
     | 
        // to be strictly positive (in the range [0x1.4fdfp-37,0x1.287246p-29], to  | 
    
    
    105  | 
     | 
     | 
        // be specific).  This also causes 1/1 to give a sensible approximation  | 
    
    
    106  | 
     | 
     | 
        // instead of zero (due to overflow).  | 
    
    
    107  | 
     | 
     | 
        reciprocal -= 2;  | 
    
    
    108  | 
     | 
     | 
     | 
    
    
    109  | 
     | 
     | 
        // The numerical reciprocal is accurate to within 2^-28, lies in the  | 
    
    
    110  | 
     | 
     | 
        // interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller  | 
    
    
    111  | 
     | 
     | 
        // than the true reciprocal of b.  Multiplying a by this reciprocal thus  | 
    
    
    112  | 
     | 
     | 
        // gives a numerical q = a/b in Q24 with the following properties:  | 
    
    
    113  | 
     | 
     | 
        //  | 
    
    
    114  | 
     | 
     | 
        //    1. q < a/b  | 
    
    
    115  | 
     | 
     | 
        //    2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0)  | 
    
    
    116  | 
     | 
     | 
        //    3. the error in q is at most 2^-24 + 2^-27 -- the 2^24 term comes  | 
    
    
    117  | 
     | 
     | 
        //       from the fact that we truncate the product, and the 2^27 term  | 
    
    
    118  | 
     | 
     | 
        //       is the error in the reciprocal of b scaled by the maximum  | 
    
    
    119  | 
     | 
     | 
        //       possible value of a.  As a consequence of this error bound,  | 
    
    
    120  | 
     | 
     | 
        //       either q or nextafter(q) is the correctly rounded  | 
    
    
    121  | 
     | 
     | 
        rep_t quotient = (uint64_t)reciprocal*(aSignificand << 1) >> 32;  | 
    
    
    122  | 
     | 
     | 
     | 
    
    
    123  | 
     | 
     | 
        // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).  | 
    
    
    124  | 
     | 
     | 
        // In either case, we are going to compute a residual of the form  | 
    
    
    125  | 
     | 
     | 
        //  | 
    
    
    126  | 
     | 
     | 
        //     r = a - q*b  | 
    
    
    127  | 
     | 
     | 
        //  | 
    
    
    128  | 
     | 
     | 
        // We know from the construction of q that r satisfies:  | 
    
    
    129  | 
     | 
     | 
        //  | 
    
    
    130  | 
     | 
     | 
        //     0 <= r < ulp(q)*b  | 
    
    
    131  | 
     | 
     | 
        //  | 
    
    
    132  | 
     | 
     | 
        // if r is greater than 1/2 ulp(q)*b, then q rounds up.  Otherwise, we  | 
    
    
    133  | 
     | 
     | 
        // already have the correct result.  The exact halfway case cannot occur.  | 
    
    
    134  | 
     | 
     | 
        // We also take this time to right shift quotient if it falls in the [1,2)  | 
    
    
    135  | 
     | 
     | 
        // range and adjust the exponent accordingly.  | 
    
    
    136  | 
     | 
     | 
        rep_t residual;  | 
    
    
    137  | 
     | 
     | 
        if (quotient < (implicitBit << 1)) { | 
    
    
    138  | 
     | 
     | 
            residual = (aSignificand << 24) - quotient * bSignificand;  | 
    
    
    139  | 
     | 
     | 
            quotientExponent--;  | 
    
    
    140  | 
     | 
     | 
        } else { | 
    
    
    141  | 
     | 
     | 
            quotient >>= 1;  | 
    
    
    142  | 
     | 
     | 
            residual = (aSignificand << 23) - quotient * bSignificand;  | 
    
    
    143  | 
     | 
     | 
        }  | 
    
    
    144  | 
     | 
     | 
     | 
    
    
    145  | 
     | 
     | 
        const int writtenExponent = quotientExponent + exponentBias;  | 
    
    
    146  | 
     | 
     | 
     | 
    
    
    147  | 
     | 
     | 
        if (writtenExponent >= maxExponent) { | 
    
    
    148  | 
     | 
     | 
            // If we have overflowed the exponent, return infinity.  | 
    
    
    149  | 
     | 
     | 
            return fromRep(infRep | quotientSign);  | 
    
    
    150  | 
     | 
     | 
        }  | 
    
    
    151  | 
     | 
     | 
     | 
    
    
    152  | 
     | 
     | 
        else if (writtenExponent < 1) { | 
    
    
    153  | 
     | 
     | 
            // Flush denormals to zero.  In the future, it would be nice to add  | 
    
    
    154  | 
     | 
     | 
            // code to round them correctly.  | 
    
    
    155  | 
     | 
     | 
            return fromRep(quotientSign);  | 
    
    
    156  | 
     | 
     | 
        }  | 
    
    
    157  | 
     | 
     | 
     | 
    
    
    158  | 
     | 
     | 
        else { | 
    
    
    159  | 
     | 
     | 
            const bool round = (residual << 1) > bSignificand;  | 
    
    
    160  | 
     | 
     | 
            // Clear the implicit bit  | 
    
    
    161  | 
     | 
     | 
            rep_t absResult = quotient & significandMask;  | 
    
    
    162  | 
     | 
     | 
            // Insert the exponent  | 
    
    
    163  | 
     | 
     | 
            absResult |= (rep_t)writtenExponent << significandBits;  | 
    
    
    164  | 
     | 
     | 
            // Round  | 
    
    
    165  | 
     | 
     | 
            absResult += round;  | 
    
    
    166  | 
     | 
     | 
            // Insert the sign and return  | 
    
    
    167  | 
     | 
     | 
            return fromRep(absResult | quotientSign);  | 
    
    
    168  | 
     | 
     | 
        }  | 
    
    
    169  | 
     | 
     | 
    }  |