1 |
|
|
/* $OpenBSD: s_fma.c,v 1.7 2016/09/12 19:47:02 guenther Exp $ */ |
2 |
|
|
|
3 |
|
|
/*- |
4 |
|
|
* Copyright (c) 2005 David Schultz <das@FreeBSD.ORG> |
5 |
|
|
* All rights reserved. |
6 |
|
|
* |
7 |
|
|
* Redistribution and use in source and binary forms, with or without |
8 |
|
|
* modification, are permitted provided that the following conditions |
9 |
|
|
* are met: |
10 |
|
|
* 1. Redistributions of source code must retain the above copyright |
11 |
|
|
* notice, this list of conditions and the following disclaimer. |
12 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
13 |
|
|
* notice, this list of conditions and the following disclaimer in the |
14 |
|
|
* documentation and/or other materials provided with the distribution. |
15 |
|
|
* |
16 |
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
17 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
20 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
21 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
22 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
23 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
24 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
25 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
26 |
|
|
* SUCH DAMAGE. |
27 |
|
|
*/ |
28 |
|
|
|
29 |
|
|
#include <fenv.h>
#include <float.h>
#include <math.h>
#include <stdint.h>
#include <string.h>
32 |
|
|
|
33 |
|
|
/*
 * Fused multiply-add: Compute x * y + z with a single rounding error.
 *
 * We use scaling to avoid overflow/underflow, along with the
 * canonical precision-doubling technique adapted from:
 *
 *	Dekker, T.  A Floating-Point Technique for Extending the
 *	Available Precision.  Numer. Math. 18, 224-242 (1971).
 *
 * This algorithm is sensitive to the rounding precision.  FPUs such
 * as the i387 must be set in double-precision mode if variables are
 * to be stored in FP registers in order to avoid incorrect results.
 * This is the default on FreeBSD, but not on many other systems.
 *
 * Hardware instructions should be used on architectures that provide them,
 * since this implementation will likely be several times slower.
 */
50 |
|
|
#if LDBL_MANT_DIG != 113 |
51 |
|
|
double |
52 |
|
|
fma(double x, double y, double z) |
53 |
|
|
{ |
54 |
|
|
static const double split = 0x1p27 + 1.0; |
55 |
|
|
double xs, ys, zs; |
56 |
|
|
double c, cc, hx, hy, p, q, tx, ty; |
57 |
|
|
double r, rr, s; |
58 |
|
|
int oround; |
59 |
|
|
int ex, ey, ez; |
60 |
|
|
int spread; |
61 |
|
|
|
62 |
|
|
/* |
63 |
|
|
* Handle special cases. The order of operations and the particular |
64 |
|
|
* return values here are crucial in handling special cases involving |
65 |
|
|
* infinities, NaNs, overflows, and signed zeroes correctly. |
66 |
|
|
*/ |
67 |
|
|
if (x == 0.0 || y == 0.0) |
68 |
|
|
return (x * y + z); |
69 |
|
|
if (z == 0.0) |
70 |
|
|
return (x * y); |
71 |
|
|
if (!isfinite(x) || !isfinite(y)) |
72 |
|
|
return (x * y + z); |
73 |
|
|
if (!isfinite(z)) |
74 |
|
|
return (z); |
75 |
|
|
|
76 |
|
|
xs = frexp(x, &ex); |
77 |
|
|
ys = frexp(y, &ey); |
78 |
|
|
zs = frexp(z, &ez); |
79 |
|
|
oround = fegetround(); |
80 |
|
|
spread = ex + ey - ez; |
81 |
|
|
|
82 |
|
|
/* |
83 |
|
|
* If x * y and z are many orders of magnitude apart, the scaling |
84 |
|
|
* will overflow, so we handle these cases specially. Rounding |
85 |
|
|
* modes other than FE_TONEAREST are painful. |
86 |
|
|
*/ |
87 |
|
|
if (spread > DBL_MANT_DIG * 2) { |
88 |
|
|
fenv_t env; |
89 |
|
|
feraiseexcept(FE_INEXACT); |
90 |
|
|
switch(oround) { |
91 |
|
|
case FE_TONEAREST: |
92 |
|
|
return (x * y); |
93 |
|
|
case FE_TOWARDZERO: |
94 |
|
|
if ((x > 0.0) ^ (y < 0.0) ^ (z < 0.0)) |
95 |
|
|
return (x * y); |
96 |
|
|
feholdexcept(&env); |
97 |
|
|
r = x * y; |
98 |
|
|
if (!fetestexcept(FE_INEXACT)) |
99 |
|
|
r = nextafter(r, 0); |
100 |
|
|
feupdateenv(&env); |
101 |
|
|
return (r); |
102 |
|
|
case FE_DOWNWARD: |
103 |
|
|
if (z > 0.0) |
104 |
|
|
return (x * y); |
105 |
|
|
feholdexcept(&env); |
106 |
|
|
r = x * y; |
107 |
|
|
if (!fetestexcept(FE_INEXACT)) |
108 |
|
|
r = nextafter(r, -INFINITY); |
109 |
|
|
feupdateenv(&env); |
110 |
|
|
return (r); |
111 |
|
|
default: /* FE_UPWARD */ |
112 |
|
|
if (z < 0.0) |
113 |
|
|
return (x * y); |
114 |
|
|
feholdexcept(&env); |
115 |
|
|
r = x * y; |
116 |
|
|
if (!fetestexcept(FE_INEXACT)) |
117 |
|
|
r = nextafter(r, INFINITY); |
118 |
|
|
feupdateenv(&env); |
119 |
|
|
return (r); |
120 |
|
|
} |
121 |
|
|
} |
122 |
|
|
if (spread < -DBL_MANT_DIG) { |
123 |
|
|
feraiseexcept(FE_INEXACT); |
124 |
|
|
if (!isnormal(z)) |
125 |
|
|
feraiseexcept(FE_UNDERFLOW); |
126 |
|
|
switch (oround) { |
127 |
|
|
case FE_TONEAREST: |
128 |
|
|
return (z); |
129 |
|
|
case FE_TOWARDZERO: |
130 |
|
|
if ((x > 0.0) ^ (y < 0.0) ^ (z < 0.0)) |
131 |
|
|
return (z); |
132 |
|
|
else |
133 |
|
|
return (nextafter(z, 0)); |
134 |
|
|
case FE_DOWNWARD: |
135 |
|
|
if ((x > 0.0) ^ (y < 0.0)) |
136 |
|
|
return (z); |
137 |
|
|
else |
138 |
|
|
return (nextafter(z, -INFINITY)); |
139 |
|
|
default: /* FE_UPWARD */ |
140 |
|
|
if ((x > 0.0) ^ (y < 0.0)) |
141 |
|
|
return (nextafter(z, INFINITY)); |
142 |
|
|
else |
143 |
|
|
return (z); |
144 |
|
|
} |
145 |
|
|
} |
146 |
|
|
|
147 |
|
|
/* |
148 |
|
|
* Use Dekker's algorithm to perform the multiplication and |
149 |
|
|
* subsequent addition in twice the machine precision. |
150 |
|
|
* Arrange so that x * y = c + cc, and x * y + z = r + rr. |
151 |
|
|
*/ |
152 |
|
|
fesetround(FE_TONEAREST); |
153 |
|
|
|
154 |
|
|
p = xs * split; |
155 |
|
|
hx = xs - p; |
156 |
|
|
hx += p; |
157 |
|
|
tx = xs - hx; |
158 |
|
|
|
159 |
|
|
p = ys * split; |
160 |
|
|
hy = ys - p; |
161 |
|
|
hy += p; |
162 |
|
|
ty = ys - hy; |
163 |
|
|
|
164 |
|
|
p = hx * hy; |
165 |
|
|
q = hx * ty + tx * hy; |
166 |
|
|
c = p + q; |
167 |
|
|
cc = p - c + q + tx * ty; |
168 |
|
|
|
169 |
|
|
zs = ldexp(zs, -spread); |
170 |
|
|
r = c + zs; |
171 |
|
|
s = r - c; |
172 |
|
|
rr = (c - (r - s)) + (zs - s) + cc; |
173 |
|
|
|
174 |
|
|
spread = ex + ey; |
175 |
|
|
if (spread + ilogb(r) > -1023) { |
176 |
|
|
fesetround(oround); |
177 |
|
|
r = r + rr; |
178 |
|
|
} else { |
179 |
|
|
/* |
180 |
|
|
* The result is subnormal, so we round before scaling to |
181 |
|
|
* avoid double rounding. |
182 |
|
|
*/ |
183 |
|
|
p = ldexp(copysign(0x1p-1022, r), -spread); |
184 |
|
|
c = r + p; |
185 |
|
|
s = c - r; |
186 |
|
|
cc = (r - (c - s)) + (p - s) + rr; |
187 |
|
|
fesetround(oround); |
188 |
|
|
r = (c + cc) - p; |
189 |
|
|
} |
190 |
|
|
return (ldexp(r, spread)); |
191 |
|
|
} |
192 |
|
|
#else /* LDBL_MANT_DIG == 113 */ |
193 |
|
|
/*
 * With a 113-bit long double significand the product of two doubles
 * (at most 106 significant bits) is held without error, so the multiply
 * is done in long double and only the addition and the final conversion
 * to double round.
 *
 * NOTE(review): the long double addition and the conversion to double
 * are two separate roundings; when x * y is exactly halfway between two
 * doubles and z is far smaller, this looks like it can mis-round --
 * confirm on a 113-bit long double target.
 */
double
fma(double x, double y, double z)
{
	const long double product = (long double)x * y;

	return (product + z);
}
202 |
|
|
#endif /* LDBL_MANT_DIG != 113 */ |
203 |
|
|
/*
 * Symbol bookkeeping for fma().  Neither macro is defined in this file.
 * NOTE(review): presumably DEF_STD registers fma as a standard exported
 * symbol and LDBL_MAYBE_UNUSED_CLONE provides a long double alias on
 * platforms where long double has the same format as double -- confirm
 * against the library's namespace headers.
 */
DEF_STD(fma); |
204 |
|
|
LDBL_MAYBE_UNUSED_CLONE(fma); |