/* $OpenBSD: s_fmal.c,v 1.3 2013/11/12 19:00:38 martynas Exp $ */

/*-
 * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
28 |
|
|
|
29 |
|
|
#include <fenv.h> |
30 |
|
|
#include <float.h> |
31 |
|
|
#include <math.h> |
32 |
|
|
|
33 |
|
|
/* |
34 |
|
|
* Fused multiply-add: Compute x * y + z with a single rounding error. |
35 |
|
|
* |
36 |
|
|
* We use scaling to avoid overflow/underflow, along with the |
37 |
|
|
* canonical precision-doubling technique adapted from: |
38 |
|
|
* |
39 |
|
|
* Dekker, T. A Floating-Point Technique for Extending the |
40 |
|
|
* Available Precision. Numer. Math. 18, 224-242 (1971). |
41 |
|
|
*/ |
42 |
|
|
long double |
43 |
|
|
fmal(long double x, long double y, long double z) |
44 |
|
|
{ |
45 |
|
|
#if LDBL_MANT_DIG == 64 |
46 |
|
|
static const long double split = 0x1p32L + 1.0; |
47 |
|
|
#elif LDBL_MANT_DIG == 113 |
48 |
|
|
static const long double split = 0x1p57L + 1.0; |
49 |
|
|
#endif |
50 |
|
|
long double xs, ys, zs; |
51 |
|
|
long double c, cc, hx, hy, p, q, tx, ty; |
52 |
|
|
long double r, rr, s; |
53 |
|
|
int oround; |
54 |
|
|
int ex, ey, ez; |
55 |
|
|
int spread; |
56 |
|
|
|
57 |
|
|
/* |
58 |
|
|
* Handle special cases. The order of operations and the particular |
59 |
|
|
* return values here are crucial in handling special cases involving |
60 |
|
|
* infinities, NaNs, overflows, and signed zeroes correctly. |
61 |
|
|
*/ |
62 |
|
|
if (x == 0.0 || y == 0.0) |
63 |
|
|
return (x * y + z); |
64 |
|
|
if (z == 0.0) |
65 |
|
|
return (x * y); |
66 |
|
|
if (!isfinite(x) || !isfinite(y)) |
67 |
|
|
return (x * y + z); |
68 |
|
|
if (!isfinite(z)) |
69 |
|
|
return (z); |
70 |
|
|
|
71 |
|
|
xs = frexpl(x, &ex); |
72 |
|
|
ys = frexpl(y, &ey); |
73 |
|
|
zs = frexpl(z, &ez); |
74 |
|
|
oround = fegetround(); |
75 |
|
|
spread = ex + ey - ez; |
76 |
|
|
|
77 |
|
|
/* |
78 |
|
|
* If x * y and z are many orders of magnitude apart, the scaling |
79 |
|
|
* will overflow, so we handle these cases specially. Rounding |
80 |
|
|
* modes other than FE_TONEAREST are painful. |
81 |
|
|
*/ |
82 |
|
|
if (spread > LDBL_MANT_DIG * 2) { |
83 |
|
|
fenv_t env; |
84 |
|
|
feraiseexcept(FE_INEXACT); |
85 |
|
|
switch(oround) { |
86 |
|
|
case FE_TONEAREST: |
87 |
|
|
return (x * y); |
88 |
|
|
case FE_TOWARDZERO: |
89 |
|
|
if ((x > 0.0) ^ (y < 0.0) ^ (z < 0.0)) |
90 |
|
|
return (x * y); |
91 |
|
|
feholdexcept(&env); |
92 |
|
|
r = x * y; |
93 |
|
|
if (!fetestexcept(FE_INEXACT)) |
94 |
|
|
r = nextafterl(r, 0); |
95 |
|
|
feupdateenv(&env); |
96 |
|
|
return (r); |
97 |
|
|
case FE_DOWNWARD: |
98 |
|
|
if (z > 0.0) |
99 |
|
|
return (x * y); |
100 |
|
|
feholdexcept(&env); |
101 |
|
|
r = x * y; |
102 |
|
|
if (!fetestexcept(FE_INEXACT)) |
103 |
|
|
r = nextafterl(r, -INFINITY); |
104 |
|
|
feupdateenv(&env); |
105 |
|
|
return (r); |
106 |
|
|
default: /* FE_UPWARD */ |
107 |
|
|
if (z < 0.0) |
108 |
|
|
return (x * y); |
109 |
|
|
feholdexcept(&env); |
110 |
|
|
r = x * y; |
111 |
|
|
if (!fetestexcept(FE_INEXACT)) |
112 |
|
|
r = nextafterl(r, INFINITY); |
113 |
|
|
feupdateenv(&env); |
114 |
|
|
return (r); |
115 |
|
|
} |
116 |
|
|
} |
117 |
|
|
if (spread < -LDBL_MANT_DIG) { |
118 |
|
|
feraiseexcept(FE_INEXACT); |
119 |
|
|
if (!isnormal(z)) |
120 |
|
|
feraiseexcept(FE_UNDERFLOW); |
121 |
|
|
switch (oround) { |
122 |
|
|
case FE_TONEAREST: |
123 |
|
|
return (z); |
124 |
|
|
case FE_TOWARDZERO: |
125 |
|
|
if ((x > 0.0) ^ (y < 0.0) ^ (z < 0.0)) |
126 |
|
|
return (z); |
127 |
|
|
else |
128 |
|
|
return (nextafterl(z, 0)); |
129 |
|
|
case FE_DOWNWARD: |
130 |
|
|
if ((x > 0.0) ^ (y < 0.0)) |
131 |
|
|
return (z); |
132 |
|
|
else |
133 |
|
|
return (nextafterl(z, -INFINITY)); |
134 |
|
|
default: /* FE_UPWARD */ |
135 |
|
|
if ((x > 0.0) ^ (y < 0.0)) |
136 |
|
|
return (nextafterl(z, INFINITY)); |
137 |
|
|
else |
138 |
|
|
return (z); |
139 |
|
|
} |
140 |
|
|
} |
141 |
|
|
|
142 |
|
|
/* |
143 |
|
|
* Use Dekker's algorithm to perform the multiplication and |
144 |
|
|
* subsequent addition in twice the machine precision. |
145 |
|
|
* Arrange so that x * y = c + cc, and x * y + z = r + rr. |
146 |
|
|
*/ |
147 |
|
|
fesetround(FE_TONEAREST); |
148 |
|
|
|
149 |
|
|
p = xs * split; |
150 |
|
|
hx = xs - p; |
151 |
|
|
hx += p; |
152 |
|
|
tx = xs - hx; |
153 |
|
|
|
154 |
|
|
p = ys * split; |
155 |
|
|
hy = ys - p; |
156 |
|
|
hy += p; |
157 |
|
|
ty = ys - hy; |
158 |
|
|
|
159 |
|
|
p = hx * hy; |
160 |
|
|
q = hx * ty + tx * hy; |
161 |
|
|
c = p + q; |
162 |
|
|
cc = p - c + q + tx * ty; |
163 |
|
|
|
164 |
|
|
zs = ldexpl(zs, -spread); |
165 |
|
|
r = c + zs; |
166 |
|
|
s = r - c; |
167 |
|
|
rr = (c - (r - s)) + (zs - s) + cc; |
168 |
|
|
|
169 |
|
|
spread = ex + ey; |
170 |
|
|
if (spread + ilogbl(r) > -16383) { |
171 |
|
|
fesetround(oround); |
172 |
|
|
r = r + rr; |
173 |
|
|
} else { |
174 |
|
|
/* |
175 |
|
|
* The result is subnormal, so we round before scaling to |
176 |
|
|
* avoid double rounding. |
177 |
|
|
*/ |
178 |
|
|
p = ldexpl(copysignl(0x1p-16382L, r), -spread); |
179 |
|
|
c = r + p; |
180 |
|
|
s = c - r; |
181 |
|
|
cc = (r - (c - s)) + (p - s) + rr; |
182 |
|
|
fesetround(oround); |
183 |
|
|
r = (c + cc) - p; |
184 |
|
|
} |
185 |
|
|
return (ldexpl(r, spread)); |
186 |
|
|
} |