diff options
Diffstat (limited to 'arch/mips/math-emu/dp_maddf.c')
-rw-r--r-- | arch/mips/math-emu/dp_maddf.c | 358 |
1 files changed, 358 insertions, 0 deletions
diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c new file mode 100644 index 000000000..931e66f68 --- /dev/null +++ b/arch/mips/math-emu/dp_maddf.c | |||
@@ -0,0 +1,358 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0-only | ||
2 | /* | ||
3 | * IEEE754 floating point arithmetic | ||
4 | * double precision: MADDF.f (Fused Multiply Add) | ||
5 | * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) | ||
6 | * | ||
7 | * MIPS floating point support | ||
8 | * Copyright (C) 2015 Imagination Technologies, Ltd. | ||
9 | * Author: Markos Chandras <markos.chandras@imgtec.com> | ||
10 | */ | ||
11 | |||
12 | #include "ieee754dp.h" | ||
13 | |||
14 | |||
15 | /* 128 bits shift right logical with rounding. */ | ||
16 | static void srl128(u64 *hptr, u64 *lptr, int count) | ||
17 | { | ||
18 | u64 low; | ||
19 | |||
20 | if (count >= 128) { | ||
21 | *lptr = *hptr != 0 || *lptr != 0; | ||
22 | *hptr = 0; | ||
23 | } else if (count >= 64) { | ||
24 | if (count == 64) { | ||
25 | *lptr = *hptr | (*lptr != 0); | ||
26 | } else { | ||
27 | low = *lptr; | ||
28 | *lptr = *hptr >> (count - 64); | ||
29 | *lptr |= (*hptr << (128 - count)) != 0 || low != 0; | ||
30 | } | ||
31 | *hptr = 0; | ||
32 | } else { | ||
33 | low = *lptr; | ||
34 | *lptr = low >> count | *hptr << (64 - count); | ||
35 | *lptr |= (low << (64 - count)) != 0; | ||
36 | *hptr = *hptr >> count; | ||
37 | } | ||
38 | } | ||
39 | |||
40 | static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, | ||
41 | union ieee754dp y, enum maddf_flags flags) | ||
42 | { | ||
43 | int re; | ||
44 | int rs; | ||
45 | unsigned int lxm; | ||
46 | unsigned int hxm; | ||
47 | unsigned int lym; | ||
48 | unsigned int hym; | ||
49 | u64 lrm; | ||
50 | u64 hrm; | ||
51 | u64 lzm; | ||
52 | u64 hzm; | ||
53 | u64 t; | ||
54 | u64 at; | ||
55 | int s; | ||
56 | |||
57 | COMPXDP; | ||
58 | COMPYDP; | ||
59 | COMPZDP; | ||
60 | |||
61 | EXPLODEXDP; | ||
62 | EXPLODEYDP; | ||
63 | EXPLODEZDP; | ||
64 | |||
65 | FLUSHXDP; | ||
66 | FLUSHYDP; | ||
67 | FLUSHZDP; | ||
68 | |||
69 | ieee754_clearcx(); | ||
70 | |||
71 | rs = xs ^ ys; | ||
72 | if (flags & MADDF_NEGATE_PRODUCT) | ||
73 | rs ^= 1; | ||
74 | if (flags & MADDF_NEGATE_ADDITION) | ||
75 | zs ^= 1; | ||
76 | |||
77 | /* | ||
78 | * Handle the cases when at least one of x, y or z is a NaN. | ||
79 | * Order of precedence is sNaN, qNaN and z, x, y. | ||
80 | */ | ||
81 | if (zc == IEEE754_CLASS_SNAN) | ||
82 | return ieee754dp_nanxcpt(z); | ||
83 | if (xc == IEEE754_CLASS_SNAN) | ||
84 | return ieee754dp_nanxcpt(x); | ||
85 | if (yc == IEEE754_CLASS_SNAN) | ||
86 | return ieee754dp_nanxcpt(y); | ||
87 | if (zc == IEEE754_CLASS_QNAN) | ||
88 | return z; | ||
89 | if (xc == IEEE754_CLASS_QNAN) | ||
90 | return x; | ||
91 | if (yc == IEEE754_CLASS_QNAN) | ||
92 | return y; | ||
93 | |||
94 | if (zc == IEEE754_CLASS_DNORM) | ||
95 | DPDNORMZ; | ||
96 | /* ZERO z cases are handled separately below */ | ||
97 | |||
98 | switch (CLPAIR(xc, yc)) { | ||
99 | |||
100 | /* | ||
101 | * Infinity handling | ||
102 | */ | ||
103 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): | ||
104 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): | ||
105 | ieee754_setcx(IEEE754_INVALID_OPERATION); | ||
106 | return ieee754dp_indef(); | ||
107 | |||
108 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): | ||
109 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): | ||
110 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): | ||
111 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): | ||
112 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): | ||
113 | if ((zc == IEEE754_CLASS_INF) && (zs != rs)) { | ||
114 | /* | ||
115 | * Cases of addition of infinities with opposite signs | ||
116 | * or subtraction of infinities with same signs. | ||
117 | */ | ||
118 | ieee754_setcx(IEEE754_INVALID_OPERATION); | ||
119 | return ieee754dp_indef(); | ||
120 | } | ||
121 | /* | ||
122 | * z is here either not an infinity, or an infinity having the | ||
123 | * same sign as product (x*y). The result must be an infinity, | ||
124 | * and its sign is determined only by the sign of product (x*y). | ||
125 | */ | ||
126 | return ieee754dp_inf(rs); | ||
127 | |||
128 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): | ||
129 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): | ||
130 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): | ||
131 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): | ||
132 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): | ||
133 | if (zc == IEEE754_CLASS_INF) | ||
134 | return ieee754dp_inf(zs); | ||
135 | if (zc == IEEE754_CLASS_ZERO) { | ||
136 | /* Handle cases +0 + (-0) and similar ones. */ | ||
137 | if (zs == rs) | ||
138 | /* | ||
139 | * Cases of addition of zeros of equal signs | ||
140 | * or subtraction of zeroes of opposite signs. | ||
141 | * The sign of the resulting zero is in any | ||
142 | * such case determined only by the sign of z. | ||
143 | */ | ||
144 | return z; | ||
145 | |||
146 | return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); | ||
147 | } | ||
148 | /* x*y is here 0, and z is not 0, so just return z */ | ||
149 | return z; | ||
150 | |||
151 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): | ||
152 | DPDNORMX; | ||
153 | fallthrough; | ||
154 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): | ||
155 | if (zc == IEEE754_CLASS_INF) | ||
156 | return ieee754dp_inf(zs); | ||
157 | DPDNORMY; | ||
158 | break; | ||
159 | |||
160 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): | ||
161 | if (zc == IEEE754_CLASS_INF) | ||
162 | return ieee754dp_inf(zs); | ||
163 | DPDNORMX; | ||
164 | break; | ||
165 | |||
166 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): | ||
167 | if (zc == IEEE754_CLASS_INF) | ||
168 | return ieee754dp_inf(zs); | ||
169 | /* continue to real computations */ | ||
170 | } | ||
171 | |||
172 | /* Finally get to do some computation */ | ||
173 | |||
174 | /* | ||
175 | * Do the multiplication bit first | ||
176 | * | ||
177 | * rm = xm * ym, re = xe + ye basically | ||
178 | * | ||
179 | * At this point xm and ym should have been normalized. | ||
180 | */ | ||
181 | assert(xm & DP_HIDDEN_BIT); | ||
182 | assert(ym & DP_HIDDEN_BIT); | ||
183 | |||
184 | re = xe + ye; | ||
185 | |||
186 | /* shunt to top of word */ | ||
187 | xm <<= 64 - (DP_FBITS + 1); | ||
188 | ym <<= 64 - (DP_FBITS + 1); | ||
189 | |||
190 | /* | ||
191 | * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm. | ||
192 | */ | ||
193 | |||
194 | lxm = xm; | ||
195 | hxm = xm >> 32; | ||
196 | lym = ym; | ||
197 | hym = ym >> 32; | ||
198 | |||
199 | lrm = DPXMULT(lxm, lym); | ||
200 | hrm = DPXMULT(hxm, hym); | ||
201 | |||
202 | t = DPXMULT(lxm, hym); | ||
203 | |||
204 | at = lrm + (t << 32); | ||
205 | hrm += at < lrm; | ||
206 | lrm = at; | ||
207 | |||
208 | hrm = hrm + (t >> 32); | ||
209 | |||
210 | t = DPXMULT(hxm, lym); | ||
211 | |||
212 | at = lrm + (t << 32); | ||
213 | hrm += at < lrm; | ||
214 | lrm = at; | ||
215 | |||
216 | hrm = hrm + (t >> 32); | ||
217 | |||
218 | /* Put explicit bit at bit 126 if necessary */ | ||
219 | if ((int64_t)hrm < 0) { | ||
220 | lrm = (hrm << 63) | (lrm >> 1); | ||
221 | hrm = hrm >> 1; | ||
222 | re++; | ||
223 | } | ||
224 | |||
225 | assert(hrm & (1 << 62)); | ||
226 | |||
227 | if (zc == IEEE754_CLASS_ZERO) { | ||
228 | /* | ||
229 | * Move explicit bit from bit 126 to bit 55 since the | ||
230 | * ieee754dp_format code expects the mantissa to be | ||
231 | * 56 bits wide (53 + 3 rounding bits). | ||
232 | */ | ||
233 | srl128(&hrm, &lrm, (126 - 55)); | ||
234 | return ieee754dp_format(rs, re, lrm); | ||
235 | } | ||
236 | |||
237 | /* Move explicit bit from bit 52 to bit 126 */ | ||
238 | lzm = 0; | ||
239 | hzm = zm << 10; | ||
240 | assert(hzm & (1 << 62)); | ||
241 | |||
242 | /* Make the exponents the same */ | ||
243 | if (ze > re) { | ||
244 | /* | ||
245 | * Have to shift y fraction right to align. | ||
246 | */ | ||
247 | s = ze - re; | ||
248 | srl128(&hrm, &lrm, s); | ||
249 | re += s; | ||
250 | } else if (re > ze) { | ||
251 | /* | ||
252 | * Have to shift x fraction right to align. | ||
253 | */ | ||
254 | s = re - ze; | ||
255 | srl128(&hzm, &lzm, s); | ||
256 | ze += s; | ||
257 | } | ||
258 | assert(ze == re); | ||
259 | assert(ze <= DP_EMAX); | ||
260 | |||
261 | /* Do the addition */ | ||
262 | if (zs == rs) { | ||
263 | /* | ||
264 | * Generate 128 bit result by adding two 127 bit numbers | ||
265 | * leaving result in hzm:lzm, zs and ze. | ||
266 | */ | ||
267 | hzm = hzm + hrm + (lzm > (lzm + lrm)); | ||
268 | lzm = lzm + lrm; | ||
269 | if ((int64_t)hzm < 0) { /* carry out */ | ||
270 | srl128(&hzm, &lzm, 1); | ||
271 | ze++; | ||
272 | } | ||
273 | } else { | ||
274 | if (hzm > hrm || (hzm == hrm && lzm >= lrm)) { | ||
275 | hzm = hzm - hrm - (lzm < lrm); | ||
276 | lzm = lzm - lrm; | ||
277 | } else { | ||
278 | hzm = hrm - hzm - (lrm < lzm); | ||
279 | lzm = lrm - lzm; | ||
280 | zs = rs; | ||
281 | } | ||
282 | if (lzm == 0 && hzm == 0) | ||
283 | return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); | ||
284 | |||
285 | /* | ||
286 | * Put explicit bit at bit 126 if necessary. | ||
287 | */ | ||
288 | if (hzm == 0) { | ||
289 | /* left shift by 63 or 64 bits */ | ||
290 | if ((int64_t)lzm < 0) { | ||
291 | /* MSB of lzm is the explicit bit */ | ||
292 | hzm = lzm >> 1; | ||
293 | lzm = lzm << 63; | ||
294 | ze -= 63; | ||
295 | } else { | ||
296 | hzm = lzm; | ||
297 | lzm = 0; | ||
298 | ze -= 64; | ||
299 | } | ||
300 | } | ||
301 | |||
302 | t = 0; | ||
303 | while ((hzm >> (62 - t)) == 0) | ||
304 | t++; | ||
305 | |||
306 | assert(t <= 62); | ||
307 | if (t) { | ||
308 | hzm = hzm << t | lzm >> (64 - t); | ||
309 | lzm = lzm << t; | ||
310 | ze -= t; | ||
311 | } | ||
312 | } | ||
313 | |||
314 | /* | ||
315 | * Move explicit bit from bit 126 to bit 55 since the | ||
316 | * ieee754dp_format code expects the mantissa to be | ||
317 | * 56 bits wide (53 + 3 rounding bits). | ||
318 | */ | ||
319 | srl128(&hzm, &lzm, (126 - 55)); | ||
320 | |||
321 | return ieee754dp_format(zs, ze, lzm); | ||
322 | } | ||
323 | |||
324 | union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, | ||
325 | union ieee754dp y) | ||
326 | { | ||
327 | return _dp_maddf(z, x, y, 0); | ||
328 | } | ||
329 | |||
330 | union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, | ||
331 | union ieee754dp y) | ||
332 | { | ||
333 | return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); | ||
334 | } | ||
335 | |||
336 | union ieee754dp ieee754dp_madd(union ieee754dp z, union ieee754dp x, | ||
337 | union ieee754dp y) | ||
338 | { | ||
339 | return _dp_maddf(z, x, y, 0); | ||
340 | } | ||
341 | |||
342 | union ieee754dp ieee754dp_msub(union ieee754dp z, union ieee754dp x, | ||
343 | union ieee754dp y) | ||
344 | { | ||
345 | return _dp_maddf(z, x, y, MADDF_NEGATE_ADDITION); | ||
346 | } | ||
347 | |||
348 | union ieee754dp ieee754dp_nmadd(union ieee754dp z, union ieee754dp x, | ||
349 | union ieee754dp y) | ||
350 | { | ||
351 | return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION); | ||
352 | } | ||
353 | |||
354 | union ieee754dp ieee754dp_nmsub(union ieee754dp z, union ieee754dp x, | ||
355 | union ieee754dp y) | ||
356 | { | ||
357 | return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); | ||
358 | } | ||