diff options
author | 2025-03-08 22:04:20 +0800 | |
---|---|---|
committer | 2025-03-08 22:04:20 +0800 | |
commit | a07bb8fd1299070229f0e8f3dcb57ffd5ef9870a (patch) | |
tree | 84f21bd0bf7071bc5fc7dd989e77d7ceb5476682 /arch/mips/math-emu/sp_maddf.c | |
download | ohosKernel-a07bb8fd1299070229f0e8f3dcb57ffd5ef9870a.tar.gz ohosKernel-a07bb8fd1299070229f0e8f3dcb57ffd5ef9870a.zip |
Initial commit: OpenHarmony-v4.0-ReleaseOpenHarmony-v4.0-Release
Diffstat (limited to 'arch/mips/math-emu/sp_maddf.c')
-rw-r--r-- | arch/mips/math-emu/sp_maddf.c | 278 |
1 files changed, 278 insertions, 0 deletions
diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c new file mode 100644 index 000000000..473ee222d --- /dev/null +++ b/arch/mips/math-emu/sp_maddf.c | |||
@@ -0,0 +1,278 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0-only | ||
2 | /* | ||
3 | * IEEE754 floating point arithmetic | ||
4 | * single precision: MADDF.f (Fused Multiply Add) | ||
5 | * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) | ||
6 | * | ||
7 | * MIPS floating point support | ||
8 | * Copyright (C) 2015 Imagination Technologies, Ltd. | ||
9 | * Author: Markos Chandras <markos.chandras@imgtec.com> | ||
10 | */ | ||
11 | |||
12 | #include "ieee754sp.h" | ||
13 | |||
14 | |||
15 | static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, | ||
16 | union ieee754sp y, enum maddf_flags flags) | ||
17 | { | ||
18 | int re; | ||
19 | int rs; | ||
20 | unsigned int rm; | ||
21 | u64 rm64; | ||
22 | u64 zm64; | ||
23 | int s; | ||
24 | |||
25 | COMPXSP; | ||
26 | COMPYSP; | ||
27 | COMPZSP; | ||
28 | |||
29 | EXPLODEXSP; | ||
30 | EXPLODEYSP; | ||
31 | EXPLODEZSP; | ||
32 | |||
33 | FLUSHXSP; | ||
34 | FLUSHYSP; | ||
35 | FLUSHZSP; | ||
36 | |||
37 | ieee754_clearcx(); | ||
38 | |||
39 | rs = xs ^ ys; | ||
40 | if (flags & MADDF_NEGATE_PRODUCT) | ||
41 | rs ^= 1; | ||
42 | if (flags & MADDF_NEGATE_ADDITION) | ||
43 | zs ^= 1; | ||
44 | |||
45 | /* | ||
46 | * Handle the cases when at least one of x, y or z is a NaN. | ||
47 | * Order of precedence is sNaN, qNaN and z, x, y. | ||
48 | */ | ||
49 | if (zc == IEEE754_CLASS_SNAN) | ||
50 | return ieee754sp_nanxcpt(z); | ||
51 | if (xc == IEEE754_CLASS_SNAN) | ||
52 | return ieee754sp_nanxcpt(x); | ||
53 | if (yc == IEEE754_CLASS_SNAN) | ||
54 | return ieee754sp_nanxcpt(y); | ||
55 | if (zc == IEEE754_CLASS_QNAN) | ||
56 | return z; | ||
57 | if (xc == IEEE754_CLASS_QNAN) | ||
58 | return x; | ||
59 | if (yc == IEEE754_CLASS_QNAN) | ||
60 | return y; | ||
61 | |||
62 | if (zc == IEEE754_CLASS_DNORM) | ||
63 | SPDNORMZ; | ||
64 | /* ZERO z cases are handled separately below */ | ||
65 | |||
66 | switch (CLPAIR(xc, yc)) { | ||
67 | |||
68 | |||
69 | /* | ||
70 | * Infinity handling | ||
71 | */ | ||
72 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): | ||
73 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): | ||
74 | ieee754_setcx(IEEE754_INVALID_OPERATION); | ||
75 | return ieee754sp_indef(); | ||
76 | |||
77 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): | ||
78 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): | ||
79 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): | ||
80 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): | ||
81 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): | ||
82 | if ((zc == IEEE754_CLASS_INF) && (zs != rs)) { | ||
83 | /* | ||
84 | * Cases of addition of infinities with opposite signs | ||
85 | * or subtraction of infinities with same signs. | ||
86 | */ | ||
87 | ieee754_setcx(IEEE754_INVALID_OPERATION); | ||
88 | return ieee754sp_indef(); | ||
89 | } | ||
90 | /* | ||
91 | * z is here either not an infinity, or an infinity having the | ||
92 | * same sign as product (x*y). The result must be an infinity, | ||
93 | * and its sign is determined only by the sign of product (x*y). | ||
94 | */ | ||
95 | return ieee754sp_inf(rs); | ||
96 | |||
97 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): | ||
98 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): | ||
99 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): | ||
100 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): | ||
101 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): | ||
102 | if (zc == IEEE754_CLASS_INF) | ||
103 | return ieee754sp_inf(zs); | ||
104 | if (zc == IEEE754_CLASS_ZERO) { | ||
105 | /* Handle cases +0 + (-0) and similar ones. */ | ||
106 | if (zs == rs) | ||
107 | /* | ||
108 | * Cases of addition of zeros of equal signs | ||
109 | * or subtraction of zeroes of opposite signs. | ||
110 | * The sign of the resulting zero is in any | ||
111 | * such case determined only by the sign of z. | ||
112 | */ | ||
113 | return z; | ||
114 | |||
115 | return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); | ||
116 | } | ||
117 | /* x*y is here 0, and z is not 0, so just return z */ | ||
118 | return z; | ||
119 | |||
120 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): | ||
121 | SPDNORMX; | ||
122 | fallthrough; | ||
123 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): | ||
124 | if (zc == IEEE754_CLASS_INF) | ||
125 | return ieee754sp_inf(zs); | ||
126 | SPDNORMY; | ||
127 | break; | ||
128 | |||
129 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): | ||
130 | if (zc == IEEE754_CLASS_INF) | ||
131 | return ieee754sp_inf(zs); | ||
132 | SPDNORMX; | ||
133 | break; | ||
134 | |||
135 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): | ||
136 | if (zc == IEEE754_CLASS_INF) | ||
137 | return ieee754sp_inf(zs); | ||
138 | /* continue to real computations */ | ||
139 | } | ||
140 | |||
141 | /* Finally get to do some computation */ | ||
142 | |||
143 | /* | ||
144 | * Do the multiplication bit first | ||
145 | * | ||
146 | * rm = xm * ym, re = xe + ye basically | ||
147 | * | ||
148 | * At this point xm and ym should have been normalized. | ||
149 | */ | ||
150 | |||
151 | /* rm = xm * ym, re = xe+ye basically */ | ||
152 | assert(xm & SP_HIDDEN_BIT); | ||
153 | assert(ym & SP_HIDDEN_BIT); | ||
154 | |||
155 | re = xe + ye; | ||
156 | |||
157 | /* Multiple 24 bit xm and ym to give 48 bit results */ | ||
158 | rm64 = (uint64_t)xm * ym; | ||
159 | |||
160 | /* Shunt to top of word */ | ||
161 | rm64 = rm64 << 16; | ||
162 | |||
163 | /* Put explicit bit at bit 62 if necessary */ | ||
164 | if ((int64_t) rm64 < 0) { | ||
165 | rm64 = rm64 >> 1; | ||
166 | re++; | ||
167 | } | ||
168 | |||
169 | assert(rm64 & (1 << 62)); | ||
170 | |||
171 | if (zc == IEEE754_CLASS_ZERO) { | ||
172 | /* | ||
173 | * Move explicit bit from bit 62 to bit 26 since the | ||
174 | * ieee754sp_format code expects the mantissa to be | ||
175 | * 27 bits wide (24 + 3 rounding bits). | ||
176 | */ | ||
177 | rm = XSPSRS64(rm64, (62 - 26)); | ||
178 | return ieee754sp_format(rs, re, rm); | ||
179 | } | ||
180 | |||
181 | /* Move explicit bit from bit 23 to bit 62 */ | ||
182 | zm64 = (uint64_t)zm << (62 - 23); | ||
183 | assert(zm64 & (1 << 62)); | ||
184 | |||
185 | /* Make the exponents the same */ | ||
186 | if (ze > re) { | ||
187 | /* | ||
188 | * Have to shift r fraction right to align. | ||
189 | */ | ||
190 | s = ze - re; | ||
191 | rm64 = XSPSRS64(rm64, s); | ||
192 | re += s; | ||
193 | } else if (re > ze) { | ||
194 | /* | ||
195 | * Have to shift z fraction right to align. | ||
196 | */ | ||
197 | s = re - ze; | ||
198 | zm64 = XSPSRS64(zm64, s); | ||
199 | ze += s; | ||
200 | } | ||
201 | assert(ze == re); | ||
202 | assert(ze <= SP_EMAX); | ||
203 | |||
204 | /* Do the addition */ | ||
205 | if (zs == rs) { | ||
206 | /* | ||
207 | * Generate 64 bit result by adding two 63 bit numbers | ||
208 | * leaving result in zm64, zs and ze. | ||
209 | */ | ||
210 | zm64 = zm64 + rm64; | ||
211 | if ((int64_t)zm64 < 0) { /* carry out */ | ||
212 | zm64 = XSPSRS1(zm64); | ||
213 | ze++; | ||
214 | } | ||
215 | } else { | ||
216 | if (zm64 >= rm64) { | ||
217 | zm64 = zm64 - rm64; | ||
218 | } else { | ||
219 | zm64 = rm64 - zm64; | ||
220 | zs = rs; | ||
221 | } | ||
222 | if (zm64 == 0) | ||
223 | return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); | ||
224 | |||
225 | /* | ||
226 | * Put explicit bit at bit 62 if necessary. | ||
227 | */ | ||
228 | while ((zm64 >> 62) == 0) { | ||
229 | zm64 <<= 1; | ||
230 | ze--; | ||
231 | } | ||
232 | } | ||
233 | |||
234 | /* | ||
235 | * Move explicit bit from bit 62 to bit 26 since the | ||
236 | * ieee754sp_format code expects the mantissa to be | ||
237 | * 27 bits wide (24 + 3 rounding bits). | ||
238 | */ | ||
239 | zm = XSPSRS64(zm64, (62 - 26)); | ||
240 | |||
241 | return ieee754sp_format(zs, ze, zm); | ||
242 | } | ||
243 | |||
244 | union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, | ||
245 | union ieee754sp y) | ||
246 | { | ||
247 | return _sp_maddf(z, x, y, 0); | ||
248 | } | ||
249 | |||
250 | union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, | ||
251 | union ieee754sp y) | ||
252 | { | ||
253 | return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); | ||
254 | } | ||
255 | |||
256 | union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x, | ||
257 | union ieee754sp y) | ||
258 | { | ||
259 | return _sp_maddf(z, x, y, 0); | ||
260 | } | ||
261 | |||
262 | union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x, | ||
263 | union ieee754sp y) | ||
264 | { | ||
265 | return _sp_maddf(z, x, y, MADDF_NEGATE_ADDITION); | ||
266 | } | ||
267 | |||
268 | union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x, | ||
269 | union ieee754sp y) | ||
270 | { | ||
271 | return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION); | ||
272 | } | ||
273 | |||
274 | union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x, | ||
275 | union ieee754sp y) | ||
276 | { | ||
277 | return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); | ||
278 | } | ||