diff options
Diffstat (limited to 'arch/mips/math-emu/dp_sqrt.c')
-rw-r--r-- | arch/mips/math-emu/dp_sqrt.c | 152 |
1 files changed, 152 insertions, 0 deletions
diff --git a/arch/mips/math-emu/dp_sqrt.c b/arch/mips/math-emu/dp_sqrt.c new file mode 100644 index 000000000..1ee38f824 --- /dev/null +++ b/arch/mips/math-emu/dp_sqrt.c | |||
@@ -0,0 +1,152 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0-only | ||
2 | /* IEEE754 floating point arithmetic | ||
3 | * double precision square root | ||
4 | */ | ||
5 | /* | ||
6 | * MIPS floating point support | ||
7 | * Copyright (C) 1994-2000 Algorithmics Ltd. | ||
8 | */ | ||
9 | |||
10 | #include "ieee754dp.h" | ||
11 | |||
12 | static const unsigned int table[] = { | ||
13 | 0, 1204, 3062, 5746, 9193, 13348, 18162, 23592, | ||
14 | 29598, 36145, 43202, 50740, 58733, 67158, 75992, | ||
15 | 85215, 83599, 71378, 60428, 50647, 41945, 34246, | ||
16 | 27478, 21581, 16499, 12183, 8588, 5674, 3403, | ||
17 | 1742, 661, 130 | ||
18 | }; | ||
19 | |||
20 | union ieee754dp ieee754dp_sqrt(union ieee754dp x) | ||
21 | { | ||
22 | struct _ieee754_csr oldcsr; | ||
23 | union ieee754dp y, z, t; | ||
24 | unsigned int scalx, yh; | ||
25 | COMPXDP; | ||
26 | |||
27 | EXPLODEXDP; | ||
28 | ieee754_clearcx(); | ||
29 | FLUSHXDP; | ||
30 | |||
31 | /* x == INF or NAN? */ | ||
32 | switch (xc) { | ||
33 | case IEEE754_CLASS_SNAN: | ||
34 | return ieee754dp_nanxcpt(x); | ||
35 | |||
36 | case IEEE754_CLASS_QNAN: | ||
37 | /* sqrt(Nan) = Nan */ | ||
38 | return x; | ||
39 | |||
40 | case IEEE754_CLASS_ZERO: | ||
41 | /* sqrt(0) = 0 */ | ||
42 | return x; | ||
43 | |||
44 | case IEEE754_CLASS_INF: | ||
45 | if (xs) { | ||
46 | /* sqrt(-Inf) = Nan */ | ||
47 | ieee754_setcx(IEEE754_INVALID_OPERATION); | ||
48 | return ieee754dp_indef(); | ||
49 | } | ||
50 | /* sqrt(+Inf) = Inf */ | ||
51 | return x; | ||
52 | |||
53 | case IEEE754_CLASS_DNORM: | ||
54 | DPDNORMX; | ||
55 | fallthrough; | ||
56 | case IEEE754_CLASS_NORM: | ||
57 | if (xs) { | ||
58 | /* sqrt(-x) = Nan */ | ||
59 | ieee754_setcx(IEEE754_INVALID_OPERATION); | ||
60 | return ieee754dp_indef(); | ||
61 | } | ||
62 | break; | ||
63 | } | ||
64 | |||
65 | /* save old csr; switch off INX enable & flag; set RN rounding */ | ||
66 | oldcsr = ieee754_csr; | ||
67 | ieee754_csr.mx &= ~IEEE754_INEXACT; | ||
68 | ieee754_csr.sx &= ~IEEE754_INEXACT; | ||
69 | ieee754_csr.rm = FPU_CSR_RN; | ||
70 | |||
71 | /* adjust exponent to prevent overflow */ | ||
72 | scalx = 0; | ||
73 | if (xe > 512) { /* x > 2**-512? */ | ||
74 | xe -= 512; /* x = x / 2**512 */ | ||
75 | scalx += 256; | ||
76 | } else if (xe < -512) { /* x < 2**-512? */ | ||
77 | xe += 512; /* x = x * 2**512 */ | ||
78 | scalx -= 256; | ||
79 | } | ||
80 | |||
81 | x = builddp(0, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT); | ||
82 | y = x; | ||
83 | |||
84 | /* magic initial approximation to almost 8 sig. bits */ | ||
85 | yh = y.bits >> 32; | ||
86 | yh = (yh >> 1) + 0x1ff80000; | ||
87 | yh = yh - table[(yh >> 15) & 31]; | ||
88 | y.bits = ((u64) yh << 32) | (y.bits & 0xffffffff); | ||
89 | |||
90 | /* Heron's rule once with correction to improve to ~18 sig. bits */ | ||
91 | /* t=x/y; y=y+t; py[n0]=py[n0]-0x00100006; py[n1]=0; */ | ||
92 | t = ieee754dp_div(x, y); | ||
93 | y = ieee754dp_add(y, t); | ||
94 | y.bits -= 0x0010000600000000LL; | ||
95 | y.bits &= 0xffffffff00000000LL; | ||
96 | |||
97 | /* triple to almost 56 sig. bits: y ~= sqrt(x) to within 1 ulp */ | ||
98 | /* t=y*y; z=t; pt[n0]+=0x00100000; t+=z; z=(x-z)*y; */ | ||
99 | t = ieee754dp_mul(y, y); | ||
100 | z = t; | ||
101 | t.bexp += 0x001; | ||
102 | t = ieee754dp_add(t, z); | ||
103 | z = ieee754dp_mul(ieee754dp_sub(x, z), y); | ||
104 | |||
105 | /* t=z/(t+x) ; pt[n0]+=0x00100000; y+=t; */ | ||
106 | t = ieee754dp_div(z, ieee754dp_add(t, x)); | ||
107 | t.bexp += 0x001; | ||
108 | y = ieee754dp_add(y, t); | ||
109 | |||
110 | /* twiddle last bit to force y correctly rounded */ | ||
111 | |||
112 | /* set RZ, clear INEX flag */ | ||
113 | ieee754_csr.rm = FPU_CSR_RZ; | ||
114 | ieee754_csr.sx &= ~IEEE754_INEXACT; | ||
115 | |||
116 | /* t=x/y; ...chopped quotient, possibly inexact */ | ||
117 | t = ieee754dp_div(x, y); | ||
118 | |||
119 | if (ieee754_csr.sx & IEEE754_INEXACT || t.bits != y.bits) { | ||
120 | |||
121 | if (!(ieee754_csr.sx & IEEE754_INEXACT)) | ||
122 | /* t = t-ulp */ | ||
123 | t.bits -= 1; | ||
124 | |||
125 | /* add inexact to result status */ | ||
126 | oldcsr.cx |= IEEE754_INEXACT; | ||
127 | oldcsr.sx |= IEEE754_INEXACT; | ||
128 | |||
129 | switch (oldcsr.rm) { | ||
130 | case FPU_CSR_RU: | ||
131 | y.bits += 1; | ||
132 | fallthrough; | ||
133 | case FPU_CSR_RN: | ||
134 | t.bits += 1; | ||
135 | break; | ||
136 | } | ||
137 | |||
138 | /* y=y+t; ...chopped sum */ | ||
139 | y = ieee754dp_add(y, t); | ||
140 | |||
141 | /* adjust scalx for correctly rounded sqrt(x) */ | ||
142 | scalx -= 1; | ||
143 | } | ||
144 | |||
145 | /* py[n0]=py[n0]+scalx; ...scale back y */ | ||
146 | y.bexp += scalx; | ||
147 | |||
148 | /* restore rounding mode, possibly set inexact */ | ||
149 | ieee754_csr = oldcsr; | ||
150 | |||
151 | return y; | ||
152 | } | ||