author      2025-03-08 22:04:20 +0800
committer   2025-03-08 22:04:20 +0800
commit      a07bb8fd1299070229f0e8f3dcb57ffd5ef9870a (patch)
tree        84f21bd0bf7071bc5fc7dd989e77d7ceb5476682 /arch/mips/crypto/chacha-core.S
Initial commit: OpenHarmony-v4.0-Release
Diffstat (limited to 'arch/mips/crypto/chacha-core.S')
-rw-r--r--   arch/mips/crypto/chacha-core.S   497
1 file changed, 497 insertions, 0 deletions
diff --git a/arch/mips/crypto/chacha-core.S b/arch/mips/crypto/chacha-core.S
new file mode 100644
index 000000000..5755f69cf
--- /dev/null
+++ b/arch/mips/crypto/chacha-core.S
@@ -0,0 +1,497 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved.
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#define MASK_U32		0x3c
#define CHACHA20_BLOCK_SIZE	64
#define STACK_SIZE		32

#define X0	$t0
#define X1	$t1
#define X2	$t2
#define X3	$t3
#define X4	$t4
#define X5	$t5
#define X6	$t6
#define X7	$t7
#define X8	$t8
#define X9	$t9
#define X10	$v1
#define X11	$s6
#define X12	$s5
#define X13	$s4
#define X14	$s3
#define X15	$s2
/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */
#define T0	$s1
#define T1	$s0
#define T(n)	T ## n
#define X(n)	X ## n

/* Input arguments */
#define STATE		$a0
#define OUT		$a1
#define IN		$a2
#define BYTES		$a3

/* Output argument */
/* NONCE[0] is kept in a register and not in memory.
 * We don't want to touch the original value in memory.
 * It must be incremented every loop iteration.
 */
#define NONCE_0		$v0

/* SAVED_X and SAVED_CA are set in the jump table.
 * Use regs which are overwritten on exit so we don't leak clear data.
 * They are used to handle the last bytes, which are not a multiple of 4.
 */
#define SAVED_X		X15
#define SAVED_CA	$s7

#define IS_UNALIGNED	$s7
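/* IS_UNALIGNED shares $s7 with SAVED_CA. This is safe: SAVED_CA is only
 * written on the final, partial block, after which the alignment flag is
 * never tested again.
 */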

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define MSB 0
#define LSB 3
#define ROTx rotl
#define ROTR(n) rotr n, 24
#define CPU_TO_LE32(n) \
	wsbh	n; \
	rotr	n, 16;
#else
#define MSB 3
#define LSB 0
#define ROTx rotr
#define CPU_TO_LE32(n)
#define ROTR(n)
#endif

#define FOR_EACH_WORD(x) \
	x( 0); \
	x( 1); \
	x( 2); \
	x( 3); \
	x( 4); \
	x( 5); \
	x( 6); \
	x( 7); \
	x( 8); \
	x( 9); \
	x(10); \
	x(11); \
	x(12); \
	x(13); \
	x(14); \
	x(15);

#define FOR_EACH_WORD_REV(x) \
	x(15); \
	x(14); \
	x(13); \
	x(12); \
	x(11); \
	x(10); \
	x( 9); \
	x( 8); \
	x( 7); \
	x( 6); \
	x( 5); \
	x( 4); \
	x( 3); \
	x( 2); \
	x( 1); \
	x( 0);

#define PLUS_ONE_0	 1
#define PLUS_ONE_1	 2
#define PLUS_ONE_2	 3
#define PLUS_ONE_3	 4
#define PLUS_ONE_4	 5
#define PLUS_ONE_5	 6
#define PLUS_ONE_6	 7
#define PLUS_ONE_7	 8
#define PLUS_ONE_8	 9
#define PLUS_ONE_9	10
#define PLUS_ONE_10	11
#define PLUS_ONE_11	12
#define PLUS_ONE_12	13
#define PLUS_ONE_13	14
#define PLUS_ONE_14	15
#define PLUS_ONE_15	16
#define PLUS_ONE(x)	PLUS_ONE_ ## x
#define _CONCAT3(a,b,c)	a ## b ## c
#define CONCAT3(a,b,c)	_CONCAT3(a,b,c)

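/* The STORE_* macros below finish one output word x: they add the original
 * state word back into X ## x (word 12 comes from NONCE_0 instead of memory),
 * convert the keystream word to little-endian byte order, XOR it with the
 * input and store the result. The unaligned variant uses lwl/lwr and swl/swr
 * pairs so IN and OUT may have any alignment.
 */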
#define STORE_UNALIGNED(x) \
CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
	.if (x != 12); \
	lw	T0, (x*4)(STATE); \
	.endif; \
	lwl	T1, (x*4)+MSB ## (IN); \
	lwr	T1, (x*4)+LSB ## (IN); \
	.if (x == 12); \
	addu	X ## x, NONCE_0; \
	.else; \
	addu	X ## x, T0; \
	.endif; \
	CPU_TO_LE32(X ## x); \
	xor	X ## x, T1; \
	swl	X ## x, (x*4)+MSB ## (OUT); \
	swr	X ## x, (x*4)+LSB ## (OUT);

#define STORE_ALIGNED(x) \
CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
	.if (x != 12); \
	lw	T0, (x*4)(STATE); \
	.endif; \
	lw	T1, (x*4) ## (IN); \
	.if (x == 12); \
	addu	X ## x, NONCE_0; \
	.else; \
	addu	X ## x, T0; \
	.endif; \
	CPU_TO_LE32(X ## x); \
	xor	X ## x, T1; \
	sw	X ## x, (x*4) ## (OUT);

/* Jump table macro.
 * Used for setup and for handling the last bytes, which are not a multiple of 4.
 * X15 is free to store Xn.
 * Every jump table entry must be equal in size.
 */
#define JMPTBL_ALIGNED(x) \
.Lchacha_mips_jmptbl_aligned_ ## x: ; \
	.set	noreorder; \
	b	.Lchacha_mips_xor_aligned_ ## x ## _b; \
	.if (x == 12); \
	addu	SAVED_X, X ## x, NONCE_0; \
	.else; \
	addu	SAVED_X, X ## x, SAVED_CA; \
	.endif; \
	.set	reorder

#define JMPTBL_UNALIGNED(x) \
.Lchacha_mips_jmptbl_unaligned_ ## x: ; \
	.set	noreorder; \
	b	.Lchacha_mips_xor_unaligned_ ## x ## _b; \
	.if (x == 12); \
	addu	SAVED_X, X ## x, NONCE_0; \
	.else; \
	addu	SAVED_X, X ## x, SAVED_CA; \
	.endif; \
	.set	reorder

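/* AXR is one add-xor-rotate step applied to four columns at once, i.e.
 * for each column: X(a) += X(k); X(v) ^= X(a); X(v) = rotl(X(v), s).
 * Four AXR invocations with rotate amounts 16, 12, 8 and 7 form the column
 * quarter-rounds; the next four, on the rotated index sets, form the
 * diagonal quarter-rounds of a ChaCha double round.
 */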
#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \
	addu	X(A), X(K); \
	addu	X(B), X(L); \
	addu	X(C), X(M); \
	addu	X(D), X(N); \
	xor	X(V), X(A); \
	xor	X(W), X(B); \
	xor	X(Y), X(C); \
	xor	X(Z), X(D); \
	rotl	X(V), S; \
	rotl	X(W), S; \
	rotl	X(Y), S; \
	rotl	X(Z), S;

.text
.set	reorder
.set	noat
.globl	chacha_crypt_arch
.ent	chacha_crypt_arch
chacha_crypt_arch:
	.frame	$sp, STACK_SIZE, $ra

	/* Load the number of rounds (fifth argument, passed on the stack). */
	lw	$at, 16($sp)

	addiu	$sp, -STACK_SIZE

	/* Return if BYTES = 0. */
	beqz	BYTES, .Lchacha_mips_end

	lw	NONCE_0, 48(STATE)

	/* Save s0-s7 */
	sw	$s0,  0($sp)
	sw	$s1,  4($sp)
	sw	$s2,  8($sp)
	sw	$s3, 12($sp)
	sw	$s4, 16($sp)
	sw	$s5, 20($sp)
	sw	$s6, 24($sp)
	sw	$s7, 28($sp)

	/* Test whether IN or OUT is unaligned.
	 * IS_UNALIGNED = ( IN | OUT ) & 0x00000003
	 */
	or	IS_UNALIGNED, IN, OUT
	andi	IS_UNALIGNED, 0x3

	b	.Lchacha_rounds_start

.align 4
.Loop_chacha_rounds:
	addiu	IN, CHACHA20_BLOCK_SIZE
	addiu	OUT, CHACHA20_BLOCK_SIZE
	addiu	NONCE_0, 1

.Lchacha_rounds_start:
	lw	X0,  0(STATE)
	lw	X1,  4(STATE)
	lw	X2,  8(STATE)
	lw	X3,  12(STATE)

	lw	X4,  16(STATE)
	lw	X5,  20(STATE)
	lw	X6,  24(STATE)
	lw	X7,  28(STATE)
	lw	X8,  32(STATE)
	lw	X9,  36(STATE)
	lw	X10, 40(STATE)
	lw	X11, 44(STATE)

	move	X12, NONCE_0
	lw	X13, 52(STATE)
	lw	X14, 56(STATE)
	lw	X15, 60(STATE)

.Loop_chacha_xor_rounds:
	addiu	$at, -2
	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15, 16);
	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7, 12);
	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15,  8);
	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7,  7);
	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14, 16);
	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4, 12);
	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14,  8);
	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4,  7);
	bnez	$at, .Loop_chacha_xor_rounds

	addiu	BYTES, -(CHACHA20_BLOCK_SIZE)

	/* Is data src/dst unaligned? Jump */
	bnez	IS_UNALIGNED, .Loop_chacha_unaligned

	/* Set the number of rounds here to fill the delay slot. */
	lw	$at, (STACK_SIZE+16)($sp)

	/* BYTES < 0: no full block left. */
	bltz	BYTES, .Lchacha_mips_no_full_block_aligned

	FOR_EACH_WORD_REV(STORE_ALIGNED)

	/* BYTES > 0? Loop again. */
	bgtz	BYTES, .Loop_chacha_rounds

	/* Placed here to fill the delay slot. */
	addiu	NONCE_0, 1

	/* BYTES < 0? Handle the last bytes. */
	bltz	BYTES, .Lchacha_mips_xor_bytes

.Lchacha_mips_xor_done:
	/* Restore used registers */
	lw	$s0,  0($sp)
	lw	$s1,  4($sp)
	lw	$s2,  8($sp)
	lw	$s3, 12($sp)
	lw	$s4, 16($sp)
	lw	$s5, 20($sp)
	lw	$s6, 24($sp)
	lw	$s7, 28($sp)

	/* Write NONCE_0 back to the right location in the state */
	sw	NONCE_0, 48(STATE)

.Lchacha_mips_end:
	addiu	$sp, STACK_SIZE
	jr	$ra

.Lchacha_mips_no_full_block_aligned:
	/* Restore BYTES (undo the block-size subtraction) */
	addiu	BYTES, CHACHA20_BLOCK_SIZE

	/* Get the number of bytes contained in full words */
	andi	$at, BYTES, MASK_U32

	/* Load upper half of jump table addr */
	lui	T0, %hi(.Lchacha_mips_jmptbl_aligned_0)

	/* Calculate lower half jump table offset */
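	/* $at = 4 * (number of full words); every jump table entry is two
	 * instructions (8 bytes), so inserting $at at bit position 1 adds the
	 * entry offset $at << 1 to the table base.
	 */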
	ins	T0, $at, 1, 6

	/* Add offset to STATE */
	addu	T1, STATE, $at

	/* Add lower half jump table addr */
	addiu	T0, %lo(.Lchacha_mips_jmptbl_aligned_0)

	/* Read the state word for the tail into SAVED_CA */
	lw	SAVED_CA, 0(T1)

	/* Store the remaining byte count as a negative value */
	subu	BYTES, $at, BYTES

	jr	T0

	/* Jump table */
	FOR_EACH_WORD(JMPTBL_ALIGNED)

.Loop_chacha_unaligned:
	/* Set the number of rounds here to fill the delay slot. */
	lw	$at, (STACK_SIZE+16)($sp)

	/* BYTES < 0: no full block left. */
	bltz	BYTES, .Lchacha_mips_no_full_block_unaligned

	FOR_EACH_WORD_REV(STORE_UNALIGNED)

	/* BYTES > 0? Loop again. */
	bgtz	BYTES, .Loop_chacha_rounds

	/* Write NONCE_0 back to the right location in the state */
	sw	NONCE_0, 48(STATE)

	.set noreorder
	/* Fall through to byte handling */
	bgez	BYTES, .Lchacha_mips_xor_done
.Lchacha_mips_xor_unaligned_0_b:
.Lchacha_mips_xor_aligned_0_b:
	/* Placed here to fill the delay slot */
	addiu	NONCE_0, 1
	.set reorder

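/* Handle the last 1-3 bytes of a partial word. At this point $at holds the
 * byte offset of the partial word inside the block, BYTES holds minus the
 * number of trailing bytes, and SAVED_X holds the keystream word they must
 * be XORed with (set up in the jump table entry).
 */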
.Lchacha_mips_xor_bytes:
	addu	IN, $at
	addu	OUT, $at
	/* First byte */
	lbu	T1, 0(IN)
	addiu	$at, BYTES, 1
	CPU_TO_LE32(SAVED_X)
	ROTR(SAVED_X)
	xor	T1, SAVED_X
	sb	T1, 0(OUT)
	beqz	$at, .Lchacha_mips_xor_done
	/* Second byte */
	lbu	T1, 1(IN)
	addiu	$at, BYTES, 2
	ROTx	SAVED_X, 8
	xor	T1, SAVED_X
	sb	T1, 1(OUT)
	beqz	$at, .Lchacha_mips_xor_done
	/* Third byte */
	lbu	T1, 2(IN)
	ROTx	SAVED_X, 8
	xor	T1, SAVED_X
	sb	T1, 2(OUT)
	b	.Lchacha_mips_xor_done

.Lchacha_mips_no_full_block_unaligned:
	/* Restore BYTES (undo the block-size subtraction) */
	addiu	BYTES, CHACHA20_BLOCK_SIZE

	/* Get the number of bytes contained in full words */
	andi	$at, BYTES, MASK_U32

	/* Load upper half of jump table addr */
	lui	T0, %hi(.Lchacha_mips_jmptbl_unaligned_0)

	/* Calculate lower half jump table offset */
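	/* Same entry-size trick as the aligned path: each entry is 8 bytes,
	 * so the offset is $at << 1.
	 */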
	ins	T0, $at, 1, 6

	/* Add offset to STATE */
	addu	T1, STATE, $at

	/* Add lower half jump table addr */
	addiu	T0, %lo(.Lchacha_mips_jmptbl_unaligned_0)

	/* Read the state word for the tail into SAVED_CA */
	lw	SAVED_CA, 0(T1)

	/* Store the remaining byte count as a negative value */
	subu	BYTES, $at, BYTES

	jr	T0

	/* Jump table */
	FOR_EACH_WORD(JMPTBL_UNALIGNED)
.end chacha_crypt_arch
.set at

/* Input arguments
 * STATE	$a0
 * OUT		$a1
 * NROUND	$a2
 */
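
/* hchacha_block_arch() runs NROUND ChaCha rounds over the 16-word input
 * state and writes words 0-3 and 12-15 of the permuted state to OUT,
 * without adding the input back in, as used for XChaCha key derivation.
 */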

#undef X12
#undef X13
#undef X14
#undef X15

#define X12	$a3
#define X13	$at
#define X14	$v0
#define X15	STATE
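
/* The remapped X12-X15 live in argument and scratch registers: hchacha
 * takes fewer arguments, so only $s6 (X11) has to be saved, and STATE
 * itself can hold X15 once the final load from the state is done.
 */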

.set noat
.globl	hchacha_block_arch
.ent	hchacha_block_arch
hchacha_block_arch:
	.frame	$sp, STACK_SIZE, $ra

	addiu	$sp, -STACK_SIZE

	/* Save X11 ($s6) */
	sw	X11, 0($sp)

	lw	X0,  0(STATE)
	lw	X1,  4(STATE)
	lw	X2,  8(STATE)
	lw	X3,  12(STATE)
	lw	X4,  16(STATE)
	lw	X5,  20(STATE)
	lw	X6,  24(STATE)
	lw	X7,  28(STATE)
	lw	X8,  32(STATE)
	lw	X9,  36(STATE)
	lw	X10, 40(STATE)
	lw	X11, 44(STATE)
	lw	X12, 48(STATE)
	lw	X13, 52(STATE)
	lw	X14, 56(STATE)
	lw	X15, 60(STATE)

.Loop_hchacha_xor_rounds:
	addiu	$a2, -2
	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15, 16);
	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7, 12);
	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15,  8);
	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7,  7);
	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14, 16);
	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4, 12);
	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14,  8);
	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4,  7);
	bnez	$a2, .Loop_hchacha_xor_rounds

	/* Restore used register */
	lw	X11, 0($sp)

	sw	X0,  0(OUT)
	sw	X1,  4(OUT)
	sw	X2,  8(OUT)
	sw	X3,  12(OUT)
	sw	X12, 16(OUT)
	sw	X13, 20(OUT)
	sw	X14, 24(OUT)
	sw	X15, 28(OUT)

	addiu	$sp, STACK_SIZE
	jr	$ra
.end hchacha_block_arch
.set at