1/* SPDX-License-Identifier: BSD-2-Clause */
2/*
3 * Copyright (c) 2022 Linaro Limited
4 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
5 */
6
7/* Core SM3 secure hash using ARMv8.2 Crypto Extensions */
8
9#include <asm.S>
10#include <arm64_macros.S>
11
12	.irp		b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
13	.set		.Lv\b\().4s, \b
14	.endr
15
16	.macro		sm3partw1, rd, rn, rm
17	.inst		0xce60c000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
18	.endm
19
20	.macro		sm3partw2, rd, rn, rm
21	.inst		0xce60c400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
22	.endm
23
24	.macro		sm3ss1, rd, rn, rm, ra
25	.inst		0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
26	.endm
27
28	.macro		sm3tt1a, rd, rn, rm, imm2
29	.inst		0xce408000 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
30	.endm
31
32	.macro		sm3tt1b, rd, rn, rm, imm2
33	.inst		0xce408400 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
34	.endm
35
36	.macro		sm3tt2a, rd, rn, rm, imm2
37	.inst		0xce408800 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
38	.endm
39
40	.macro		sm3tt2b, rd, rn, rm, imm2
41	.inst		0xce408c00 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
42	.endm
43
44	.macro		round, ab, s0, t0, t1, i
45	sm3ss1		v5.4s, v8.4s, \t0\().4s, v9.4s
46	shl		\t1\().4s, \t0\().4s, #1
47	sri		\t1\().4s, \t0\().4s, #31
48	sm3tt1\ab	v8.4s, v5.4s, v10.4s, \i
49	sm3tt2\ab	v9.4s, v5.4s, \s0\().4s, \i
50	.endm
51
52	.macro		qround, ab, s0, s1, s2, s3, s4
53	.ifnb		\s4
54	ext		\s4\().16b, \s1\().16b, \s2\().16b, #12
55	ext		v6.16b, \s0\().16b, \s1\().16b, #12
56	ext		v7.16b, \s2\().16b, \s3\().16b, #8
57	sm3partw1	\s4\().4s, \s0\().4s, \s3\().4s
58	.endif
59
60	eor		v10.16b, \s0\().16b, \s1\().16b
61
62	round		\ab, \s0, v11, v12, 0
63	round		\ab, \s0, v12, v11, 1
64	round		\ab, \s0, v11, v12, 2
65	round		\ab, \s0, v12, v11, 3
66
67	.ifnb		\s4
68	sm3partw2	\s4\().4s, v7.4s, v6.4s
69	.endif
70	.endm
71
72	/*
73	 * void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
74	 *                       int blocks)
75	 */
76FUNC sm3_ce_transform , :
77	/* load state */
78	ld1		{v8.4s-v9.4s}, [x0]
79	rev64		v8.4s, v8.4s
80	rev64		v9.4s, v9.4s
81	ext		v8.16b, v8.16b, v8.16b, #8
82	ext		v9.16b, v9.16b, v9.16b, #8
83
84	adr_l		x8, .Lt
85	ldp		s13, s14, [x8]
86
87	/* load input */
880:	ld1		{v0.16b-v3.16b}, [x1], #64
89	sub		w2, w2, #1
90
91	mov		v15.16b, v8.16b
92	mov		v16.16b, v9.16b
93
94	rev32		v0.16b, v0.16b
95	rev32		v1.16b, v1.16b
96	rev32		v2.16b, v2.16b
97	rev32		v3.16b, v3.16b
98
99	ext		v11.16b, v13.16b, v13.16b, #4
100
101	qround		a, v0, v1, v2, v3, v4
102	qround		a, v1, v2, v3, v4, v0
103	qround		a, v2, v3, v4, v0, v1
104	qround		a, v3, v4, v0, v1, v2
105
106	ext		v11.16b, v14.16b, v14.16b, #4
107
108	qround		b, v4, v0, v1, v2, v3
109	qround		b, v0, v1, v2, v3, v4
110	qround		b, v1, v2, v3, v4, v0
111	qround		b, v2, v3, v4, v0, v1
112	qround		b, v3, v4, v0, v1, v2
113	qround		b, v4, v0, v1, v2, v3
114	qround		b, v0, v1, v2, v3, v4
115	qround		b, v1, v2, v3, v4, v0
116	qround		b, v2, v3, v4, v0, v1
117	qround		b, v3, v4
118	qround		b, v4, v0
119	qround		b, v0, v1
120
121	eor		v8.16b, v8.16b, v15.16b
122	eor		v9.16b, v9.16b, v16.16b
123
124	/* handled all input blocks? */
125	cbnz		w2, 0b
126
127	/* save state */
128	rev64		v8.4s, v8.4s
129	rev64		v9.4s, v9.4s
130	ext		v8.16b, v8.16b, v8.16b, #8
131	ext		v9.16b, v9.16b, v9.16b, #8
132	st1		{v8.4s-v9.4s}, [x0]
133	ret
134END_FUNC sm3_ce_transform
135
136	.section	".rodata", "a"
137	.align		3
138LOCAL_DATA .Lt , :
139	.word		0x79cc4519, 0x9d8a7a87
140END_DATA .Lt
141
/*
 * Tag this object with the AArch64 BTI feature bit.  BTI() and
 * emit_aarch64_feature_1_and come from the included headers; presumably
 * the line expands to nothing when BTI support is disabled - confirm in
 * asm.S.
 */
BTI(emit_aarch64_feature_1_and GNU_PROPERTY_AARCH64_FEATURE_1_BTI)
143