1// SPDX-License-Identifier: GPL-2.0-only
2// Copyright (C) 2021 ARM Limited.
3// Original author: Mark Brown <broonie@kernel.org>
4//
5// Scalable Matrix Extension ZA context switch test
6// Repeatedly writes unique test patterns into each ZA tile
7// and reads them back to verify integrity.
8//
// for x in `seq 1 NR_CPUS`; do za-test & pids=$pids\ $! ; done
10// (leave it running for as long as you want...)
11// kill $pids
12
13#include <asm/unistd.h>
14#include "assembler.h"
15#include "asm-offsets.h"
16#include "sme-inst.h"
17
.arch_extension sve

// Largest vector length handled by this test, in bits and in bytes.
// _start rejects anything above 2048 bits.
#define MAXVL     2048
#define MAXVL_B   (MAXVL / 8)

// Backing storage, kept in .data:
//   zaref   - shadow copy of ZA: MAXVL_B rows of MAXVL_B bytes each
//   scratch - room for a single vector of up to MAXVL_B bytes
.pushsection .data
.p2align 4
zaref:
	.space	MAXVL_B * MAXVL_B
scratch:
	.space	MAXVL_B
.popsection
33
// memcpy: byte-by-byte copy of x2 bytes from address x1 to address x0.
// x0: destination
// x1: source
// x2: number of bytes (zero is allowed and copies nothing)
// Clobbers x0-x3
function memcpy
	cmp	x2, #0			// empty copy: return straight away
	b.ne	.Lmemcpy_byte
	ret

.Lmemcpy_byte:
	ldrb	w3, [x1], #1		// fetch one byte, advance source
	strb	w3, [x0], #1		// store it, advance destination
	subs	x2, x2, #1		// one byte fewer left to copy
	b.ne	.Lmemcpy_byte
	ret
endfunction
45
// Generate a test pattern for storage in ZA and write it to the scratch
// buffer.
// x0: pid
// x1: row in ZA
// x2: generation
// Clobbers x0, x1 and x3
//
// These values are used to construct a 32-bit pattern that is written to
// every 32-bit lane of the scratch buffer (MAXVL_B / 4 lanes):
// bits 31:28	generation number (increments once per test_loop)
// bits 27:16	pid
// bits 15: 8	row number
// bits  7: 0	32-bit lane index

function pattern
	ubfiz	w3, w0, #16, #12	// PID into 27:16, all other bits zero
	bfi	w3, w2, #28, #4		// Generation
	bfi	w3, w1, #8, #8		// Row

	mov	w1, #MAXVL_B / 4	// lanes still to write
	ldr	x0, =scratch

.Lpattern_lane:
	str	w3, [x0], #4
	add	w3, w3, #1		// Lane
	subs	w1, w1, #1
	b.ne	.Lpattern_lane

	ret
endfunction
74
// Get the address of shadow data for ZA horizontal vector xn
//   xd:    register that receives zaref + (rdsvl value) * xn
//   xn:    register holding the row number
//   nrtmp: bare register number; x<nrtmp> is overwritten with the value
//          produced by "rdsvl <nrtmp>, 1" and callers reuse it afterwards
//          as the length of one row
// xd and x<nrtmp> must be different registers.
.macro _adrza xd, xn, nrtmp
	rdsvl	\nrtmp, 1
	ldr	\xd, =zaref
	madd	\xd, x\nrtmp, \xn, \xd
.endm
81
// Set up test pattern in a ZA horizontal vector and in its shadow copy
// x0: pid
// x1: row number
// x2: generation
//
// The return address is parked in x4 and the row in x12 while pattern and
// memcpy are called, so those helpers must leave x4, x5 and x12 alone.
function setup_za
	mov	x12, x1			// Use x12 for vector select
	mov	x4, x30			// bl below overwrites x30

	bl	pattern			// Get pattern in scratch buffer

	ldr	x1, =scratch		// copy source
	_adrza	x5, x12, 2		// Shadow buffer pointer to x5, length to x2
	mov	x0, x5			// copy destination
	bl	memcpy			// length set up in x2 by _adrza

	_ldr_za 12, 5			// load vector w12 from pointer x5

	ret	x4
endfunction
100
// memcmp: compare x2 bytes starting at address x0 with x2 bytes starting
// at address x1.
// Returns only if all bytes match (or x2 is zero); on the first mismatch
// it branches to barf with x0-x2 holding their entry values.
// Clobbers x0-x5 (x0-x2 are reloaded from the stack before leaving).
function memcmp
	cbz	x2, .Lmemcmp_ret

	// Keep the arguments so that barf can report them.
	sub	sp, sp, #0x20
	stp	x0, x1, [sp]
	str	x2, [sp, #0x10]

	mov	x5, xzr			// x5 = byte index
.Lmemcmp_byte:
	ldrb	w3, [x0, x5]
	ldrb	w4, [x1, x5]
	cmp	w3, w4
	add	x5, x5, #1		// add does not disturb the flags from cmp
	b.ne	.Lmemcmp_done		// mismatch: leave the loop with Z clear
	subs	x2, x2, #1
	b.ne	.Lmemcmp_byte		// falls through with Z set once all match

.Lmemcmp_done:
	// The loads and the add below do not write NZCV, so b.ne still
	// tests the outcome of the last cmp/subs in the loop.
	ldp	x0, x1, [sp]
	ldr	x2, [sp, #0x10]
	add	sp, sp, #0x20
	b.ne	barf

.Lmemcmp_ret:
	ret
endfunction
126
// Verify that a ZA vector matches its shadow in memory, else abort
// x0: row number
// Clobbers x0-x7 and x12.
//
// The return address is held in x3 across the call to memfill_ae and put
// back into x30 before the tail call, so memcmp returns to our caller.
function check_za
	mov	x12, x0			// vector select for _str_za
	mov	x4, x0
	mov	x3, x30			// bl below overwrites x30
	_adrza	x5, x0, 6		// pointer to expected value in x5, length in x6

	ldr	x0, =scratch		// Poison scratch
	mov	x7, x0			// x7 is scratch
	mov	x1, x6
	bl	memfill_ae

	_str_za 12, 7			// save vector w12 to pointer x7

	mov	x30, x3
	mov	x2, x6			// length
	mov	x1, x7			// data read back from ZA
	mov	x0, x5			// expected data
	b	memcmp
endfunction
150
// SIGUSR1 handler.
// Any SME register modified here can cause corruption in the main
// thread -- but *only* the locations modified here.
// x2: ucontext pointer of the interrupted context
// Clobbers x0.
function irritator_handler
	// Increment the irritation signal count, i.e. the x23 slot in the
	// saved register array of the interrupted context:
	ldr	x0, [x2, #(ucontext_regs + 8 * 23)]
	add	x0, x0, #1
	str	x0, [x2, #(ucontext_regs + 8 * 23)]

	// Corrupt some random ZA data
	// (compiled out; the disabled code writes v0, v9 and v31, not ZA)
#if 0
	adr	x0, .text + (irritator_handler - .text) / 16 * 16
	movi	v0.8b, #1
	movi	v9.16b, #2
	movi	v31.8b, #3
#endif

	ret
endfunction
169
// SIGUSR2 handler: count the signal and do nothing else.
// x2: ucontext pointer of the interrupted context
// Clobbers x0.
function tickle_handler
	// Increment the signal count, i.e. the x23 slot in the saved
	// register array of the interrupted context:
	ldr	x0, [x2, #(ucontext_regs + 8 * 23)]
	add	x0, x0, #1
	str	x0, [x2, #(ucontext_regs + 8 * 23)]

	ret
endfunction
178
// SIGINT/SIGTERM handler: print a summary and exit with status 0.
// w0: signal number
// x2: ucontext pointer of the interrupted context
// Does not return.
function terminate_handler
	mov	x20, x2			// ucontext, kept across the print helpers
	mov	w21, w0			// signal number, likewise

	puts	"Terminated by signal "
	mov	w0, w21
	bl	putdec
	puts	", no error, iterations="
	ldr	x0, [x20, #(ucontext_regs + 8 * 22)]	// saved x22: generation count
	bl	putdec
	puts	", signals="
	ldr	x0, [x20, #(ucontext_regs + 8 * 23)]	// saved x23: signal count
	bl	putdecn

	mov	x8, #__NR_exit
	mov	x0, xzr			// exit status 0
	svc	#0
endfunction
197
// setsignal: install a signal handler with rt_sigaction.
// w0: signal number
// x1: sa_action
// w2: sa_flags
// Clobbers x0-x6,x8
// Returns only on success; a failing rt_sigaction is reported and the
// program goes to .Labort.

// Stack frame: 16 bytes holding the saved x30, followed by the sigaction
// structure rounded up to a multiple of 16 bytes.
#define SETSIGNAL_FRAME	((sa_sz + 15) / 16 * 16 + 16)

function setsignal
	str	x30, [sp, #-SETSIGNAL_FRAME]!

	// Park the arguments while memclr runs.
	mov	w6, w2			// sa_flags
	mov	x5, x1			// sa_action
	mov	w4, w0			// signal number

	// Zero the sigaction structure.
	mov	x1, #sa_sz
	add	x0, sp, #16
	bl	memclr

	// Fill in the handler and flags, then make the syscall.
	add	x1, sp, #16
	str	x5, [x1, #sa_handler]
	str	w6, [x1, #sa_flags]
	mov	w0, w4
	mov	x2, xzr			// no old action requested
	mov	x3, #sa_mask_sz
	mov	x8, #__NR_rt_sigaction
	svc	#0

	cbnz	w0, .Lsetsignal_fail

	ldr	x30, [sp], #SETSIGNAL_FRAME
	ret

.Lsetsignal_fail:
	puts	"sigaction failure\n"
	b	.Labort
endfunction
230
// Main program entry point
//
// Registers kept live across the test loop:
//   x19  vector length in bits, as read at startup
//   x20  PID
//   x21  row countdown within the setup and verify loops
//   x22  generation number (printed as "iterations" on termination)
//   x23  signal count, incremented by the signal handlers via ucontext
//   x24  value of "rdsvl 24, 1" during the verify loop
.globl _start
function _start
	mov	x23, xzr	// signal count

	// SIGINT and SIGTERM both report and exit.
	mov	w2, #SA_SIGINFO
	adr	x1, terminate_handler
	mov	w0, #SIGINT
	bl	setsignal

	mov	w2, #SA_SIGINFO
	adr	x1, terminate_handler
	mov	w0, #SIGTERM
	bl	setsignal

	// SIGUSR1 and SIGUSR2 only bump the signal count.
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	adr	x1, irritator_handler
	mov	w0, #SIGUSR1
	bl	setsignal

	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	adr	x1, tickle_handler
	mov	w0, #SIGUSR2
	bl	setsignal

	puts	"Streaming mode "
	smstart_za

	// Sanity-check and report the vector length: it must lie in
	// 128..2048 bits and be a multiple of 8.

	rdsvl	19, 8
	cmp	x19, #128
	b.lo	.Lbad_vl
	cmp	x19, #2048
	b.hi	.Lbad_vl
	tst	x19, #(8 - 1)
	b.ne	.Lbad_vl

	puts	"vector length:\t"
	mov	x0, x19
	bl	putdec
	puts	" bits\n"

	// Obtain our PID, to ensure test pattern uniqueness between processes
	mov	x8, #__NR_getpid
	svc	#0
	mov	x20, x0

	puts	"PID:\t"
	mov	x0, x20
	bl	putdecn

	mov	x22, xzr	// generation number, increments per iteration
.Ltest_loop:
	rdsvl	0, 8		// the vector length must not change under us
	cmp	x0, x19
	b.ne	vl_barf

	rdsvl	21, 1		// Set up ZA & shadow with test pattern
.Lsetup_row:			// rows are written from the last down to row 0
	mov	x2, x22
	sub	x1, x21, #1
	mov	x0, x20
	bl	setup_za
	subs	x21, x21, #1
	b.ne	.Lsetup_row

	mov	x8, #__NR_sched_yield	// encourage preemption
	svc	#0

	mrs	x0, S3_3_C4_C2_2	// SVCR should have ZA=1,SM=0
	and	x1, x0, #3
	cmp	x1, #2
	b.ne	svcr_barf

	rdsvl	21, 1			// Verify that the data made it through
	rdsvl	24, 1			// x24 - x21 is the row being checked
.Lcheck_row:				// rows are checked from row 0 upwards
	sub	x0, x24, x21
	bl	check_za
	subs	x21, x21, #1
	b.ne	.Lcheck_row

	add	x22, x22, #1	// Everything still working
	b	.Ltest_loop

.Lbad_vl:
	puts	"bad vector length: "
	mov	x0, x19
	bl	putdecn
	// fall through to .Labort

	// NOTE(review): pid 0 makes kill() signal the whole process group,
	// whereas barf signals only its own PID -- confirm this is intended.
.Labort:
	mov	x0, #0
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0
endfunction
329
// barf: report a ZA/shadow mismatch and abort the process with SIGABRT.
// Reached from memcmp, which check_za tail-calls from the verify loop
// in _start.
// x0: expected data
// x1: actual data
// x2: data size in bytes
// Also reads state left by _start: x20 (PID), x22 (iteration), and the
// verify loop counters x21 and x24.
function barf
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// end hack
	smstop
	mov	x10, x0	// expected data
	mov	x11, x1	// actual data
	mov	x12, x2	// data size

	// The verify loop passes x24 - x21 to check_za as the row number;
	// x21 alone is the number of rows still to check.  Work the row out
	// now, in x21, which the print helpers are already relied upon to
	// preserve.
	sub	x21, x24, x21

	puts	"Mismatch: PID="
	mov	x0, x20
	bl	putdec
	puts	", iteration="
	mov	x0, x22
	bl	putdec
	puts	", row="
	mov	x0, x21
	bl	putdecn
	puts	"\tExpected ["
	mov	x0, x10
	mov	x1, x12
	bl	dumphex
	puts	"]\n\tGot      ["
	mov	x0, x11
	mov	x1, x12
	bl	dumphex
	puts	"]\n"

	mov	x8, #__NR_getpid	// x0 = our PID, the target for kill below
	svc	#0
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// ^ end of hack
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0
//	mov	x8, #__NR_exit
//	mov	x1, #1
//	svc	#0
endfunction
374
// vl_barf: report an unexpected active vector length and exit with
// status 1.
// x0: the vector length that was read
// Does not return.
function vl_barf
	mov	x10, x0			// keep the value across puts

	puts	"Bad active VL: "
	mov	x0, x10
	bl	putdecn

	// The exit status is the first syscall argument, so it belongs in
	// x0; x0 would otherwise still hold whatever putdecn left there.
	mov	x0, #1
	mov	x8, #__NR_exit
	svc	#0
endfunction
386
// svcr_barf: report an unexpected SVCR value and exit with status 1.
// x0: the SVCR value that was read
// Does not return.
function svcr_barf
	mov	x10, x0			// keep the value across puts

	puts	"Bad SVCR: "
	mov	x0, x10
	bl	putdecn

	// The exit status is the first syscall argument, so it belongs in
	// x0; x0 would otherwise still hold whatever putdecn left there.
	mov	x0, #1
	mov	x8, #__NR_exit
	svc	#0
endfunction
398