1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * Arm SVE assembly routines
4 *
5 * Copyright (C) 2022 ARM Ltd.
6 *
7 * Some macros and instruction encoding in this file are taken from linux 6.1.1,
8 * file arch/arm64/include/asm/fpsimdmacros.h, some of them are a modified
9 * version.
10 */
11
12/* Sanity-check macros to help avoid encoding garbage instructions */
13
/* Fail the build unless \nr is a general register number in 0..30. */
.macro _check_general_reg nr
    .if (\nr) >= 0 && (\nr) <= 30
        /* In range: nothing to emit. */
    .else
        .error "Bad register number \nr."
    .endif
.endm
19
/* Fail the build unless \znr is an SVE vector register number in 0..31. */
.macro _sve_check_zreg znr
    .if (\znr) >= 0 && (\znr) <= 31
        /* In range: nothing to emit. */
    .else
        .error "Bad Scalable Vector Extension vector register number \znr."
    .endif
.endm
25
/* Fail the build unless \pnr is an SVE predicate register number in 0..15. */
.macro _sve_check_preg pnr
    .if (\pnr) >= 0 && (\pnr) <= 15
        /* In range: nothing to emit. */
    .else
        .error "Bad Scalable Vector Extension predicate register number \pnr."
    .endif
.endm
31
/* Fail the build unless \min <= \n <= \max (both bounds inclusive). */
.macro _check_num n, min, max
    .if (\n) >= (\min) && (\n) <= (\max)
        /* In range: nothing to emit. */
    .else
        .error "Number \n out of range [\min,\max]"
    .endif
.endm
37
38/* SVE instruction encodings for non-SVE-capable assemblers */
39/* (pre binutils 2.28, all kernel capable clang versions support SVE) */
40
/*
 * STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL]
 *
 * Emits the instruction as a raw word so that assemblers without SVE support
 * can still build this file.
 *
 *   nz     - Z register number, 0..31
 *   nxbase - base X register number, 0..30
 *   offset - signed offset, -0x100..0xff (defaults to 0)
 */
.macro _sve_str_v nz, nxbase, offset=0
    _sve_check_zreg \nz
    _check_general_reg \nxbase
    _check_num (\offset), -0x100, 0xff
    .inst 0xe5804000                    /* base opcode                  */ \
        | (((\offset) & 0x1f8) << 13)   /* offset[8:3] -> bits [21:16]  */ \
        | (((\offset) & 7) << 10)       /* offset[2:0] -> bits [12:10]  */ \
        | ((\nxbase) << 5)              /* base register -> bits [9:5]  */ \
        | (\nz)                         /* Z register -> bits [4:0]     */
.endm
52
/*
 * LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL]
 *
 * Emits the instruction as a raw word so that assemblers without SVE support
 * can still build this file.
 *
 *   nz     - Z register number, 0..31
 *   nxbase - base X register number, 0..30
 *   offset - signed offset, -0x100..0xff (defaults to 0)
 */
.macro _sve_ldr_v nz, nxbase, offset=0
    _sve_check_zreg \nz
    _check_general_reg \nxbase
    _check_num (\offset), -0x100, 0xff
    .inst 0x85804000                    /* base opcode                  */ \
        | (((\offset) & 0x1f8) << 13)   /* offset[8:3] -> bits [21:16]  */ \
        | (((\offset) & 7) << 10)       /* offset[2:0] -> bits [12:10]  */ \
        | ((\nxbase) << 5)              /* base register -> bits [9:5]  */ \
        | (\nz)                         /* Z register -> bits [4:0]     */
.endm
64
/*
 * STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL]
 *
 * Emits the instruction as a raw word so that assemblers without SVE support
 * can still build this file.
 *
 *   np     - P register number, 0..15
 *   nxbase - base X register number, 0..30
 *   offset - signed offset, -0x100..0xff (defaults to 0)
 */
.macro _sve_str_p np, nxbase, offset=0
    _sve_check_preg \np
    _check_general_reg \nxbase
    _check_num (\offset), -0x100, 0xff
    .inst 0xe5800000                    /* base opcode                  */ \
        | (((\offset) & 0x1f8) << 13)   /* offset[8:3] -> bits [21:16]  */ \
        | (((\offset) & 7) << 10)       /* offset[2:0] -> bits [12:10]  */ \
        | ((\nxbase) << 5)              /* base register -> bits [9:5]  */ \
        | (\np)                         /* P register -> bits [3:0]     */
.endm
76
/*
 * LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL]
 *
 * Emits the instruction as a raw word so that assemblers without SVE support
 * can still build this file.
 *
 *   np     - P register number, 0..15
 *   nxbase - base X register number, 0..30
 *   offset - signed offset, -0x100..0xff (defaults to 0)
 */
.macro _sve_ldr_p np, nxbase, offset=0
    _sve_check_preg \np
    _check_general_reg \nxbase
    _check_num (\offset), -0x100, 0xff
    .inst 0x85800000                    /* base opcode                  */ \
        | (((\offset) & 0x1f8) << 13)   /* offset[8:3] -> bits [21:16]  */ \
        | (((\offset) & 7) << 10)       /* offset[2:0] -> bits [12:10]  */ \
        | ((\nxbase) << 5)              /* base register -> bits [9:5]  */ \
        | (\np)                         /* P register -> bits [3:0]     */
.endm
88
/*
 * RDVL X\nx, #\imm
 *
 *   nx  - destination X register number, 0..30 (bits [4:0] of the word)
 *   imm - signed immediate, -0x20..0x1f; its low six bits land in bits [10:5]
 */
.macro _sve_rdvl nx, imm
    _check_general_reg \nx
    _check_num (\imm), -0x20, 0x1f
    .inst 0x04bf5000 | (((\imm) & 0x3f) << 5) | (\nx)
.endm
97
/*
 * RDFFR (unpredicated): RDFFR P\np.B
 *
 *   np - destination P register number, 0..15 (bits [3:0] of the word)
 */
.macro _sve_rdffr np
    _sve_check_preg \np
    .inst 0x2519f000 | (\np)
.endm
104
/*
 * WRFFR P\np.B
 *
 *   np - source P register number, 0..15 (bits [8:5] of the word)
 */
.macro _sve_wrffr np
    _sve_check_preg \np
    .inst 0x25289000 | ((\np) << 5)
.endm
111
/*
 * Recursive worker for _for. Must be expanded with .altmacro in effect so that
 * the % operator evaluates its operand to a number.
 *
 * Invokes _for__body once for every integer in [\from, \to], in ascending
 * order. The range is split in half at each level instead of being walked
 * one value at a time, which keeps the macro nesting depth low.
 */
.macro __for from:req, to:req
    .ifne (\to) - (\from)
        /* More than one value left: handle the lower half, then the upper. */
        __for %\from, %((\from) + ((\to) - (\from)) / 2)
        __for %((\from) + ((\to) - (\from)) / 2 + 1), %\to
    .else
        /* Single value left: expand the loop body for it. */
        _for__body %\from
    .endif
.endm
120
/*
 * Compile-time loop: expand \insn once for every integer value from \from to
 * \to (inclusive, ascending), with \var standing for the current value.
 *
 *   var  - name of the loop variable; \insn refers to it as \<var>
 *   from - first value
 *   to   - last value
 *   insn - statement to expand for each value (rest of the line)
 *
 * A temporary macro, _for__body, is defined with \var as its only parameter
 * and \insn as its body. __for then invokes it for each value, and the
 * temporary macro is removed again afterwards.
 */
.macro _for var:req, from:req, to:req, insn:vararg
    .macro _for__body \var:req
        /* Expand the statement itself with altmacro switched off ... */
        .noaltmacro
        \insn
        /* ... and switch it back on for the enclosing __for recursion. */
        .altmacro
    .endm

    /* __for relies on the altmacro % operator to evaluate its arguments. */
    .altmacro
    __for \from, \to
    .noaltmacro

    /* Drop the per-loop body so that the next _for can define its own. */
    .purgem _for__body
.endm
134
/*
 * sve_save and sve_load differ from the Linux versions because Xen uses
 * different buffers to save the context. For example, Linux also uses these
 * macros to save/restore fpsr and fpcr, whereas Xen does that in C.
 */
140
/*
 * Save Z0-Z31, P0-P15 and an FFR image.
 *
 *   nxzffrctx - number of the X register pointing at the FFR slot; Z0-Z31 are
 *               stored in the 32 vector-sized slots immediately below it
 *   nxpctx    - number of the X register pointing at the P0-P15 save area
 *   save_ffr  - register name; if it holds zero an all-zero FFR image is
 *               stored instead of the current FFR
 *
 * The FFR slot is always written with a predicate store, so the whole
 * predicate-sized slot is overwritten whatever the vector length is. Zeroing
 * it with a 64-bit store would leave stale bytes behind once a predicate is
 * larger than 8 bytes.
 *
 * P0 is used as scratch for the FFR image. That is safe because P0 has
 * already been saved by the predicate loop, and it is reloaded from its slot
 * before the macro ends.
 */
.macro sve_save nxzffrctx, nxpctx, save_ffr
    _for n, 0, 31, _sve_str_v \n, \nxzffrctx, \n - 32
    _for n, 0, 15, _sve_str_p \n, \nxpctx, \n
        cbz \save_ffr, 921f
        _sve_rdffr 0                // P0 = current FFR
        b 922f
921:
        .inst 0x2518e400            // PFALSE P0.B: P0 = all-zero FFR image
922:
        _sve_str_p 0, \nxzffrctx    // Write the full FFR slot
        _sve_ldr_p 0, \nxpctx       // Restore the live P0
.endm
153
/*
 * Restore Z0-Z31, optionally FFR, and P0-P15.
 *
 *   nxzffrctx   - number of the X register pointing at the FFR slot; Z0-Z31
 *                 are loaded from the 32 vector-sized slots just below it
 *   nxpctx      - number of the X register pointing at the P0-P15 save area
 *   restore_ffr - register name; FFR is left untouched if it holds zero
 *
 * FFR is written through P0, so the predicate registers are loaded last to
 * make sure P0 ends up with its saved value.
 */
.macro sve_load nxzffrctx, nxpctx, restore_ffr
    _for n, 0, 31, _sve_ldr_v \n, \nxzffrctx, \n - 32

    cbz \restore_ffr, 921f
    _sve_ldr_p 0, \nxzffrctx        // P0 = saved FFR image
    _sve_wrffr 0                    // FFR = P0
921:

    _for n, 0, 15, _sve_ldr_p \n, \nxpctx, \n
.endm
162
/*
 * Gets the current vector register size in bytes
 *
 * The result is left in x0 (RDVL x0, #1).
 */
FUNC(sve_get_hw_vl)
    _sve_rdvl nx=0, imm=1
    ret
END(sve_get_hw_vl)
168
/*
 * Save the SVE context
 *
 * In:
 *   x0 - pointer to buffer for Z0-31 + FFR
 *   x1 - pointer to buffer for P0-15
 *   x2 - Save FFR if non-zero
 */
FUNC(sve_save_ctx)
    sve_save nxzffrctx=0, nxpctx=1, save_ffr=x2
    ret
END(sve_save_ctx)
180
/*
 * Load the SVE context
 *
 * In:
 *   x0 - pointer to buffer for Z0-31 + FFR
 *   x1 - pointer to buffer for P0-15
 *   x2 - Restore FFR if non-zero
 */
FUNC(sve_load_ctx)
    sve_load nxzffrctx=0, nxpctx=1, restore_ffr=x2
    ret
END(sve_load_ctx)
192
193/*
194 * Local variables:
195 * mode: ASM
196 * indent-tabs-mode: nil
197 * End:
198 */
199