1! Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
2!
3! Licensed under the Apache License 2.0 (the "License").  You may not use
4! this file except in compliance with the License.  You can obtain a copy
5! in the file LICENSE in the source distribution or at
6! https://www.openssl.org/source/license.html
7
/*
 * Select the 64-bit ABI when the compiler driver was asked for it:
 * Sun Studio with -xarch=v9, or GCC with -m64.
 */
#if (defined(__SUNPRO_C) && defined(__sparcv9)) || \
    (defined(__GNUC__) && defined(__arch64__))
# define ABI64
#endif

/*
 * FRAME is the stack adjustment handed to "save"; BIAS is the offset
 * added to %sp when a stack address is formed.
 */
#if defined(ABI64)
  .register	%g2,#scratch
  .register	%g3,#scratch
# define	FRAME	-192
# define	BIAS	2047
#else
# define	FRAME	-96
# define	BIAS	0
#endif
23
.text

! OPENSSL_atomic_add: add a value to a 32-bit counter in memory.
!
! In:	%o0 = address of the 32-bit counter
!	%o1 = amount to add
! Out:	%o0 = the updated counter value, sign-extended
!
! In 64-bit builds only the cas loop at the bottom is assembled. In
! 32-bit builds a run-time check picks between that loop and a fallback
! that uses swap with -1 as a busy marker.
.global	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,#function
.align	32
OPENSSL_atomic_add:
#ifndef ABI64
# if defined(__sun)
! There is no need to link with libthread to call thr_yield: libc
! provides a stub, which is overloaded as soon as either libpthread
! or libthread is linked in.
#  define	YIELD_CPU	thr_yield
# else
! Applies at least to Linux and FreeBSD; feedback expected.
#  define	YIELD_CPU	sched_yield
# endif
	! CPU check: 0-1 sets N and C, so a V9 CPU reads 0x99 from the
	! condition code register. Presumably older CPUs execute the
	! encoding without delivering 0x99 -- confirm.
	subcc	%g0,1,%o2
	.word	0x95408000		! rd %ccr,%o2
	subcc	%o2,0x99,%g0
	bz	.v9
	nop

	! Fallback path: needs a register window because it calls out.
	save	%sp,FRAME,%sp
	ba	.enter
	nop
.spin:
	call	YIELD_CPU		! give up the CPU, then try again
	nop
.enter:
	! NOTE(review): this pre-check compares against -4096 while the
	! busy marker written below is -1 -- confirm this is intended.
	ld	[%i0],%i2
	subcc	%i2,-4096,%g0
	bz	.spin
	or	%g0,-1,%i2		! delay slot: %i2 = busy marker
	swap	[%i0],%i2		! store marker, fetch previous value
	subcc	%i2,-1,%g0
	bz	.spin			! previous value was the marker
	add	%i2,%i1,%i2		! delay slot: new = previous + amount
	stbar
	st	%i2,[%i0]		! publish the new value
	sra	%i2,%g0,%i0		! return value as a signed int
	ret
	restore
.v9:
#endif
	ld	[%o0],%o2		! %o2 = value we expect in memory
.Lcas_retry:
	add	%o1,%o2,%o3		! %o3 = candidate new value
	.word	0xd7e2100a		! cas [%o0],%o2,%o3
	subcc	%o2,%o3,%g0		! cas left the fetched value in %o3
	bnz	.Lcas_retry		! memory had changed: retry
	or	%g0,%o3,%o2		! delay slot: expect the fetched value
	add	%o1,%o2,%o0		! callers expect the new value
	retl
	sra	%o0,%g0,%o0		! delay slot: return a signed int
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
74
! _sparcv9_rdtick: read the tick counter when the CPU check passes.
!
! Out:	%o0 = value read from the tick register
!	%o1 = that value shifted right by 32
!	Both are 0 when the check fails.
.global	_sparcv9_rdtick
.type	_sparcv9_rdtick,#function
.align	32
_sparcv9_rdtick:
	! 0-1 sets N and C, so a V9 CPU reads back 0x99.
	subcc	%g0,1,%o0
	.word	0x91408000		! rd %ccr,%o0
	subcc	%o0,0x99,%g0
	bnz	.notick
	xor	%o0,%o0,%o0		! delay slot, runs either way: %o0 = 0
	.word	0x91410000		! rd %tick,%o0
	retl
	.word	0x93323020		! delay slot: srlx %o0,32,%o1
.notick:
	retl
	xor	%o1,%o1,%o1		! delay slot: %o1 = 0
.size	_sparcv9_rdtick,.-_sparcv9_rdtick
91
! _sparcv9_vis1_probe: execute two VIS1 instructions and return.
! Nothing is checked here; presumably the caller deals with the trap
! raised on CPUs that lack them -- confirm against the caller.
.global	_sparcv9_vis1_probe
.type	_sparcv9_vis1_probe,#function
.align	8
_sparcv9_vis1_probe:
	add	%sp,BIAS+2,%o1		! an address derived from the stack pointer
	.word	0xc19a5a40		! ldda [%o1]ASI_FP16_P,%f0
	retl
	.word	0x81b00d80		! delay slot: fxor %f0,%f0,%f0
.size	_sparcv9_vis1_probe,.-_sparcv9_vis1_probe
101
102! Probe and instrument VIS1 instruction. Output is number of cycles it
103! takes to execute rdtick and pair of VIS1 instructions. US-Tx VIS unit
104! is slow (documented to be 6 cycles on T2) and the core is in-order
105! single-issue, it should be possible to distinguish Tx reliably...
106! Observed return values are:
107!
108!	UltraSPARC IIe		7
109!	UltraSPARC III		7
110!	UltraSPARC T1		24
111!	SPARC T4		65(*)
112!
! (*)	This result has less to do with VIS instruction latencies; rdtick
!	itself appears to be that slow. It still does the trick, in the
!	sense that FP and VIS code paths are slower than integer-only ones.
!
! Numbers for T2 and SPARC64 V-VII are more than welcome.
118!
119! It would be possible to detect specifically US-T1 by instrumenting
120! fmul8ulx16, which is emulated on T1 and as such accounts for quite
121! a lot of %tick-s, couple of thousand on Linux...
! _sparcv9_vis1_instrument: time a pair of fxor instructions.
!
! Five tick samples are taken with an fxor pair between consecutive
! samples; the four differences are computed and the smallest one is
! returned.
!
! Out:	%o0 = smallest of the four tick differences
.global	_sparcv9_vis1_instrument
.type	_sparcv9_vis1_instrument,#function
.align	8
_sparcv9_vis1_instrument:
	.word	0x81b00d80		! fxor %f0,%f0,%f0
	.word	0x85b08d82		! fxor %f2,%f2,%f2
	.word	0x91410000		! rd %tick,%o0	(sample 0)
	.word	0x81b00d80		! fxor %f0,%f0,%f0
	.word	0x85b08d82		! fxor %f2,%f2,%f2
	.word	0x93410000		! rd %tick,%o1	(sample 1)
	.word	0x81b00d80		! fxor %f0,%f0,%f0
	.word	0x85b08d82		! fxor %f2,%f2,%f2
	.word	0x95410000		! rd %tick,%o2	(sample 2)
	.word	0x81b00d80		! fxor %f0,%f0,%f0
	.word	0x85b08d82		! fxor %f2,%f2,%f2
	.word	0x97410000		! rd %tick,%o3	(sample 3)
	.word	0x81b00d80		! fxor %f0,%f0,%f0
	.word	0x85b08d82		! fxor %f2,%f2,%f2
	.word	0x99410000		! rd %tick,%o4	(sample 4)

	! Differences between consecutive samples.
	sub	%o1,%o0,%o0		! %o0 = sample 1 - sample 0
	sub	%o2,%o1,%o1		! %o1 = sample 2 - sample 1
	sub	%o3,%o2,%o2		! %o2 = sample 3 - sample 2
	sub	%o4,%o3,%o3		! %o3 = sample 4 - sample 3

	! Unsigned minimum into %o0. Each branch is annulled, so the
	! instruction in its delay slot runs only when %o0 is the larger.
	subcc	%o0,%o1,%g0
	.word	0x38680002		! bgu,a %xcc,.+8
	or	%g0,%o1,%o0
	subcc	%o0,%o2,%g0
	.word	0x38680002		! bgu,a %xcc,.+8
	or	%g0,%o2,%o0
	subcc	%o0,%o3,%g0
	.word	0x38680002		! bgu,a %xcc,.+8
	or	%g0,%o3,%o0

	retl
	nop
.size	_sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument
162
! _sparcv9_vis2_probe: execute one bshuffle instruction and return.
! Presumably the caller deals with the trap raised on CPUs that lack
! it -- confirm against the caller.
.global	_sparcv9_vis2_probe
.type	_sparcv9_vis2_probe,#function
.align	8
_sparcv9_vis2_probe:
	retl
	.word	0x81b00980		! delay slot: bshuffle %f0,%f0,%f0
.size	_sparcv9_vis2_probe,.-_sparcv9_vis2_probe
170
! _sparcv9_fmadd_probe: clear %f0 and %f2 with fxor, then execute one
! fmaddd on them and return. Presumably the caller deals with the trap
! raised on CPUs that lack fmaddd -- confirm against the caller.
.global	_sparcv9_fmadd_probe
.type	_sparcv9_fmadd_probe,#function
.align	8
_sparcv9_fmadd_probe:
	.word	0x81b00d80		! fxor %f0,%f0,%f0
	.word	0x85b08d82		! fxor %f2,%f2,%f2
	retl
	.word	0x81b80440		! delay slot: fmaddd %f0,%f0,%f2,%f0
.size	_sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
180
! _sparcv9_rdcfr: read ancillary state register 26.
!
! Out:	%o0 = value read from %asr26
.global	_sparcv9_rdcfr
.type	_sparcv9_rdcfr,#function
.align	8
_sparcv9_rdcfr:
	retl
	.word	0x91468000		! delay slot: rd %asr26,%o0
.size	_sparcv9_rdcfr,.-_sparcv9_rdcfr
188
! _sparcv9_vis3_probe: execute one xmulx instruction and return. All
! of its operands are %g0, so no register is modified. Presumably the
! caller deals with the trap raised on CPUs that lack it -- confirm
! against the caller.
.global	_sparcv9_vis3_probe
.type	_sparcv9_vis3_probe,#function
.align	8
_sparcv9_vis3_probe:
	retl
	.word	0x81b022a0		! delay slot: xmulx %g0,%g0,%g0
.size	_sparcv9_vis3_probe,.-_sparcv9_vis3_probe
196
! _sparcv9_random: execute the random instruction.
!
! Out:	%o0 = value delivered by the instruction
!
! The symbol size is measured from this function's own label, so that
! it covers exactly the two instructions below.
.global	_sparcv9_random
.type	_sparcv9_random,#function
.align	8
_sparcv9_random:
	retl
	.word	0x91b002a0		! delay slot: random %o0
.size	_sparcv9_random,.-_sparcv9_random
204
! _sparcv9_fjaesx_probe: execute one faesencx instruction and return.
! Presumably the caller deals with the trap raised on CPUs that lack
! it -- confirm against the caller.
!
! The symbol is marked as a function, like every other entry point in
! this file.
.global	_sparcv9_fjaesx_probe
.type	_sparcv9_fjaesx_probe,#function
.align	8
_sparcv9_fjaesx_probe:
	.word	0x81b09206		! faesencx %f2,%f6,%f0
	retl
	nop
.size	_sparcv9_fjaesx_probe,.-_sparcv9_fjaesx_probe
212
! OPENSSL_cleanse: store zero bytes over a memory range.
!
! In:	%o0 = start of the range
!	%o1 = number of bytes
!
! Ranges of up to 14 bytes are cleared one byte at a time. Longer
! ranges are first advanced byte by byte to an aligned address, then
! cleared with 8-byte stores (or 4-byte stores when the 32-bit build
! fails its CPU check), and the remaining tail is cleared by the byte
! loop.
.global	OPENSSL_cleanse
.type	OPENSSL_cleanse,#function
.align	32
OPENSSL_cleanse:
	subcc	%o1,14,%g0
	nop
#ifdef ABI64
	bgu	%xcc,.Lot
#else
	bgu	.Lot
#endif
	subcc	%o1,0,%g0		! delay slot: is the length zero?
	bnz	.Little
	nop
	retl				! nothing to clear
	nop

	! Byte loop: also entered with the tail left by the wide loops.
.Little:
	stb	%g0,[%o0]
	subcc	%o1,1,%o1
	bne	.Little
	add	%o0,1,%o0		! delay slot: advance the pointer
	retl
	nop

.align	32
.Lot:
#ifndef ABI64
	! CPU check: 0-1 sets N and C, so a V9 CPU reads 0x99 from the
	! condition code register.
	subcc	%g0,1,%g1
	.word	0x83408000		! rd %ccr,%g1
	subcc	%g1,0x99,%g0
	bnz	.v8lot
	nop
#endif

	! Advance byte by byte until the address is a multiple of 8.
.v9lot:
	andcc	%o0,7,%g0
	be	.v9aligned
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.v9lot
	add	%o0,1,%o0		! delay slot
.align	16,0x01000000			! padding is filled with nop encodings
.v9aligned:
	.word	0xc0720000		! stx %g0,[%o0]
	sub	%o1,8,%o1
	andcc	%o1,-8,%g0		! at least 8 bytes left?
#ifdef ABI64
	.word	0x126ffffd		! bnz %xcc,.v9aligned
#else
	.word	0x124ffffd		! bnz %icc,.v9aligned
#endif
	add	%o0,8,%o0		! delay slot

	subcc	%o1,0,%g0
	bnz	.Little			! clear the tail
	nop
	retl
	nop
#ifndef ABI64
	! Advance byte by byte until the address is a multiple of 4.
.v8lot:
	andcc	%o0,3,%g0
	be	.v8aligned
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.v8lot
	add	%o0,1,%o0		! delay slot
	nop
.v8aligned:
	st	%g0,[%o0]
	sub	%o1,4,%o1
	andcc	%o1,-4,%g0		! at least 4 bytes left?
	bne	.v8aligned
	add	%o0,4,%o0		! delay slot

	subcc	%o1,0,%g0
	bnz	.Little			! clear the tail
	nop
	retl
	nop
#endif
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
295
! CRYPTO_memcmp: compare two byte ranges without leaving the loop early.
!
! In:	%o0 = first range
!	%o1 = second range
!	%o2 = number of bytes
! Out:	%o0 = 0 when all bytes match or the length is 0, otherwise 1
!
! The loop always runs over the full length and accumulates the
! differences; there is no exit at the first mismatch.
.global	CRYPTO_memcmp
.type	CRYPTO_memcmp,#function
.align	16
CRYPTO_memcmp:
	subcc	%o2,0,%g0
#ifdef ABI64
	be,pn	%xcc,.Lno_data
#else
	be	.Lno_data
#endif
	xor	%g1,%g1,%g1		! delay slot: accumulator = 0
	nop

.Loop_cmp:
	ldub	[%o0],%o3
	add	%o0,1,%o0
	ldub	[%o1],%o4
	add	%o1,1,%o1
	subcc	%o2,1,%o2		! one byte fewer to go
	xor	%o3,%o4,%o4		! nonzero when the two bytes differ
#ifdef ABI64
	bne	%xcc,.Loop_cmp
#else
	bne	.Loop_cmp
#endif
	or	%o4,%g1,%g1		! delay slot: fold into the accumulator

	! Turn any nonzero accumulator into 1: negate, keep the sign bit.
	sub	%g0,%g1,%g1
	srl	%g1,31,%g1
.Lno_data:
	retl
	or	%g0,%g1,%o0		! delay slot: return value
.size	CRYPTO_memcmp,.-CRYPTO_memcmp
329
! _sparcv9_vis1_instrument_bus: add tick differences into an array.
!
! In:	%o0 = out, an array of 32-bit words
!	%o1 = cnt, the number of words to visit
! Out:	%o0 = cnt as passed in
!
! For every word the 64-byte block around it is block-loaded and
! block-stored back, then the tick difference since the previous round
! is added to the word with a single cas, which is not retried.
.global	_sparcv9_vis1_instrument_bus
.type	_sparcv9_vis1_instrument_bus,#function
.align	8
_sparcv9_vis1_instrument_bus:
	or	%g0,%o1,%o3		! keep cnt for the return value
	.word	0x99410000		! rd %tick,%o4
	or	%g0,%o4,%o5		! lasttick = tick
	set	0,%g4			! diff = 0

	! One round ahead of the loop, on the first word, with diff = 0.
	andn	%o0,63,%g1		! round the address down to 64 bytes
	.word	0xc1985e00		! ldda [%g1]0xf0,%f0	block load
	.word	0x8143e040		! membar #Sync
	.word	0xc1b85c00		! stda %f0,[%g1]0xe0	block store and commit
	.word	0x8143e040		! membar #Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4		! candidate = word + diff
	.word	0xc9e2100c		! cas [%o0],%o4,%g4

.Loop:
	.word	0x99410000		! rd %tick,%o4
	sub	%o4,%o5,%g4		! diff = tick - lasttick
	or	%g0,%o4,%o5		! lasttick = tick

	andn	%o0,63,%g1		! round the address down to 64 bytes
	.word	0xc1985e00		! ldda [%g1]0xf0,%f0	block load
	.word	0x8143e040		! membar #Sync
	.word	0xc1b85c00		! stda %f0,[%g1]0xe0	block store and commit
	.word	0x8143e040		! membar #Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4		! candidate = word + diff
	.word	0xc9e2100c		! cas [%o0],%o4,%g4
	subcc	%o1,1,%o1		! one word fewer to go
	bne	.Loop
	add	%o0,4,%o0		! delay slot: next word

	retl
	or	%g0,%o3,%o0		! delay slot: return cnt
.size	_sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
367
! _sparcv9_vis1_instrument_bus2: like the routine that adds tick
! differences into an array, but it moves on to the next word only
! when the tick difference changes from one round to the next.
!
! In:	%o0 = out, an array of 32-bit words
!	%o1 = cnt, the number of words available
!	%o2 = max, an upper bound on the number of rounds
! Out:	%o0 = cnt minus the number of words still unvisited when the
!	      loop ended
.global	_sparcv9_vis1_instrument_bus2
.type	_sparcv9_vis1_instrument_bus2,#function
.align	8
_sparcv9_vis1_instrument_bus2:
	or	%g0,%o1,%o3		! keep cnt for the return value
	sll	%o1,2,%o1		! count in bytes from here on

	.word	0x99410000		! rd %tick,%o4
	or	%g0,%o4,%o5		! lasttick = tick
	set	0,%g4			! diff = 0

	! One round ahead of the loop, with diff = 0.
	andn	%o0,63,%g1		! round the address down to 64 bytes
	.word	0xc1985e00		! ldda [%g1]0xf0,%f0	block load
	.word	0x8143e040		! membar #Sync
	.word	0xc1b85c00		! stda %f0,[%g1]0xe0	block store and commit
	.word	0x8143e040		! membar #Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4		! candidate = word + diff
	.word	0xc9e2100c		! cas [%o0],%o4,%g4

	.word	0x99410000		! rd %tick,%o4
	sub	%o4,%o5,%g4		! diff = tick - lasttick
	or	%g0,%o4,%o5		! lasttick = tick
	or	%g0,%g4,%g5		! lastdiff = diff
.Loop2:
	andn	%o0,63,%g1		! round the address down to 64 bytes
	.word	0xc1985e00		! ldda [%g1]0xf0,%f0	block load
	.word	0x8143e040		! membar #Sync
	.word	0xc1b85c00		! stda %f0,[%g1]0xe0	block store and commit
	.word	0x8143e040		! membar #Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4		! candidate = word + diff
	.word	0xc9e2100c		! cas [%o0],%o4,%g4

	subcc	%o2,1,%o2		! one round fewer allowed
	be	.Ldone2
	nop

	.word	0x99410000		! rd %tick,%o4
	sub	%o4,%o5,%g4		! diff = tick - lasttick
	or	%g0,%o4,%o5		! lasttick = tick
	subcc	%g4,%g5,%g0		! compare diff with lastdiff
	or	%g0,%g4,%g5		! lastdiff = diff, flags untouched

	! Build a step of 4 when diff differed from lastdiff, else 0.
	.word	0x83408000		! rd %ccr,%g1
	and	%g1,4,%g1		! isolate the zero flag
	xor	%g1,4,%g1		! flip it

	subcc	%o1,%g1,%o1		! conditionally count the word
	bne	.Loop2
	add	%o0,%g1,%o0		! delay slot: conditionally advance out

.Ldone2:
	srl	%o1,2,%o1		! bytes left back to words left
	retl
	sub	%o3,%o1,%o0		! delay slot: cnt - words left
.size	_sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
425