// Copyright 2004-2017 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License").  You may not use
// this file except in compliance with the License.  You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
// Works on all IA-64 platforms: Linux, HP-UX, Win64i...
// On Win64i compile with ias.exe.
.text

// ADDP is the instruction used on incoming pointer arguments before they
// are dereferenced (written as "ADDP rX=0,rY").  HP-UX builds without
// _LP64 select addp4, which forms a 64-bit address from a 32-bit pointer;
// every other target selects a plain add, i.e. a register copy when the
// immediate is 0.
#if defined(_HPUX_SOURCE) && !defined(_LP64)
#define	ADDP	addp4
#else
#define	ADDP	add
#endif

// OPENSSL_cpuid_setup
// In:   nothing is read.
// Out:  nothing is written.
// The routine is a single bundle that branches back to the caller
// through b0; this implementation performs no setup work at all.
.global	OPENSSL_cpuid_setup#
.proc	OPENSSL_cpuid_setup#
OPENSSL_cpuid_setup:
{ .mib;	br.ret.sptk.many	b0		};;
.endp	OPENSSL_cpuid_setup#

// OPENSSL_rdtsc
// In:   nothing is read.
// Out:  r8 = current value of the ar.itc application register
//       (the interval time counter).
// No other register is modified.
.global	OPENSSL_rdtsc#
.proc	OPENSSL_rdtsc#
OPENSSL_rdtsc:
{ .mib;	mov			r8=ar.itc
	br.ret.sptk.many	b0		};;
.endp   OPENSSL_rdtsc#

// OPENSSL_atomic_add
// Atomically adds a 32-bit amount to a 32-bit word in memory.
// (C prototype is declared elsewhere; presumably
//  int OPENSSL_atomic_add(int *val, int amount) -- confirm against header.)
// In:   r32 = address of the 32-bit word, r33 = amount to add
// Out:  r8  = the value that was stored, sign-extended from 32 bits
// Uses: r2 (value observed in memory), r3 (value the sum was based on),
//       ar.ccv, p6
//
// The incoming pointer is passed through ADDP first, exactly as the
// other routines in this file do, so that a 32-bit HP-UX pointer is
// turned into a usable address before the first load.
.global	OPENSSL_atomic_add#
.proc	OPENSSL_atomic_add#
.align	32
OPENSSL_atomic_add:
{ .mmi;	ADDP		r32=0,r32;;		// pointer -> address
	ld4		r2=[r32]		// initial snapshot
	nop.i		0		};;
.Lspin:
	// r2 = last value seen in memory (zero-extended by ld4/cmpxchg4)
{ .mii;	mov		ar.ccv=r2		// expected old value
	add		r8=r2,r33		// candidate new value
	mov		r3=r2		};;	// remember what we expected
{ .mmi;	mf;;
	cmpxchg4.acq	r2=[r32],r8,ar.ccv	// r2 = value actually found
	nop.i		0		};;
{ .mib;	cmp.ne		p6,p0=r2,r3		// somebody else got in first?
	nop.i		0
(p6)	br.dpnt		.Lspin		};;	// retry from the fresh r2
{ .mib;	nop.m		0
	sxt4		r8=r8			// return value is a signed 32-bit int
	br.ret.sptk.many	b0	};;
.endp	OPENSSL_atomic_add#

// OPENSSL_cleanse
// Overwrites a memory region with zero bytes.
// In:   r32 = start of the region (passed through ADDP before use),
//       r33 = length in bytes
// Out:  no return value is set
// Uses: r2, p6, p7; r32 and r33 are consumed as running pointer/count
//
// Strategy:
//   len == 0   return at once
//   len <  15  .Little: one byte per iteration
//   len >= 15  .Lot: single bytes until the pointer is 8-byte aligned,
//              .Laligned: 8 bytes per iteration while at least 8 remain,
//              then .Little for whatever tail (1..7 bytes) is left
.global	OPENSSL_cleanse#
.proc	OPENSSL_cleanse#
OPENSSL_cleanse:
{ .mib;	cmp.eq		p6,p0=0,r33	    // len==0
	ADDP		r32=0,r32
(p6)	br.ret.spnt	b0		};;
{ .mib;	and		r2=7,r32	    // r2 = misalignment of ptr
	cmp.leu		p6,p0=15,r33	    // len>=15
(p6)	br.cond.dptk	.Lot		};;

	// Byte loop.  The compare and the decrement share one instruction
	// group, so the compare still sees the count before the decrement.
.Little:
{ .mib;	st1		[r32]=r0,1
	cmp.ltu		p6,p7=1,r33	}  // len>1
{ .mbb;	add		r33=-1,r33	   // len--
(p6)	br.cond.dptk	.Little
(p7)	br.ret.sptk.many	b0	};;

	// Alignment loop: r2 holds ptr&7 on entry to every pass.
.Lot:
{ .mib;	cmp.eq		p6,p0=0,r2
(p6)	br.cond.dptk	.Laligned	};;
{ .mmi;	st1		[r32]=r0,1;;
	and		r2=7,r32	}   // re-test the advanced pointer
{ .mib;	add		r33=-1,r33
	br		.Lot		};;

	// 8-byte loop.  r2 is computed from the count before the subtraction
	// (same instruction group), so "8 < r2" means at least 8 bytes are
	// still left after this store.
.Laligned:
{ .mmi;	st8		[r32]=r0,8
	and		r2=-8,r33	    // len&~7
	add		r33=-8,r33	};; // len-=8
{ .mib;	cmp.ltu		p6,p0=8,r2	    // ((len+8)&~7)>8
(p6)	br.cond.dptk	.Laligned	};;

	// 0..7 bytes remain: finish in the byte loop unless already done.
{ .mbb;	cmp.eq		p6,p7=r0,r33
(p7)	br.cond.dpnt	.Little
(p6)	br.ret.sptk.many	b0	};;
.endp	OPENSSL_cleanse#

// CRYPTO_memcmp
// Compares two byte strings of equal length; every byte is always
// visited, there is no early exit on the first difference.
// In:   r32 = first buffer, r33 = second buffer (both passed through
//       ADDP into r16/r17), r34 = length in bytes
// Out:  r8 = 0 if length is 0 or all bytes match, 1 otherwise
// Saves/restores: ar.lc (in r3), predicates (in r9); ar.pfs is captured
//       in r2 by alloc
// Uses: r10, r16, r17, rotating r32-r39, p6, rotating p16-p19
//
// The loop is software-pipelined with four stages (ar.ec=4) and an
// 8-register rotating window:
//   stage 0 (p16): load a[i] into r32 and b[i] into r36
//   stage 2 (p18): the same values are now visible as r34/r38; xor them
//   stage 3 (p19): the xor result is now visible as r35; or it into r8
//
// .align 32 followed by .skip 16 starts the routine 16 bytes past a
// 32-byte boundary; with the five 16-byte bundles that precede
// .Loop_cmp_ctop this places the loop body on a 32-byte boundary.
.global	CRYPTO_memcmp#
.proc	CRYPTO_memcmp#
.align	32
.skip	16
CRYPTO_memcmp:
	.prologue
{ .mib;	mov		r8=0
	cmp.eq		p6,p0=0,r34	    // len==0?
(p6)	br.ret.spnt	b0		};;
	.save		ar.pfs,r2
{ .mib;	alloc		r2=ar.pfs,3,5,0,8
	.save		ar.lc,r3
	mov		r3=ar.lc
	brp.loop.imp	.Loop_cmp_ctop,.Loop_cmp_cend-16
					}
{ .mib;	sub		r10=r34,r0,1	    // len-1: ar.lc counts extra passes
	.save		pr,r9
	mov		r9=pr		};;
{ .mii;	ADDP		r16=0,r32
	mov		ar.lc=r10
	mov		ar.ec=4		}
{ .mib;	ADDP		r17=0,r33
	mov		pr.rot=1<<16	};; // only stage 0 (p16) enabled at start

.Loop_cmp_ctop:
{ .mib;	(p16)	ld1	r32=[r16],1
	(p18)	xor	r34=r34,r38	}
{ .mib;	(p16)	ld1	r36=[r17],1
	(p19)	or	r8=r8,r35
	br.ctop.sptk	.Loop_cmp_ctop	};;
.Loop_cmp_cend:

{ .mib;	cmp.ne		p6,p0=0,r8	    // any differing bit collected?
	mov		ar.lc=r3	};;
{ .mib;
(p6)	mov		r8=1		    // collapse result to 0/1
	mov		pr=r9,0x1ffff
	br.ret.sptk.many	b0	};;
.endp	CRYPTO_memcmp#

// OPENSSL_instrument_bus
// Fills an array of 32-bit words with ar.itc deltas measured around a
// cache-line flush plus compare-and-exchange on each word.
// In:   r32 = output array (passed through ADDP), r33 = element count
// Out:  r8  = original count minus the count left in r33 at exit
// Uses: r2 (saved count), r3 (cmpxchg result, ignored), r9 (lasttick),
//       r10 (diff), ar.ccv, p6
//
// For each element: flush its cache line (fc), load it, add the current
// diff and write the sum back with cmpxchg4; the exchange result is not
// checked.  The first element is touched once before the loop with
// diff = 0, without advancing the pointer.
// The loop tests the count only after decrementing it, so the caller
// must pass a non-zero count.
.global	OPENSSL_instrument_bus#
.proc	OPENSSL_instrument_bus#
OPENSSL_instrument_bus:
{ .mmi;	mov		r2=r33			// put aside cnt
	ADDP		r32=0,r32	}
{ .mmi;	mov		r8=ar.itc;;
	mov		r10=r0			// diff=0
	mov		r9=r8		};;	// lasttick=tick

{ .mmi;	fc		r32;;
	ld4		r8=[r32]	};;
{ .mmi;	mf
	mov		ar.ccv=r8
	add		r8=r8,r10	};;
{ .mmi;	cmpxchg4.acq	r3=[r32],r8,ar.ccv
					};;
.Loop:
{ .mmi;	mov		r8=ar.itc;;
	sub		r10=r8,r9		// diff=tick-lasttick
	mov		r9=r8		};;	// lasttick=tick
{ .mmi;	fc		r32;;
	ld4		r8=[r32]	};;
{ .mmi;	mf
	mov		ar.ccv=r8
	add		r8=r8,r10	};;
{ .mmi;	cmpxchg4.acq	r3=[r32],r8,ar.ccv
	add		r33=-1,r33		// --cnt
	add		r32=4,r32	};;	// next 32-bit slot
{ .mib;	cmp4.ne		p6,p0=0,r33		// 32-bit test of cnt
(p6)	br.cond.dptk	.Loop		};;

{ .mib;	sub		r8=r2,r33
	br.ret.sptk.many	b0	};;
.endp	OPENSSL_instrument_bus#

// OPENSSL_instrument_bus2
// Variant of the bus probe that advances to the next output slot only
// when the measured ar.itc delta differs from the previous one, and
// that gives up after a bounded number of probes.
// In:   r32 = output array (passed through ADDP), r33 = element count,
//       r34 = maximum number of probes
// Out:  r8  = original count minus the count left in r33 at exit
// Uses: r2 (saved count), r3 (cmpxchg result, ignored), r9 (lasttick),
//       r10 (diff), r11 (lastdiff), ar.ccv, p6, p7
//
// Each pass flushes the current slot (fc), loads it, adds diff and
// writes the sum back with cmpxchg4 (result not checked).  The loop
// ends when the probe budget in r34 reaches zero or when the count in
// r33 reaches zero; both are tested as 32-bit quantities after being
// decremented.
.global	OPENSSL_instrument_bus2#
.proc	OPENSSL_instrument_bus2#
OPENSSL_instrument_bus2:
{ .mmi;	mov		r2=r33			// put aside cnt
	ADDP		r32=0,r32	}
{ .mmi;	mov		r8=ar.itc;;
	mov		r10=r0			// diff=0
	mov		r9=r8		};;	// lasttick=tick

{ .mmi;	fc		r32;;
	ld4		r8=[r32]	};;
{ .mmi;	mf
	mov		ar.ccv=r8
	add		r8=r8,r10	};;
{ .mmi;	cmpxchg4.acq	r3=[r32],r8,ar.ccv
					};;

{ .mmi;	mov		r8=ar.itc;;
	sub		r10=r8,r9		// diff=tick-lasttick
	mov		r9=r8		};;	// lasttick=tick
.Loop2:
{ .mmi;	mov		r11=r10			// lastdiff=diff
	add		r34=-1,r34	};;	// --max
{ .mmi;	fc		r32;;
	ld4		r8=[r32]
	cmp4.eq		p6,p0=0,r34	};;	// probe budget exhausted?
{ .mmi;	mf
	mov		ar.ccv=r8
	add		r8=r8,r10	};;
{ .mmb;	cmpxchg4.acq	r3=[r32],r8,ar.ccv
(p6)	br.cond.spnt	.Ldone2		};;

{ .mmi;	mov		r8=ar.itc;;
	sub		r10=r8,r9		// diff=tick-lasttick
	mov		r9=r8		};;	// lasttick=tick
{ .mmi;	cmp.ne		p6,p0=r10,r11;;		// diff!=lastdiff
(p6)	add		r33=-1,r33	};;	// conditional --cnt
{ .mib;	cmp4.ne		p7,p0=0,r33
(p6)	add		r32=4,r32		// conditional ++out
(p7)	br.cond.dptk	.Loop2		};;
.Ldone2:
{ .mib;	sub		r8=r2,r33
	br.ret.sptk.many	b0	};;
.endp	OPENSSL_instrument_bus2#
