// Copyright 2004-2017 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
// Works on all IA-64 platforms: Linux, HP-UX, Win64i...
// On Win64i compile with ias.exe.
.text

// ADDP is applied to every pointer argument before it is dereferenced.
// On 32-bit HP-UX builds (_HPUX_SOURCE without _LP64) it expands to addp4;
// on every other target it is a plain add, so "ADDP rX=0,rY" is then
// simply a register copy.
#if defined(_HPUX_SOURCE) && !defined(_LP64)
#define ADDP    addp4
#else
#define ADDP    add
#endif

// OPENSSL_cpuid_setup: performs no work on this platform, returns at once.
.global OPENSSL_cpuid_setup#
.proc   OPENSSL_cpuid_setup#
OPENSSL_cpuid_setup:
{ .mib; br.ret.sptk.many        b0              };;
.endp   OPENSSL_cpuid_setup#

// OPENSSL_rdtsc: returns the current value of the ar.itc application
// register in r8.
.global OPENSSL_rdtsc#
.proc   OPENSSL_rdtsc#
OPENSSL_rdtsc:
{ .mib; mov                     r8=ar.itc
        br.ret.sptk.many        b0              };;
.endp   OPENSSL_rdtsc#

// OPENSSL_atomic_add: atomically adds r33 to the 32-bit word at [r32].
//   In:  r32 = address of the 32-bit word, r33 = amount to add
//   Out: r8  = the sum that was stored, sign-extended from 32 bits
//   Scratch: r2, r3, ar.ccv, p6
// The update is a compare-and-exchange retry loop: the sum is stored only
// if memory still holds the value the sum was computed from.
.global OPENSSL_atomic_add#
.proc   OPENSSL_atomic_add#
.align  32
OPENSSL_atomic_add:
{ .mmi; ADDP            r32=0,r32;;             // same pointer treatment as the
                                                // other procedures in this file
        ld4             r2=[r32]                // r2 = current value
        nop.i           0               };;
.Lspin:
{ .mii; mov             ar.ccv=r2               // expected old value
        add             r8=r2,r33               // candidate new value
        mov             r3=r2           };;     // keep a copy of the expectation
{ .mmi; mf;;                                    // memory fence before the exchange
        cmpxchg4.acq    r2=[r32],r8,ar.ccv      // r2 = value found in memory
        nop.i           0               };;
{ .mib; cmp.ne          p6,p0=r2,r3             // found != expected: lost the race
        nop.i           0
(p6)    br.dpnt         .Lspin          };;     // retry with the value just read
{ .mib; nop.m           0
        sxt4            r8=r8                   // result is a signed 32-bit value
        br.ret.sptk.many        b0      };;
.endp   OPENSSL_atomic_add#

// OPENSSL_cleanse: stores zero bytes over r33 bytes starting at r32.
//   In:  r32 = address, r33 = len
//   Scratch: r2, p6, p7
// Lengths below 15 are cleared one byte at a time. Longer buffers are
// first advanced byte-wise to an 8-byte boundary, then cleared with 8-byte
// stores, and any remaining tail goes through the byte loop again.
.global OPENSSL_cleanse#
.proc   OPENSSL_cleanse#
OPENSSL_cleanse:
{ .mib; cmp.eq          p6,p0=0,r33             // len==0
        ADDP            r32=0,r32
(p6)    br.ret.spnt     b0              };;
{ .mib; and             r2=7,r32                // r2 = misalignment of the pointer
        cmp.leu         p6,p0=15,r33            // len>=15
(p6)    br.cond.dptk    .Lot            };;

// Byte loop: one zero byte per iteration until len is exhausted.
.Little:
{ .mib; st1             [r32]=r0,1
        cmp.ltu         p6,p7=1,r33     }       // len>1
{ .mbb; add             r33=-1,r33              // len--
(p6)    br.cond.dptk    .Little
(p7)    br.ret.sptk.many        b0      };;

// Alignment loop: zero single bytes until the pointer is 8-byte aligned.
.Lot:
{ .mib; cmp.eq          p6,p0=0,r2
(p6)    br.cond.dptk    .Laligned       };;
{ .mmi; st1             [r32]=r0,1;;
        and             r2=7,r32        }       // misalignment of the advanced pointer
{ .mib; add             r33=-1,r33
        br              .Lot            };;

// Bulk loop: 8 zero bytes per iteration.
.Laligned:
{ .mmi; st8             [r32]=r0,8
        and             r2=-8,r33               // len&~7
        add             r33=-8,r33      };;     // len-=8
{ .mib; cmp.ltu         p6,p0=8,r2              // ((len+8)&~7)>8
(p6)    br.cond.dptk    .Laligned       };;

{ .mbb; cmp.eq          p6,p7=r0,r33            // anything left over?
(p7)    br.cond.dpnt    .Little                 // yes: finish the tail byte-wise
(p6)    br.ret.sptk.many        b0      };;
.endp   OPENSSL_cleanse#

// CRYPTO_memcmp: compares r34 bytes at r32 and r33 without leaving the
// loop early on a mismatch.
//   In:  r32, r33 = addresses of the two buffers, r34 = len
//   Out: r8 = 0 if len is 0 or all bytes match, 1 otherwise
//   Saved and restored: ar.lc (in r3), pr (in r9)
// The loop is a br.ctop loop over 8 rotating registers (r32-r39), with
// ar.lc = len-1 and ar.ec = 4. Because the incoming arguments live in the
// rotating area, the pointers are copied to r16/r17 and the length to r10
// before the loop starts.
//   (p16): load one byte from each buffer
//   (p18): xor the two bytes loaded two iterations earlier
//   (p19): or the xor result into the accumulator r8
.global CRYPTO_memcmp#
.proc   CRYPTO_memcmp#
.align  32
.skip   16
CRYPTO_memcmp:
        .prologue
{ .mib; mov             r8=0
        cmp.eq          p6,p0=0,r34             // len==0?
(p6)    br.ret.spnt     b0              };;
        .save           ar.pfs,r2
{ .mib; alloc           r2=ar.pfs,3,5,0,8
        .save           ar.lc,r3
        mov             r3=ar.lc
        brp.loop.imp    .Loop_cmp_ctop,.Loop_cmp_cend-16
                                        }
{ .mib; sub             r10=r34,r0,1            // r10 = len-1
        .save           pr,r9
        mov             r9=pr           };;
{ .mii; ADDP            r16=0,r32
        mov             ar.lc=r10
        mov             ar.ec=4         }
{ .mib; ADDP            r17=0,r33
        mov             pr.rot=1<<16    };;     // only p16 set on entry

.Loop_cmp_ctop:
{ .mib; (p16)   ld1     r32=[r16],1
        (p18)   xor     r34=r34,r38     }
{ .mib; (p16)   ld1     r36=[r17],1
        (p19)   or      r8=r8,r35
        br.ctop.sptk    .Loop_cmp_ctop  };;
.Loop_cmp_cend:

{ .mib; cmp.ne          p6,p0=0,r8              // any differing bit seen?
        mov             ar.lc=r3        };;
{ .mib;
(p6)    mov             r8=1                    // collapse non-zero to 1
        mov             pr=r9,0x1ffff
        br.ret.sptk.many        b0      };;
.endp   CRYPTO_memcmp#

// OPENSSL_instrument_bus: fills an array of 32-bit words with successive
// ar.itc differences.
//   In:  r32 = address of the output array, r33 = cnt
//   Out: r8  = cnt minus the remaining count at exit
//   Scratch: r2, r3, r8, r9, r10, ar.ccv, p6
// For each element the line holding it is flushed (fc), the word is
// reloaded, the latest tick difference is added and the sum is written
// back with cmpxchg4. The value returned by cmpxchg4 (r3) is not examined,
// so a failed exchange is not retried.
.global OPENSSL_instrument_bus#
.proc   OPENSSL_instrument_bus#
OPENSSL_instrument_bus:
{ .mmi; mov             r2=r33                  // put aside cnt
        ADDP            r32=0,r32       }
{ .mmi; mov             r8=ar.itc;;
        mov             r10=r0                  // diff=0
        mov             r9=r8           };;     // lasttick=tick

// Initial pass over the first element with a zero difference.
{ .mmi; fc              r32;;
        ld4             r8=[r32]        };;
{ .mmi; mf
        mov             ar.ccv=r8
        add             r8=r8,r10       };;
{ .mmi; cmpxchg4.acq    r3=[r32],r8,ar.ccv
                                        };;
.Loop:
{ .mmi; mov             r8=ar.itc;;
        sub             r10=r8,r9               // diff=tick-lasttick
        mov             r9=r8           };;     // lasttick=tick
{ .mmi; fc              r32;;
        ld4             r8=[r32]        };;
{ .mmi; mf
        mov             ar.ccv=r8
        add             r8=r8,r10       };;
{ .mmi; cmpxchg4.acq    r3=[r32],r8,ar.ccv
        add             r33=-1,r33              // --cnt
        add             r32=4,r32       };;     // ++out
{ .mib; cmp4.ne         p6,p0=0,r33
(p6)    br.cond.dptk    .Loop           };;

{ .mib; sub             r8=r2,r33
        br.ret.sptk.many        b0      };;
.endp   OPENSSL_instrument_bus#

// OPENSSL_instrument_bus2: like OPENSSL_instrument_bus, but the output
// position and cnt advance only when the tick difference changes, and the
// total number of iterations is bounded by max.
//   In:  r32 = address of the output array, r33 = cnt, r34 = max
//   Out: r8  = cnt minus the remaining count at exit
//   Scratch: r2, r3, r8, r9, r10, r11, ar.ccv, p6, p7
.global OPENSSL_instrument_bus2#
.proc   OPENSSL_instrument_bus2#
OPENSSL_instrument_bus2:
{ .mmi; mov             r2=r33                  // put aside cnt
        ADDP            r32=0,r32       }
{ .mmi; mov             r8=ar.itc;;
        mov             r10=r0
        mov             r9=r8           };;

// Initial pass over the first element with a zero difference.
{ .mmi; fc              r32;;
        ld4             r8=[r32]        };;
{ .mmi; mf
        mov             ar.ccv=r8
        add             r8=r8,r10       };;
{ .mmi; cmpxchg4.acq    r3=[r32],r8,ar.ccv
                                        };;

{ .mmi; mov             r8=ar.itc;;
        sub             r10=r8,r9
        mov             r9=r8           };;
.Loop2:
{ .mmi; mov             r11=r10                 // lastdiff=diff
        add             r34=-1,r34      };;     // --max
{ .mmi; fc              r32;;
        ld4             r8=[r32]
        cmp4.eq         p6,p0=0,r34     };;     // max exhausted?
{ .mmi; mf
        mov             ar.ccv=r8
        add             r8=r8,r10       };;
{ .mmb; cmpxchg4.acq    r3=[r32],r8,ar.ccv
(p6)    br.cond.spnt    .Ldone2         };;     // leave after this last update

{ .mmi; mov             r8=ar.itc;;
        sub             r10=r8,r9               // diff=tick-lasttick
        mov             r9=r8           };;     // lasttick=tick
{ .mmi; cmp.ne          p6,p0=r10,r11;;         // diff!=lastdiff
(p6)    add             r33=-1,r33      };;     // conditional --cnt
{ .mib; cmp4.ne         p7,p0=0,r33
(p6)    add             r32=4,r32               // conditional ++out
(p7)    br.cond.dptk    .Loop2          };;
.Ldone2:
{ .mib; sub             r8=r2,r33
        br.ret.sptk.many        b0      };;
.endp   OPENSSL_instrument_bus2#