! Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
!
! Licensed under the Apache License 2.0 (the "License"). You may not use
! this file except in compliance with the License. You can obtain a copy
! in the file LICENSE in the source distribution or at
! https://www.openssl.org/source/license.html

! Select the 64-bit ABI settings when the compiler driver says so.
#if defined(__SUNPRO_C) && defined(__sparcv9)
# define ABI64 /* They've said -xarch=v9 at command line */
#elif defined(__GNUC__) && defined(__arch64__)
# define ABI64 /* They've said -m64 at command line */
#endif

! FRAME is the stack adjustment passed to save, BIAS is added to %sp
! before it is used as an address.
#ifdef ABI64
  .register	%g2,#scratch
  .register	%g3,#scratch
# define	FRAME	-192
# define	BIAS	2047
#else
# define	FRAME	-96
# define	BIAS	0
#endif

.text

! OPENSSL_atomic_add
!
! In:	%o0 = address of a 32-bit word
!	%o1 = amount to add
! Out:	%o0 = the new value, sign-extended from 32 bits
!
! V9 probe (32-bit build only): subcc computes 0-1 and the hand-encoded
! rd %ccr reads the condition codes back. The value 0x99 (N and C set in
! both the 32-bit and the 64-bit condition-code fields) is taken to mean
! that V9 instructions are usable and the cas loop at .v9 is used.
! Otherwise a register window is opened and the word is updated under a
! swap-based busy marker, yielding the CPU while the marker is held.
! NOTE(review): what a pre-V9 CPU returns for the rd encoding is not
! shown in this file - confirm against the CPU manuals.
.global	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,#function
.align	32
OPENSSL_atomic_add:
#ifndef ABI64
	subcc	%g0,1,%o2
	.word	0x95408000	!rd	%ccr,%o2, V9 probe described in the routine header
	cmp	%o2,0x99
	be	.v9
	nop
	save	%sp,FRAME,%sp
	ba	.enter
	nop
#ifdef __sun
! Note that you do not have to link with libthread to call thr_yield,
! as libc provides a stub, which is overloaded the moment you link
! with *either* libpthread or libthread...
#define	YIELD_CPU	thr_yield
#else
! applies at least to Linux and FreeBSD... Feedback expected...
#define	YIELD_CPU	sched_yield
#endif
.spin:	call	YIELD_CPU
	nop
.enter:	ld	[%i0],%i2
	cmp	%i2,-4096	! NOTE(review): why -4096 also means busy is not evident here
	be	.spin
	mov	-1,%i2		! delay slot: -1 is the busy marker
	swap	[%i0],%i2	! install the marker, fetch the previous value
	cmp	%i2,-1		! previous value was the marker: somebody else is updating
	be	.spin
	add	%i2,%i1,%i2	! delay slot: new value (discarded when spinning, .enter reloads)
	stbar
	st	%i2,[%i0]	! publish the new value, which also replaces the marker
	sra	%i2,%g0,%i0	! return value, sign-extended
	ret
	restore
.v9:
#endif
	ld	[%o0],%o2
1:	add	%o1,%o2,%o3
	.word	0xd7e2100a	!cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
	cmp	%o2,%o3		! equal means the word still held the expected value
	bne	1b
	mov	%o3,%o2		! cas is always fetching to dest. register
	add	%o1,%o2,%o0	! OpenSSL expects the new value
	retl
	sra	%o0,%g0,%o0	! we return signed int, remember?
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

! _sparcv9_rdtick
!
! Out:	%o0 = value of %tick, %o1 = %o0 shifted right by 32 bits,
!	or both zero when the condition-code probe does not read 0x99.
!
! The probe is subcc 0-1 followed by a hand-encoded rd %ccr; 0x99 is
! treated as the sign that V9 instructions (rd %tick, srlx) can be used.
.global	_sparcv9_rdtick
.align	32
_sparcv9_rdtick:
	subcc	%g0,1,%o0
	.word	0x91408000	!rd	%ccr,%o0
	cmp	%o0,0x99
	bne	.notick
	xor	%o0,%o0,%o0	! delay slot, runs on both paths: %o0 = 0
	.word	0x91410000	!rd	%tick,%o0
	retl
	.word	0x93323020	!srlx	%o0,32,%o1
.notick:
	retl
	xor	%o1,%o1,%o1	! delay slot: %o1 = 0, %o0 is already 0
.type	_sparcv9_rdtick,#function
.size	_sparcv9_rdtick,.-_sparcv9_rdtick

! _sparcv9_vis1_probe
!
! Executes two hand-encoded VIS1 instructions: a 16-bit partial load
! from the stack area at %sp+BIAS+2 into %f0, then fxor on %f0.
! No meaningful value is produced; %o1 and %f0 are overwritten.
! NOTE(review): presumably the caller arranges to catch the illegal
! instruction trap on CPUs without VIS1 - confirm against the caller.
.global	_sparcv9_vis1_probe
.align	8
_sparcv9_vis1_probe:
	add	%sp,BIAS+2,%o1
	.word	0xc19a5a40	!ldda	[%o1]ASI_FP16_P,%f0
	retl
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
.type	_sparcv9_vis1_probe,#function
.size	_sparcv9_vis1_probe,.-_sparcv9_vis1_probe

! Probe and instrument VIS1 instruction. Output is number of cycles it
! takes to execute rdtick and pair of VIS1 instructions. US-Tx VIS unit
! is slow (documented to be 6 cycles on T2) and the core is in-order
! single-issue, it should be possible to distinguish Tx reliably...
! Observed return values are:
!
!	UltraSPARC IIe		7
!	UltraSPARC III		7
!	UltraSPARC T1		24
!	SPARC T4		65(*)
!
! (*)	result has less to do with VIS instruction latencies, rdtick
!	appears that slow, but it does the trick in sense that FP and
!	VIS code paths are still slower than integer-only ones.
!
! Numbers for T2 and SPARC64 V-VII are more than welcomed.
!
! It would be possible to detect specifically US-T1 by instrumenting
! fmul8ulx16, which is emulated on T1 and as such accounts for quite
! a lot of %tick-s, couple of thousand on Linux...
! _sparcv9_vis1_instrument
!
! Out:	%o0 = smallest of four consecutive %tick deltas, each delta
!	covering one pair of fxor instructions plus one rd %tick.
! Overwrites %o1-%o4, %f0 and %f2.
.global	_sparcv9_vis1_instrument
.align	8
_sparcv9_vis1_instrument:
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x91410000	!rd	%tick,%o0
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x93410000	!rd	%tick,%o1
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x95410000	!rd	%tick,%o2
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x97410000	!rd	%tick,%o3
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x99410000	!rd	%tick,%o4

	! calculate intervals
	sub	%o1,%o0,%o0
	sub	%o2,%o1,%o1
	sub	%o3,%o2,%o2
	sub	%o4,%o3,%o3

	! find minimum value
	! Each annulled bgu,a to .+8 makes the following mov conditional:
	! the mov runs only when %o0 is (unsigned) greater than the candidate.
	cmp	%o0,%o1
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o1,%o0
	cmp	%o0,%o2
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o2,%o0
	cmp	%o0,%o3
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o3,%o0

	retl
	nop
.type	_sparcv9_vis1_instrument,#function
.size	_sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument

! _sparcv9_vis2_probe
! Executes one hand-encoded bshuffle (in the delay slot of retl).
! NOTE(review): presumably the caller catches the trap raised on CPUs
! lacking the instruction - the same applies to the other *_probe
! routines below; confirm against the caller.
.global	_sparcv9_vis2_probe
.align	8
_sparcv9_vis2_probe:
	retl
	.word	0x81b00980	!bshuffle	%f0,%f0,%f0
.type	_sparcv9_vis2_probe,#function
.size	_sparcv9_vis2_probe,.-_sparcv9_vis2_probe

! _sparcv9_fmadd_probe
! Clears %f0 and %f2 with fxor, then executes one hand-encoded fmaddd.
.global	_sparcv9_fmadd_probe
.align	8
_sparcv9_fmadd_probe:
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	retl
	.word	0x81b80440	!fmaddd	%f0,%f0,%f2,%f0
.type	_sparcv9_fmadd_probe,#function
.size	_sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe

! _sparcv9_rdcfr
! Out:	%o0 = content of %asr26.
.global	_sparcv9_rdcfr
.align	8
_sparcv9_rdcfr:
	retl
	.word	0x91468000	!rd	%asr26,%o0
.type	_sparcv9_rdcfr,#function
.size	_sparcv9_rdcfr,.-_sparcv9_rdcfr

! _sparcv9_vis3_probe
! Executes one hand-encoded xmulx whose result goes to %g0.
.global	_sparcv9_vis3_probe
.align	8
_sparcv9_vis3_probe:
	retl
	.word	0x81b022a0	!xmulx	%g0,%g0,%g0
.type	_sparcv9_vis3_probe,#function
.size	_sparcv9_vis3_probe,.-_sparcv9_vis3_probe

! _sparcv9_random
! Out:	%o0 = result of one hand-encoded random instruction.
.global	_sparcv9_random
.align	8
_sparcv9_random:
	retl
	.word	0x91b002a0	!random	%o0
.type	_sparcv9_random,#function
! The size is measured from this routine's own label; measuring from
! _sparcv9_vis3_probe would make the symbol span the previous routine.
.size	_sparcv9_random,.-_sparcv9_random

! _sparcv9_fjaesx_probe
! Executes one hand-encoded faesencx writing %f0.
.global	_sparcv9_fjaesx_probe
.align	8
_sparcv9_fjaesx_probe:
	.word	0x81b09206	!faesencx %f2,%f6,%f0
	retl
	nop
.type	_sparcv9_fjaesx_probe,#function
.size	_sparcv9_fjaesx_probe,.-_sparcv9_fjaesx_probe

! OPENSSL_cleanse
!
! In:	%o0 = start of buffer, %o1 = length in bytes
! Out:	nothing; the buffer is filled with zero bytes.
!
! Lengths up to 14 are cleared one byte at a time (.Little). Longer
! buffers are first advanced byte-wise to an aligned address, cleared
! with 8-byte stores (V9) or 4-byte stores (V8 fallback, 32-bit build
! only), and the remaining tail is handed back to .Little.
.global	OPENSSL_cleanse
.align	32
OPENSSL_cleanse:
	cmp	%o1,14
	nop
#ifdef ABI64
	bgu	%xcc,.Lot
#else
	bgu	.Lot
#endif
	cmp	%o1,0		! delay slot: flags for the short path
	bne	.Little
	nop
	retl			! length 0: nothing to do
	nop

.Little:
	stb	%g0,[%o0]
	subcc	%o1,1,%o1
	bnz	.Little
	add	%o0,1,%o0
	retl
	nop
.align	32
.Lot:
#ifndef ABI64
	! 32-bit build: pick the 8-byte path only when rd %ccr after
	! subcc 0-1 reads 0x99, which is treated as the sign of a V9 CPU.
	subcc	%g0,1,%g1
	.word	0x83408000	!rd	%ccr,%g1
	cmp	%g1,0x99
	bne	.v8lot
	nop
#endif

.v9lot:	andcc	%o0,7,%g0	! byte stores until the pointer is 8-byte aligned
	bz	.v9aligned
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.v9lot
	add	%o0,1,%o0
.align	16,0x01000000
.v9aligned:
	.word	0xc0720000	!stx	%g0,[%o0]
	sub	%o1,8,%o1
	andcc	%o1,-8,%g0	! at least 8 bytes left?
#ifdef ABI64
	.word	0x126ffffd	!bnz	%xcc,.v9aligned
#else
	.word	0x124ffffd	!bnz	%icc,.v9aligned
#endif
	add	%o0,8,%o0

	cmp	%o1,0		! fewer than 8 bytes remain
	bne	.Little
	nop
	retl
	nop
#ifndef ABI64
.v8lot:	andcc	%o0,3,%g0	! byte stores until the pointer is 4-byte aligned
	bz	.v8aligned
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.v8lot
	add	%o0,1,%o0
	nop
.v8aligned:
	st	%g0,[%o0]
	sub	%o1,4,%o1
	andcc	%o1,-4,%g0	! at least 4 bytes left?
	bnz	.v8aligned
	add	%o0,4,%o0

	cmp	%o1,0		! fewer than 4 bytes remain
	bne	.Little
	nop
	retl
	nop
#endif
.type	OPENSSL_cleanse,#function
.size	OPENSSL_cleanse,.-OPENSSL_cleanse

! CRYPTO_memcmp
!
! In:	%o0, %o1 = the two buffers, %o2 = length in bytes
! Out:	%o0 = 0 when all bytes are equal (or the length is 0), 1 otherwise
!
! Every byte pair is always read; the XOR of each pair is OR-ed into
! %g1 and there is no exit from the loop before the length is used up.
.global	CRYPTO_memcmp
.align	16
CRYPTO_memcmp:
	cmp	%o2,0
#ifdef ABI64
	beq,pn	%xcc,.Lno_data
#else
	beq	.Lno_data
#endif
	xor	%g1,%g1,%g1	! delay slot: accumulator = 0 on both paths
	nop

.Loop_cmp:
	ldub	[%o0],%o3
	add	%o0,1,%o0
	ldub	[%o1],%o4
	add	%o1,1,%o1
	subcc	%o2,1,%o2
	xor	%o3,%o4,%o4
#ifdef ABI64
	bnz	%xcc,.Loop_cmp
#else
	bnz	.Loop_cmp
#endif
	or	%o4,%g1,%g1	! delay slot: fold this byte difference in

	sub	%g0,%g1,%g1	! negate: bit 31 is set only for a non-zero accumulator
	srl	%g1,31,%g1	! reduce to 0 or 1
.Lno_data:
	retl
	mov	%g1,%o0
.type	CRYPTO_memcmp,#function
.size	CRYPTO_memcmp,.-CRYPTO_memcmp

! _sparcv9_vis1_instrument_bus
!
! In:	%o0 = out, address of 32-bit words; %o1 = cnt
! Out:	%o0 = cnt as passed in
!
! Per step: read %tick, block-load and block-store the 64-byte line
! that contains the current word, then use cas to try to add the tick
! delta to the word. The pointer advances by one word per iteration.
! cnt is decremented before it is tested, so it is expected to be
! non-zero on entry. Overwrites %o3-%o5, %g1, %g4 and the %f registers
! filled by the block load.
.global	_sparcv9_vis1_instrument_bus
.align	8
_sparcv9_vis1_instrument_bus:
	mov	%o1,%o3				! save cnt
	.word	0x99410000	!rd	%tick,%o4	! tick
	mov	%o4,%o5				! lasttick = tick
	set	0,%g4				! diff

	andn	%o0,63,%g1			! 64-byte aligned address of the line
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

.Loop:	.word	0x99410000	!rd	%tick,%o4
	sub	%o4,%o5,%g4			! diff=tick-lasttick
	mov	%o4,%o5				! lasttick=tick

	andn	%o0,63,%g1
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4
	subcc	%o1,1,%o1			! --$cnt
	bnz	.Loop
	add	%o0,4,%o0			! ++$out

	retl
	mov	%o3,%o0
.type	_sparcv9_vis1_instrument_bus,#function
.size	_sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus

! _sparcv9_vis1_instrument_bus2
!
! In:	%o0 = out, address of 32-bit words; %o1 = cnt; %o2 = max
! Out:	%o0 = cnt minus the count still outstanding when the loop ends
!
! Same probe step as _sparcv9_vis1_instrument_bus, but the pointer
! advances (and cnt is charged) only when the tick delta differs from
! the previous one. The loop ends when cnt is used up or after max
! steps. cnt is kept scaled by 4 so that the same register value
! (0 or 4) can be subtracted from cnt and added to the pointer.
! Overwrites %o1-%o5, %g1, %g4, %g5 and the %f registers filled by the
! block load.
.global	_sparcv9_vis1_instrument_bus2
.align	8
_sparcv9_vis1_instrument_bus2:
	mov	%o1,%o3				! save cnt
	sll	%o1,2,%o1			! cnt*=4

	.word	0x99410000	!rd	%tick,%o4	! tick
	mov	%o4,%o5				! lasttick = tick
	set	0,%g4				! diff

	andn	%o0,63,%g1			! 64-byte aligned address of the line
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

	.word	0x99410000	!rd	%tick,%o4	! tick
	sub	%o4,%o5,%g4			! diff=tick-lasttick
	mov	%o4,%o5				! lasttick=tick
	mov	%g4,%g5				! lastdiff=diff
.Loop2:
	andn	%o0,63,%g1
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

	subcc	%o2,1,%o2			! --max
	bz	.Ldone2
	nop

	.word	0x99410000	!rd	%tick,%o4	! tick
	sub	%o4,%o5,%g4			! diff=tick-lasttick
	mov	%o4,%o5				! lasttick=tick
	cmp	%g4,%g5
	mov	%g4,%g5				! lastdiff=diff

	.word	0x83408000	!rd	%ccr,%g1
	and	%g1,4,%g1			! isolate zero flag
	xor	%g1,4,%g1			! flip zero flag

	subcc	%o1,%g1,%o1			! conditional --$cnt
	bnz	.Loop2
	add	%o0,%g1,%o0			! conditional ++$out

.Ldone2:
	srl	%o1,2,%o1			! undo the scaling of cnt
	retl
	sub	%o3,%o1,%o0
.type	_sparcv9_vis1_instrument_bus2,#function
.size	_sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2