#! /usr/bin/env perl
# Copyright 2004-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# Locate the perlasm helpers relative to this script and load the 32-bit
# x86 assembler front end; every &mnemonic(...) call below is resolved by it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC, "${dir}perlasm", "perlasm");
require "x86asm.pl";

# The last command-line argument, when present, names the output file and
# STDOUT is redirected to it.  The two-argument form of open is kept on
# purpose so that the special name "-" still selects standard output.
# A failed redirect is reported immediately instead of silently producing
# no output.
$output = pop;
if ($output) {
	open STDOUT,">$output" or die "can't open $output: $!";
}

&asm_init($ARGV[0]);

# -DOPENSSL_IA32_SSE2 on the command line enables the bodies of the
# OPENSSL_instrument_bus* routines further down.
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }

# OPENSSL_ia32_cpuid(ptr)
#
# Probes the processor with CPUID.
#  - ptr is a pointer to a dword array (the comments below identify it as
#    OPENSSL_ia32cap_P).  Dword 2 is zeroed up front; dwords 2..9 (byte
#    offsets 8..36) are filled from CPUID leaves 7 and 0x24 when those
#    leaves are available.
#  - Returns in %edx:%eax the adjusted %ecx:%edx of CPUID leaf 1, or zero
#    in both registers when the CPUID instruction is not available.
&function_begin("OPENSSL_ia32_cpuid");
	# Try to flip bit 21 of EFLAGS; if it does not change, there is
	# no CPUID instruction.
	&xor	("edx","edx");
	&pushf	();
	&pop	("eax");
	&mov	("ecx","eax");
	&xor	("eax",1<<21);
	&push	("eax");
	&popf	();
	&pushf	();
	&pop	("eax");
	&xor	("ecx","eax");
	&xor	("eax","eax");
	&mov	("esi",&wparam(0));
	&mov	(&DWP(8,"esi"),"eax");	# clear extended feature flags
	&bt	("ecx",21);
	&jnc	(&label("nocpuid"));
	&cpuid	();
	&mov	("edi","eax");		# max value for standard query level

	# Compare the vendor string against "GenuineIntel".
	&xor	("eax","eax");
	&cmp	("ebx",0x756e6547);	# "Genu"
	&setne	(&LB("eax"));
	&mov	("ebp","eax");
	&cmp	("edx",0x49656e69);	# "ineI"
	&setne	(&LB("eax"));
	&or	("ebp","eax");
	&cmp	("ecx",0x6c65746e);	# "ntel"
	&setne	(&LB("eax"));
	&or	("ebp","eax");		# 0 indicates Intel CPU
	&jz	(&label("intel"));

	# Compare the vendor string against "AuthenticAMD"; any other
	# vendor takes the same path as Intel.
	&cmp	("ebx",0x68747541);	# "Auth"
	&setne	(&LB("eax"));
	&mov	("esi","eax");
	&cmp	("edx",0x69746E65);	# "enti"
	&setne	(&LB("eax"));
	&or	("esi","eax");
	&cmp	("ecx",0x444D4163);	# "cAMD"
	&setne	(&LB("eax"));
	&or	("esi","eax");		# 0 indicates AMD CPU
	&jnz	(&label("intel"));

	# AMD specific
	&mov	("eax",0x80000000);
	&cpuid	();
	&cmp	("eax",0x80000001);
	&jb	(&label("intel"));
	&mov	("esi","eax");
	&mov	("eax",0x80000001);
	&cpuid	();
	&or	("ebp","ecx");
	&and	("ebp",1<<11|1);	# isolate XOP bit
	&cmp	("esi",0x80000008);
	&jb	(&label("intel"));

	&mov	("eax",0x80000008);
	&cpuid	();
	&movz	("esi",&LB("ecx"));	# number of cores - 1
	&inc	("esi");		# number of cores

	&mov	("eax",1);
	&xor	("ecx","ecx");
	&cpuid	();
	&bt	("edx",28);
	&jnc	(&label("generic"));
	&shr	("ebx",16);
	&and	("ebx",0xff);
	&cmp	("ebx","esi");
	&ja	(&label("generic"));
	&and	("edx",0xefffffff);	# clear hyper-threading bit
	&jmp	(&label("generic"));

&set_label("intel");
	&cmp	("edi",4);
	&mov	("esi",-1);
	&jb	(&label("nocacheinfo"));

	&mov	("eax",4);
	&mov	("ecx",0);		# query L1D
	&cpuid	();
	&mov	("esi","eax");
	&shr	("esi",14);
	&and	("esi",0xfff);		# number of cores -1 per L1D

&set_label("nocacheinfo");
	&mov	("eax",1);
	&xor	("ecx","ecx");
	&cpuid	();
	&and	("edx",0xbfefffff);	# force reserved bits #20, #30 to 0
	&cmp	("ebp",0);
	&jne	(&label("notintel"));
	&or	("edx",1<<30);		# set reserved bit#30 on Intel CPUs
	&and	(&HB("eax"),15);	# family ID
	&cmp	(&HB("eax"),15);	# P4?
	&jne	(&label("notintel"));
	&or	("edx",1<<20);		# set reserved bit#20 to engage RC4_CHAR
&set_label("notintel");
	&bt	("edx",28);		# test hyper-threading bit
	&jnc	(&label("generic"));
	&and	("edx",0xefffffff);
	&cmp	("esi",0);
	&je	(&label("generic"));

	&or	("edx",0x10000000);
	&shr	("ebx",16);
	&cmp	(&LB("ebx"),1);
	&ja	(&label("generic"));
	&and	("edx",0xefffffff);	# clear hyper-threading bit if not

&set_label("generic");
	&and	("ebp",1<<11);		# isolate AMD XOP flag
	&and	("ecx",0xfffff7ff);	# force 11th bit to 0
	&mov	("esi","edx");		# %ebp:%esi is copy of %ecx:%edx
	&or	("ebp","ecx");		# merge AMD XOP flag

	&cmp	("edi",7);
	&mov	("edi",&wparam(0));	# mov leaves the flags of the cmp intact
	&jb	(&label("no_extended_info"));
	&mov	("eax",7);
	&xor	("ecx","ecx");
	&cpuid	();
	&mov	(&DWP(8,"edi"),"ebx");	# save cpuid(EAX=0x7, ECX=0x0).EBX to OPENSSL_ia32cap_P[2]
	&mov	(&DWP(12,"edi"),"ecx");	# save cpuid(EAX=0x7, ECX=0x0).ECX to OPENSSL_ia32cap_P[3]
	&mov	(&DWP(16,"edi"),"edx");	# save cpuid(EAX=0x7, ECX=0x0).EDX to OPENSSL_ia32cap_P[4]
	&cmp	("eax",1);		# Do we have cpuid(EAX=0x7, ECX=0x1)?
	&jb	(&label("no_extended_info"));
	&mov	("eax",7);
	&mov	("ecx",1);
	&cpuid	();			# cpuid(EAX=0x7, ECX=0x1)
	&mov	(&DWP(20,"edi"),"eax");	# save cpuid(EAX=0x7, ECX=0x1).EAX to OPENSSL_ia32cap_P[5]
	&mov	(&DWP(24,"edi"),"edx");	# save cpuid(EAX=0x7, ECX=0x1).EDX to OPENSSL_ia32cap_P[6]
	&mov	(&DWP(28,"edi"),"ebx");	# save cpuid(EAX=0x7, ECX=0x1).EBX to OPENSSL_ia32cap_P[7]
	&mov	(&DWP(32,"edi"),"ecx");	# save cpuid(EAX=0x7, ECX=0x1).ECX to OPENSSL_ia32cap_P[8]

	&and	("edx",0x80000);	# Mask cpuid(EAX=0x7, ECX=0x1).EDX bit 19 to detect AVX10 support
	&cmp	("edx",0x0);
	&je	(&label("no_extended_info"));

	&mov	("eax",0x24);		# Have AVX10 Support, query for details
	&mov	("ecx",0x0);
	&cpuid	();			# cpuid(EAX=0x24, ECX=0x0) AVX10 Leaf
	&mov	(&DWP(36,"edi"),"ebx");	# save cpuid(EAX=0x24, ECX=0x0).EBX to OPENSSL_ia32cap_P[9]

&set_label("no_extended_info");

	# Mask out features whose register state is not enabled according
	# to bits 1 and 2 of the value returned by xgetbv.
	&bt	("ebp",27);		# check OSXSAVE bit
	&jnc	(&label("clear_avx"));
	&xor	("ecx","ecx");
	&data_byte(0x0f,0x01,0xd0);	# xgetbv
	&and	("eax",6);
	&cmp	("eax",6);
	&je	(&label("done"));
	&cmp	("eax",2);
	&je	(&label("clear_avx"));
&set_label("clear_xmm");
	&and	("ebp",0xfdfffffd);	# clear AESNI and PCLMULQDQ bits
	&and	("esi",0xfeffffff);	# clear FXSR
&set_label("clear_avx");
	&and	("ebp",0xefffe7ff);	# clear AVX, FMA and AMD XOP bits
	&and	(&DWP(20,"edi"),0xff7fffff);	# ~(1<<23) clear AVXIFMA,
						# which is VEX-encoded
						# and requires YMM state support
	&and	(&DWP(8,"edi"),0xffffffdf);	# clear AVX2
&set_label("done");
	&mov	("eax","esi");
	&mov	("edx","ebp");
&set_label("nocpuid");
&function_end("OPENSSL_ia32_cpuid");

&external_label("OPENSSL_ia32cap_P");

# OPENSSL_rdtsc()
#
# Returns the time-stamp counter in %edx:%eax when bit 4 of the first
# OPENSSL_ia32cap_P dword is set, and zero otherwise.
&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("notsc"));
	&rdtsc	();
&set_label("notsc");
	&ret	();
&function_end_B("OPENSSL_rdtsc");

# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
# but it's safe to call it on any [supported] 32-bit platform...
# Just check for [non-]zero return value...
#
# Returns in %edx:%eax the time-stamp counter difference measured across
# a hlt instruction, or zero when there is no TSC, the low two bits of
# %cs are non-zero, or interrupts are disabled.
&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("nohalt"));	# no TSC

	&data_word(0x9058900e);		# push %cs; pop %eax
	&and	("eax",3);
	&jnz	(&label("nohalt"));	# not enough privileges

	&pushf	();
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nohalt"));	# interrupts are disabled

	&rdtsc	();
	&push	("edx");		# keep the first reading on the stack
	&push	("eax");
	&halt	();
	&rdtsc	();

	&sub	("eax",&DWP(0,"esp"));	# 64-bit subtract: second - first
	&sbb	("edx",&DWP(4,"esp"));
	&add	("esp",8);
	&ret	();

&set_label("nohalt");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_instrument_halt");

# Essentially there is only one use for this function. Under DJGPP:
#
#	#include <go32.h>
#	...
#	i=OPENSSL_far_spin(_dos_ds,0x46c);
#	...
# to obtain the number of spins till closest timer interrupt.
#
# The first argument is loaded into %ds and the second is used as the
# offset of the dword being watched; the return value in %eax is the
# number of loop iterations until that dword changed.  Zero is returned
# when interrupts are disabled.

&function_begin_B("OPENSSL_far_spin");
	&pushf	();
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nospin"));	# interrupts are disabled

	&mov	("eax",&DWP(4,"esp"));
	&mov	("ecx",&DWP(8,"esp"));
	&data_word (0x90d88e1e);	# push %ds, mov %eax,%ds
	&xor	("eax","eax");
	&mov	("edx",&DWP(0,"ecx"));	# initial value to compare against
	&jmp	(&label("spin"));

	&align	(16);
&set_label("spin");
	&inc	("eax");
	&cmp	("edx",&DWP(0,"ecx"));
	&je	(&label("spin"));

	&data_word (0x1f909090);	# pop %ds
	&ret	();

&set_label("nospin");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_far_spin");

# OPENSSL_atomic_add(ptr, increment)
#
# Adds increment to the dword at ptr with a lock cmpxchg retry loop and
# returns the new value in %eax.
&function_begin_B("OPENSSL_atomic_add");
	&mov	("edx",&DWP(4,"esp"));	# fetch the pointer, 1st arg
	&mov	("ecx",&DWP(8,"esp"));	# fetch the increment, 2nd arg
	&push	("ebx");
	&nop	();
	&mov	("eax",&DWP(0,"edx"));
&set_label("spin");
	&lea	("ebx",&DWP(0,"eax","ecx"));
	&nop	();
	&data_word(0x1ab10ff0);	# lock;	cmpxchg	%ebx,(%edx)	# %eax is involved and is always reloaded
	&jne	(&label("spin"));
	&mov	("eax","ebx");	# OpenSSL expects the new value
	&pop	("ebx");
	&ret	();
&function_end_B("OPENSSL_atomic_add");

# OPENSSL_cleanse(ptr, len)
#
# Stores zero into len bytes starting at ptr.  Fewer than 7 bytes are
# written one at a time; otherwise single bytes are written until ptr is
# 4-byte aligned, then whole dwords, then any remaining bytes.
&function_begin_B("OPENSSL_cleanse");
	&mov	("edx",&wparam(0));
	&mov	("ecx",&wparam(1));
	&xor	("eax","eax");
	&cmp	("ecx",7);
	&jae	(&label("lot"));
	&cmp	("ecx",0);
	&je	(&label("ret"));
&set_label("little");
	&mov	(&BP(0,"edx"),"al");
	&sub	("ecx",1);
	&lea	("edx",&DWP(1,"edx"));	# lea preserves the flags set by sub
	&jnz	(&label("little"));
&set_label("ret");
	&ret	();

&set_label("lot",16);
	&test	("edx",3);
	&jz	(&label("aligned"));
	&mov	(&BP(0,"edx"),"al");
	&lea	("ecx",&DWP(-1,"ecx"));
	&lea	("edx",&DWP(1,"edx"));
	&jmp	(&label("lot"));
&set_label("aligned");
	&mov	(&DWP(0,"edx"),"eax");
	&lea	("ecx",&DWP(-4,"ecx"));
	&test	("ecx",-4);		# at least one more whole dword left?
	&lea	("edx",&DWP(4,"edx"));
	&jnz	(&label("aligned"));
	&cmp	("ecx",0);
	&jne	(&label("little"));
	&ret	();
&function_end_B("OPENSSL_cleanse");

# CRYPTO_memcmp(a, b, len)
#
# Compares len bytes of a and b.  The XOR of every byte pair is OR-ed into
# an accumulator, so all len bytes are always examined.  Returns 0 in
# %eax when the buffers are equal or len is zero, and 1 otherwise.
&function_begin_B("CRYPTO_memcmp");
	&push	("esi");
	&push	("edi");
	&mov	("esi",&wparam(0));
	&mov	("edi",&wparam(1));
	&mov	("ecx",&wparam(2));
	&xor	("eax","eax");
	&xor	("edx","edx");
	&cmp	("ecx",0);
	&je	(&label("no_data"));
&set_label("loop");
	&mov	("dl",&BP(0,"esi"));
	&lea	("esi",&DWP(1,"esi"));
	&xor	("dl",&BP(0,"edi"));
	&lea	("edi",&DWP(1,"edi"));
	&or	("al","dl");
	&dec	("ecx");
	&jnz	(&label("loop"));
	&neg	("eax");		# non-zero accumulator -> sign bit set
	&shr	("eax",31);		# reduce to 0 or 1
&set_label("no_data");
	&pop	("edi");
	&pop	("esi");
	&ret	();
&function_end_B("CRYPTO_memcmp");
{
# Register assignment shared by the two OPENSSL_instrument_bus routines.
my $lasttick = "esi";
my $lastdiff = "ebx";
my $out = "edi";
my $cnt = "ecx";
my $max = "ebp";

# OPENSSL_instrument_bus(out, cnt)
#
# Without -DOPENSSL_IA32_SSE2 the generated routine just returns 0.
# Otherwise it requires bits 4 (TSC) and 19 (CLFLUSH) of the first
# OPENSSL_ia32cap_P dword and returns 0 when either is clear.  For each of
# the cnt dwords at out it flushes the dword's cache line and lock-adds
# the low 32 bits of the TSC delta since the previous sample.  Returns
# cnt.
&function_begin("OPENSSL_instrument_bus");
    &mov	("eax",0);
    if ($sse2) {
	&picmeup("edx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"edx"),4);
	&jnc	(&label("nogo"));	# no TSC
	&bt	(&DWP(0,"edx"),19);
	&jnc	(&label("nogo"));	# no CLFLUSH

	&mov	($out,&wparam(0));	# load arguments
	&mov	($cnt,&wparam(1));

	# collect 1st tick
	&rdtsc	();
	&mov	($lasttick,"eax");	# lasttick = tick
	&mov	($lastdiff,0);		# lastdiff = 0
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),$lastdiff);
	&jmp	(&label("loop"));

&set_label("loop",16);
	&rdtsc	();
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),"eax");	# accumulate diff
	&lea	($out,&DWP(4,$out));	# ++$out
	&sub	($cnt,1);		# --$cnt
	&jnz	(&label("loop"));

	&mov	("eax",&wparam(1));
&set_label("nogo");
    }
&function_end("OPENSSL_instrument_bus");

# OPENSSL_instrument_bus2(out, cnt, max)
#
# Same prerequisites as OPENSSL_instrument_bus.  The TSC delta is
# lock-added into the current dword on every iteration, but the output
# pointer advances (and cnt is decremented) only when the delta differs
# from the previous one.  The loop ends when cnt reaches zero or after
# max iterations.  Returns the original cnt minus the remaining cnt.
&function_begin("OPENSSL_instrument_bus2");
    &mov	("eax",0);
    if ($sse2) {
	&picmeup("edx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"edx"),4);
	&jnc	(&label("nogo"));	# no TSC
	&bt	(&DWP(0,"edx"),19);
	&jnc	(&label("nogo"));	# no CLFLUSH

	&mov	($out,&wparam(0));	# load arguments
	&mov	($cnt,&wparam(1));
	&mov	($max,&wparam(2));

	&rdtsc	();			# collect 1st tick
	&mov	($lasttick,"eax");	# lasttick = tick
	&mov	($lastdiff,0);		# lastdiff = 0

	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),$lastdiff);

	&rdtsc	();			# collect 1st diff
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&jmp	(&label("loop2"));

&set_label("loop2",16);
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),"eax");	# accumulate diff

	&sub	($max,1);
	&jz	(&label("done2"));

	&rdtsc	();
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&cmp	("eax",$lastdiff);
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&mov	("edx",0);
	&setne	("dl");
	&sub	($cnt,"edx");		# conditional --$cnt
	&lea	($out,&DWP(0,$out,"edx",4));	# conditional ++$out
	&jnz	(&label("loop2"));

&set_label("done2");
	&mov	("eax",&wparam(1));
	&sub	("eax",$cnt);
&set_label("nogo");
    }
&function_end("OPENSSL_instrument_bus2");
}

# gen_random($rdop)
#
# Emits OPENSSL_ia32_${rdop}_bytes(buf, len), where $rdop is the mnemonic
# of the random-number instruction to use.  The generated routine fills
# buf four bytes at a time (byte-wise for a final remainder shorter than
# four), retrying the instruction up to 8 times whenever it does not set
# the carry flag.  It returns in %eax the number of bytes actually
# written, which is less than len if the instruction failed 8 times in a
# row.
sub gen_random {
my $rdop = shift;
&function_begin_B("OPENSSL_ia32_${rdop}_bytes");
	&push	("edi");
	&push	("ebx");
	&xor	("eax","eax");		# return value
	&mov	("edi",&wparam(0));
	&mov	("ebx",&wparam(1));

	&cmp	("ebx",0);
	&je	(&label("done"));

	&mov	("ecx",8);		# retry budget
&set_label("loop");
	&${rdop}("edx");
	&jc	(&label("break"));
	&loop	(&label("loop"));
	&jmp	(&label("done"));

&set_label("break",16);
	&cmp	("ebx",4);
	&jb	(&label("tail"));
	&mov	(&DWP(0,"edi"),"edx");
	&lea	("edi",&DWP(4,"edi"));
	&add	("eax",4);
	&sub	("ebx",4);
	&jz	(&label("done"));
	&mov	("ecx",8);		# fresh retry budget for the next word
	&jmp	(&label("loop"));

&set_label("tail",16);
	&mov	(&BP(0,"edi"),"dl");
	&lea	("edi",&DWP(1,"edi"));
	&inc	("eax");
	&shr	("edx",8);
	&dec	("ebx");
	&jnz	(&label("tail"));

&set_label("done");
	&xor	("edx","edx");		# Clear random value from registers
	&pop	("ebx");
	&pop	("edi");
	&ret	();
&function_end_B("OPENSSL_ia32_${rdop}_bytes");
}
&gen_random("rdrand");
&gen_random("rdseed");

&hidden("OPENSSL_cpuid_setup");
&hidden("OPENSSL_ia32cap_P");

&asm_finish();

close STDOUT or die "error closing STDOUT: $!";