Skip to content

Commit

Permalink
sha/asm/keccak1600-avx512.pl: absorb bug-fix and minor optimization.
Browse files Browse the repository at this point in the history
Hardware used for benchmarking courtesy of Atos, experiments run by
Romain Dolbeau <[email protected]>. Kudos!

Reviewed-by: Rich Salz <[email protected]>
  • Loading branch information
Andy Polyakov committed Jul 21, 2017
1 parent 64d92d7 commit 0d7903f
Showing 1 changed file with 17 additions and 19 deletions.
36 changes: 17 additions & 19 deletions crypto/sha/asm/keccak1600-avx512.pl
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
#
# r=1088(*)
#
-# Knights Landing -
-# Skylake Xeon -
+# Knights Landing 8.9
+# Skylake-X 6.7
#
# (*) Corresponds to SHA3-256.

Expand Down Expand Up @@ -119,22 +119,22 @@
vpermq $A03,@Theta[3],$A03
vpermq $A04,@Theta[4],$A04
-vpxorq $A01,$A00,$C00
-vpxorq $A02,$C00,$C00
-vpternlogq \$0x96,$A04,$A03,$C00
+vmovdqa64 $A00,@T[0] # put aside original A00
+vpternlogq \$0x96,$A02,$A01,$A00 # and use it as "C00"
+vpternlogq \$0x96,$A04,$A03,$A00
-vprolq \$1,$C00,$D00
-vpermq $C00,@Theta[1],$C00
+vprolq \$1,$A00,$D00
+vpermq $A00,@Theta[1],$A00
vpermq $D00,@Theta[4],$D00
-vpternlogq \$0x96,$C00,$D00,$A00
-vpternlogq \$0x96,$C00,$D00,$A01
-vpternlogq \$0x96,$C00,$D00,$A02
-vpternlogq \$0x96,$C00,$D00,$A03
-vpternlogq \$0x96,$C00,$D00,$A04
+vpternlogq \$0x96,$A00,$D00,@T[0] # T[0] is original A00
+vpternlogq \$0x96,$A00,$D00,$A01
+vpternlogq \$0x96,$A00,$D00,$A02
+vpternlogq \$0x96,$A00,$D00,$A03
+vpternlogq \$0x96,$A00,$D00,$A04
######################################### Rho
-vprolvq @Rhotate[0],$A00,$A00
+vprolvq @Rhotate[0],@T[0],$A00 # T[0] is original A00
vprolvq @Rhotate[1],$A01,$A01
vprolvq @Rhotate[2],$A02,$A02
vprolvq @Rhotate[3],$A03,$A03
Expand Down Expand Up @@ -259,22 +259,20 @@
jc .Ldone_absorb_avx512
shr \$3,%eax
-vmovdqu64 -96($inp),@{T[0]}{$k11111}
-sub \$4,%eax
___
-for(my $i=5; $i<25; $i++) {
+for(my $i=0; $i<25; $i++) {
$code.=<<___
-dec %eax
-jz .Labsorved_avx512
mov 8*$i-96($inp),%r8
mov %r8,$A_jagged_in[$i]-128(%r9)
+dec %eax
+jz .Labsorved_avx512
___
}
$code.=<<___;
.Labsorved_avx512:
lea ($inp,$bsz),$inp
-vpxorq @T[0],$A00,$A00
+vpxorq 64*0-128(%r9),$A00,$A00
vpxorq 64*1-128(%r9),$A01,$A01
vpxorq 64*2-128(%r9),$A02,$A02
vpxorq 64*3-128(%r9),$A03,$A03
Expand Down

0 comments on commit 0d7903f

Please sign in to comment.