mirror of https://gitee.com/openkylin/linux.git
crypto: aesni - fix counter overflow handling in "by8" variant
The "by8" CTR AVX implementation fails to properly handle counter
overflows. That was the reason it got disabled in commit 7da4b29d49
("crypto: aesni - disable "by8" AVX CTR optimization").
Fix the overflow handling by incrementing the counter block as a double
quad word, i.e. as a 128-bit value, and testing for overflows afterwards. We need
to use VPTEST to do so as VPADD* does not set the flags itself and
silently drops the carry bit.
As this change adds branches to the hot path, minor performance
regressions might be a side effect. But, OTOH, we now have a conforming
implementation -- the preferable goal.
A tcrypt test on a SandyBridge system (i7-2620M) showed almost identical
numbers for the old and the new version, with differences within the noise
range. A dm-crypt test gave even slightly better results for the fixed
version. So the performance impact might not be as big
as expected.
Tested-by: Romain Francoise <romain@orebokech.com>
Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Chandramouli Narayanan <mouli@linux.intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
parent
7a1ae9c0ce
commit
80dca4734b
|
@ -108,6 +108,10 @@
|
||||||
|
|
||||||
byteswap_const:
|
byteswap_const:
|
||||||
.octa 0x000102030405060708090A0B0C0D0E0F
|
.octa 0x000102030405060708090A0B0C0D0E0F
|
||||||
|
ddq_low_msk:
|
||||||
|
.octa 0x0000000000000000FFFFFFFFFFFFFFFF
|
||||||
|
ddq_high_add_1:
|
||||||
|
.octa 0x00000000000000010000000000000000
|
||||||
ddq_add_1:
|
ddq_add_1:
|
||||||
.octa 0x00000000000000000000000000000001
|
.octa 0x00000000000000000000000000000001
|
||||||
ddq_add_2:
|
ddq_add_2:
|
||||||
|
@ -169,7 +173,12 @@ ddq_add_8:
|
||||||
.rept (by - 1)
|
.rept (by - 1)
|
||||||
club DDQ_DATA, i
|
club DDQ_DATA, i
|
||||||
club XDATA, i
|
club XDATA, i
|
||||||
vpaddd var_ddq_add(%rip), xcounter, var_xdata
|
vpaddq var_ddq_add(%rip), xcounter, var_xdata
|
||||||
|
vptest ddq_low_msk(%rip), var_xdata
|
||||||
|
jnz 1f
|
||||||
|
vpaddq ddq_high_add_1(%rip), var_xdata, var_xdata
|
||||||
|
vpaddq ddq_high_add_1(%rip), xcounter, xcounter
|
||||||
|
1:
|
||||||
vpshufb xbyteswap, var_xdata, var_xdata
|
vpshufb xbyteswap, var_xdata, var_xdata
|
||||||
.set i, (i +1)
|
.set i, (i +1)
|
||||||
.endr
|
.endr
|
||||||
|
@ -178,7 +187,11 @@ ddq_add_8:
|
||||||
|
|
||||||
vpxor xkey0, xdata0, xdata0
|
vpxor xkey0, xdata0, xdata0
|
||||||
club DDQ_DATA, by
|
club DDQ_DATA, by
|
||||||
vpaddd var_ddq_add(%rip), xcounter, xcounter
|
vpaddq var_ddq_add(%rip), xcounter, xcounter
|
||||||
|
vptest ddq_low_msk(%rip), xcounter
|
||||||
|
jnz 1f
|
||||||
|
vpaddq ddq_high_add_1(%rip), xcounter, xcounter
|
||||||
|
1:
|
||||||
|
|
||||||
.set i, 1
|
.set i, 1
|
||||||
.rept (by - 1)
|
.rept (by - 1)
|
||||||
|
|
Loading…
Reference in New Issue