crypto: arm64/aes-ce-mac - simplify NEON yield

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
Ard Biesheuvel 2021-02-03 12:36:24 +01:00 committed by Herbert Xu
parent f5943ef456
commit f0070f4a79
2 changed files with 33 additions and 40 deletions

View File

@@ -105,7 +105,7 @@ asmlinkage void aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
int rounds, int blocks, u8 iv[], int rounds, int blocks, u8 iv[],
u32 const rk2[]); u32 const rk2[]);
asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds, asmlinkage int aes_mac_update(u8 const in[], u32 const rk[], int rounds,
int blocks, u8 dg[], int enc_before, int blocks, u8 dg[], int enc_before,
int enc_after); int enc_after);
@@ -856,10 +856,17 @@ static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks,
int rounds = 6 + ctx->key_length / 4; int rounds = 6 + ctx->key_length / 4;
if (crypto_simd_usable()) { if (crypto_simd_usable()) {
int rem;
do {
kernel_neon_begin(); kernel_neon_begin();
aes_mac_update(in, ctx->key_enc, rounds, blocks, dg, enc_before, rem = aes_mac_update(in, ctx->key_enc, rounds, blocks,
enc_after); dg, enc_before, enc_after);
kernel_neon_end(); kernel_neon_end();
in += (blocks - rem) * AES_BLOCK_SIZE;
blocks = rem;
enc_before = 0;
} while (blocks);
} else { } else {
if (enc_before) if (enc_before)
aes_encrypt(ctx, dg, dg); aes_encrypt(ctx, dg, dg);

View File

@@ -678,61 +678,47 @@ AES_FUNC_END(aes_xts_decrypt)
* int blocks, u8 dg[], int enc_before, int enc_after) * int blocks, u8 dg[], int enc_before, int enc_after)
*/ */
AES_FUNC_START(aes_mac_update) AES_FUNC_START(aes_mac_update)
frame_push 6 ld1 {v0.16b}, [x4] /* get dg */
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x6
ld1 {v0.16b}, [x23] /* get dg */
enc_prepare w2, x1, x7 enc_prepare w2, x1, x7
cbz w5, .Lmacloop4x cbz w5, .Lmacloop4x
encrypt_block v0, w2, x1, x7, w8 encrypt_block v0, w2, x1, x7, w8
.Lmacloop4x: .Lmacloop4x:
subs w22, w22, #4 subs w3, w3, #4
bmi .Lmac1x bmi .Lmac1x
ld1 {v1.16b-v4.16b}, [x19], #64 /* get next pt block */ ld1 {v1.16b-v4.16b}, [x0], #64 /* get next pt block */
eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
encrypt_block v0, w21, x20, x7, w8 encrypt_block v0, w2, x1, x7, w8
eor v0.16b, v0.16b, v2.16b eor v0.16b, v0.16b, v2.16b
encrypt_block v0, w21, x20, x7, w8 encrypt_block v0, w2, x1, x7, w8
eor v0.16b, v0.16b, v3.16b eor v0.16b, v0.16b, v3.16b
encrypt_block v0, w21, x20, x7, w8 encrypt_block v0, w2, x1, x7, w8
eor v0.16b, v0.16b, v4.16b eor v0.16b, v0.16b, v4.16b
cmp w22, wzr cmp w3, wzr
csinv x5, x24, xzr, eq csinv x5, x6, xzr, eq
cbz w5, .Lmacout cbz w5, .Lmacout
encrypt_block v0, w21, x20, x7, w8 encrypt_block v0, w2, x1, x7, w8
st1 {v0.16b}, [x23] /* return dg */ st1 {v0.16b}, [x4] /* return dg */
cond_yield_neon .Lmacrestart cond_yield .Lmacout, x7
b .Lmacloop4x b .Lmacloop4x
.Lmac1x: .Lmac1x:
add w22, w22, #4 add w3, w3, #4
.Lmacloop: .Lmacloop:
cbz w22, .Lmacout cbz w3, .Lmacout
ld1 {v1.16b}, [x19], #16 /* get next pt block */ ld1 {v1.16b}, [x0], #16 /* get next pt block */
eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
subs w22, w22, #1 subs w3, w3, #1
csinv x5, x24, xzr, eq csinv x5, x6, xzr, eq
cbz w5, .Lmacout cbz w5, .Lmacout
.Lmacenc: .Lmacenc:
encrypt_block v0, w21, x20, x7, w8 encrypt_block v0, w2, x1, x7, w8
b .Lmacloop b .Lmacloop
.Lmacout: .Lmacout:
st1 {v0.16b}, [x23] /* return dg */ st1 {v0.16b}, [x4] /* return dg */
frame_pop mov w0, w3
ret ret
.Lmacrestart:
ld1 {v0.16b}, [x23] /* get dg */
enc_prepare w21, x20, x0
b .Lmacloop4x
AES_FUNC_END(aes_mac_update) AES_FUNC_END(aes_mac_update)