diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c index 31eb75b6002f..9bdafd57888c 100644 --- a/arch/arm/crypto/curve25519-glue.c +++ b/arch/arm/crypto/curve25519-glue.c @@ -112,7 +112,7 @@ static struct kpp_alg curve25519_alg = { .max_size = curve25519_max_size, }; -static int __init mod_init(void) +static int __init arm_curve25519_init(void) { if (elf_hwcap & HWCAP_NEON) { static_branch_enable(&have_neon); @@ -122,14 +122,14 @@ static int __init mod_init(void) return 0; } -static void __exit mod_exit(void) +static void __exit arm_curve25519_exit(void) { if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && elf_hwcap & HWCAP_NEON) crypto_unregister_kpp(&curve25519_alg); } -module_init(mod_init); -module_exit(mod_exit); +module_init(arm_curve25519_init); +module_exit(arm_curve25519_exit); MODULE_ALIAS_CRYPTO("curve25519"); MODULE_ALIAS_CRYPTO("curve25519-neon"); diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index b8eb0453123d..55f19450091b 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -51,7 +51,7 @@ config CRYPTO_SM4_ARM64_CE tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_ALGAPI - select CRYPTO_SM4 + select CRYPTO_LIB_SM4 config CRYPTO_GHASH_ARM64_CE tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions" diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c index 2754c875d39c..9c93cfc4841b 100644 --- a/arch/arm64/crypto/sm4-ce-glue.c +++ b/arch/arm64/crypto/sm4-ce-glue.c @@ -17,12 +17,20 @@ MODULE_LICENSE("GPL v2"); asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in); +static int sm4_ce_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); + + return sm4_expandkey(ctx, key, key_len); +} + static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); + const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); if (!crypto_simd_usable()) { - crypto_sm4_encrypt(tfm, out, in); + sm4_crypt_block(ctx->rkey_enc, out, in); } else { kernel_neon_begin(); sm4_ce_do_crypt(ctx->rkey_enc, out, in); @@ -32,10 +40,10 @@ static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); + const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); if (!crypto_simd_usable()) { - crypto_sm4_decrypt(tfm, out, in); + sm4_crypt_block(ctx->rkey_dec, out, in); } else { kernel_neon_begin(); sm4_ce_do_crypt(ctx->rkey_dec, out, in); @@ -49,12 +57,12 @@ static struct crypto_alg sm4_ce_alg = { .cra_priority = 200, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = SM4_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_sm4_ctx), + .cra_ctxsize = sizeof(struct sm4_ctx), .cra_module = THIS_MODULE, .cra_u.cipher = { .cia_min_keysize = SM4_KEY_SIZE, .cia_max_keysize = SM4_KEY_SIZE, - .cia_setkey = crypto_sm4_set_key, + .cia_setkey = sm4_ce_setkey, .cia_encrypt = sm4_ce_encrypt, .cia_decrypt = sm4_ce_decrypt } diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index d0959e7b809f..f307c93fc90a 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -88,6 +88,12 @@ nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o +obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o +sm4-aesni-avx-x86_64-y := 
sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o + +obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX2_X86_64) += sm4-aesni-avx2-x86_64.o +sm4-aesni-avx2-x86_64-y := sm4-aesni-avx2-asm_64.o sm4_aesni_avx2_glue.o + quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $< > $@ $(obj)/%.S: $(src)/%.pl FORCE diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 2144e54a6c89..0fc961bef299 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -849,6 +849,8 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt) return -EINVAL; err = skcipher_walk_virt(&walk, req, false); + if (!walk.nbytes) + return err; if (unlikely(tail > 0 && walk.nbytes < walk.total)) { int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; @@ -862,7 +864,10 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt) skcipher_request_set_crypt(&subreq, req->src, req->dst, blocks * AES_BLOCK_SIZE, req->iv); req = &subreq; + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; } else { tail = 0; } diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S new file mode 100644 index 000000000000..fa2c3f50aecb --- /dev/null +++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S @@ -0,0 +1,589 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm, AES-NI/AVX optimized. + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (C) 2018 Markku-Juhani O. Saarinen + * Copyright (C) 2020 Jussi Kivilinna + * Copyright (c) 2021 Tianjia Zhang + */ + +/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at: + * https://github.com/mjosaarinen/sm4ni + */ + +#include +#include + +#define rRIP (%rip) + +#define RX0 %xmm0 +#define RX1 %xmm1 +#define MASK_4BIT %xmm2 +#define RTMP0 %xmm3 +#define RTMP1 %xmm4 +#define RTMP2 %xmm5 +#define RTMP3 %xmm6 +#define RTMP4 %xmm7 + +#define RA0 %xmm8 +#define RA1 %xmm9 +#define RA2 %xmm10 +#define RA3 %xmm11 + +#define RB0 %xmm12 +#define RB1 %xmm13 +#define RB2 %xmm14 +#define RB3 %xmm15 + +#define RNOT %xmm0 +#define RBSWAP %xmm1 + + +/* Transpose four 32-bit words between 128-bit vectors. */ +#define transpose_4x4(x0, x1, x2, x3, t1, t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +/* pre-SubByte transform. */ +#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \ + vpand x, mask4bit, tmp0; \ + vpandn x, mask4bit, x; \ + vpsrld $4, x, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + +/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by + * 'vaeslastenc' instruction. + */ +#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \ + vpandn mask4bit, x, tmp0; \ + vpsrld $4, x, x; \ + vpand x, mask4bit, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + + +.section .rodata.cst164, "aM", @progbits, 164 +.align 16 + +/* + * Following four affine transform look-up tables are from work by + * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni + * + * These allow exposing SM4 S-Box from AES SubByte. + */ + +/* pre-SubByte affine transform, from SM4 field to AES field. 
*/ +.Lpre_tf_lo_s: + .quad 0x9197E2E474720701, 0xC7C1B4B222245157 +.Lpre_tf_hi_s: + .quad 0xE240AB09EB49A200, 0xF052B91BF95BB012 + +/* post-SubByte affine transform, from AES field to SM4 field. */ +.Lpost_tf_lo_s: + .quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82 +.Lpost_tf_hi_s: + .quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF + +/* For isolating SubBytes from AESENCLAST, inverse shift row */ +.Linv_shift_row: + .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b + .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 + +/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_8: + .byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e + .byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06 + +/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_16: + .byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01 + .byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09 + +/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_24: + .byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04 + .byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c + +/* For CTR-mode IV byteswap */ +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +/* For input word byte-swap */ +.Lbswap32_mask: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 + +.align 4 +/* 4-bit mask */ +.L0f0f0f0f: + .long 0x0f0f0f0f + + +.text +.align 16 + +/* + * void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst, + * const u8 *src, int nblocks) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_crypt4) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (1..4 blocks) + * %rdx: src (1..4 blocks) + * %rcx: num blocks (1..4) + */ + FRAME_BEGIN + + vmovdqu 0*16(%rdx), RA0; + vmovdqa RA0, RA1; + vmovdqa RA0, RA2; + vmovdqa RA0, RA3; + cmpq $2, %rcx; + jb .Lblk4_load_input_done; + vmovdqu 1*16(%rdx), RA1; + je .Lblk4_load_input_done; + vmovdqu 2*16(%rdx), RA2; + cmpq $3, %rcx; + je .Lblk4_load_input_done; + vmovdqu 3*16(%rdx), RA3; + +.Lblk4_load_input_done: + + vmovdqa .Lbswap32_mask rRIP, RTMP2; + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + + vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT; + vmovdqa .Lpre_tf_lo_s rRIP, RTMP4; + vmovdqa .Lpre_tf_hi_s rRIP, RB0; + vmovdqa .Lpost_tf_lo_s rRIP, RB1; + vmovdqa .Lpost_tf_hi_s rRIP, RB2; + vmovdqa .Linv_shift_row rRIP, RB3; + vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP2; + vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP3; + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + +#define ROUND(round, s0, s1, s2, s3) \ + vbroadcastss (4*(round))(%rdi), RX0; \ + vpxor s1, RX0, RX0; \ + vpxor s2, RX0, RX0; \ + vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \ + \ + /* sbox, non-linear part */ \ + transform_pre(RX0, RTMP4, RB0, MASK_4BIT, RTMP0); \ + vaesenclast MASK_4BIT, RX0, RX0; \ + transform_post(RX0, RB1, RB2, MASK_4BIT, RTMP0); \ + \ + /* linear part */ \ + vpshufb RB3, RX0, RTMP0; \ + vpxor RTMP0, s0, s0; /* s0 ^ x */ \ + vpshufb RTMP2, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \ + vpshufb RTMP3, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb .Linv_shift_row_rol_24 rRIP, RX0, RTMP1; \ + vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \ + vpslld $2, RTMP0, RTMP1; \ + vpsrld $30, RTMP0, RTMP0; \ + vpxor RTMP0, s0, s0; \ + /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpxor RTMP1, s0, s0; + + leaq (32*4)(%rdi), %rax; +.align 16 
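+/*
+ * Reference for the ROUND macro above: a scalar C sketch of the SM4
+ * round that the vpshufb/vaesenclast sequence vectorizes.  Here
+ * sm4_sbox[] stands for the standard SM4 S-box table (the one removed
+ * from sm4_generic.c later in this series); the sketch is illustrative
+ * only, since this assembly never materializes that table but derives
+ * the same substitution from AES SubBytes via the .Lpre_tf and
+ * .Lpost_tf affine transforms.
+ *
+ *	static u32 sm4_round_ref(const u32 x[4], u32 rk)
+ *	{
+ *		u32 t = x[1] ^ x[2] ^ x[3] ^ rk;
+ *		u8 *b = (u8 *)&t;
+ *		int i;
+ *
+ *		for (i = 0; i < 4; i++)		// non-linear layer tau
+ *			b[i] = sm4_sbox[b[i]];
+ *		// linear layer L
+ *		t ^= rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^ rol32(t, 24);
+ *		return x[0] ^ t;		// becomes the new x[3]
+ *	}
+ *
+ * The vector code builds rol(x,8)/rol(x,16)/rol(x,24) with vpshufb
+ * byte shuffles, then applies a rotate-by-2 (vpslld/vpsrld) to the
+ * x ^ rol(x,8) ^ rol(x,16) partial, which yields the remaining
+ * rol(x,2) ^ rol(x,10) ^ rol(x,18) terms tracked in the comments.
+ */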
+.Lroundloop_blk4: + ROUND(0, RA0, RA1, RA2, RA3); + ROUND(1, RA1, RA2, RA3, RA0); + ROUND(2, RA2, RA3, RA0, RA1); + ROUND(3, RA3, RA0, RA1, RA2); + leaq (4*4)(%rdi), %rdi; + cmpq %rax, %rdi; + jne .Lroundloop_blk4; + +#undef ROUND + + vmovdqa .Lbswap128_mask rRIP, RTMP2; + + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + + vmovdqu RA0, 0*16(%rsi); + cmpq $2, %rcx; + jb .Lblk4_store_output_done; + vmovdqu RA1, 1*16(%rsi); + je .Lblk4_store_output_done; + vmovdqu RA2, 2*16(%rsi); + cmpq $3, %rcx; + je .Lblk4_store_output_done; + vmovdqu RA3, 3*16(%rsi); + +.Lblk4_store_output_done: + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx_crypt4) + +.align 8 +SYM_FUNC_START_LOCAL(__sm4_crypt_blk8) + /* input: + * %rdi: round key array, CTX + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel + * plaintext blocks + * output: + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel + * ciphertext blocks + */ + FRAME_BEGIN + + vmovdqa .Lbswap32_mask rRIP, RTMP2; + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + vpshufb RTMP2, RB0, RB0; + vpshufb RTMP2, RB1, RB1; + vpshufb RTMP2, RB2, RB2; + vpshufb RTMP2, RB3, RB3; + + vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT; + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1); + +#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \ + vbroadcastss (4*(round))(%rdi), RX0; \ + vmovdqa .Lpre_tf_lo_s rRIP, RTMP4; \ + vmovdqa .Lpre_tf_hi_s rRIP, RTMP1; \ + vmovdqa RX0, RX1; \ + vpxor s1, RX0, RX0; \ + vpxor s2, RX0, RX0; \ + vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \ + vmovdqa .Lpost_tf_lo_s rRIP, RTMP2; \ + vmovdqa .Lpost_tf_hi_s rRIP, RTMP3; \ + vpxor r1, RX1, RX1; \ + vpxor r2, RX1, RX1; \ + vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \ + \ + /* sbox, non-linear part */ \ + transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \ + transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \ + vmovdqa .Linv_shift_row rRIP, RTMP4; \ + vaesenclast MASK_4BIT, RX0, RX0; \ + vaesenclast MASK_4BIT, RX1, RX1; \ + transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \ + transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \ + \ + /* linear part */ \ + vpshufb RTMP4, RX0, RTMP0; \ + vpxor RTMP0, s0, s0; /* s0 ^ x */ \ + vpshufb RTMP4, RX1, RTMP2; \ + vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP4; \ + vpxor RTMP2, r0, r0; /* r0 ^ x */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \ + vpshufb RTMP4, RX1, RTMP3; \ + vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP4; \ + vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb RTMP4, RX1, RTMP3; \ + vmovdqa .Linv_shift_row_rol_24 rRIP, RTMP4; \ + vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \ + /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpslld $2, RTMP0, RTMP1; \ + vpsrld $30, RTMP0, RTMP0; \ + vpxor RTMP0, s0, s0; \ + vpxor RTMP1, s0, s0; \ + vpshufb RTMP4, RX1, RTMP3; \ + vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \ + /* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpslld $2, RTMP2, RTMP3; \ + vpsrld $30, RTMP2, RTMP2; \ + vpxor RTMP2, r0, r0; \ + vpxor RTMP3, r0, r0; + + leaq (32*4)(%rdi), %rax; +.align 16 +.Lroundloop_blk8: + ROUND(0, RA0, RA1, RA2, 
RA3, RB0, RB1, RB2, RB3); + ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0); + ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1); + ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2); + leaq (4*4)(%rdi), %rdi; + cmpq %rax, %rdi; + jne .Lroundloop_blk8; + +#undef ROUND + + vmovdqa .Lbswap128_mask rRIP, RTMP2; + + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1); + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + vpshufb RTMP2, RB0, RB0; + vpshufb RTMP2, RB1, RB1; + vpshufb RTMP2, RB2, RB2; + vpshufb RTMP2, RB3, RB3; + + FRAME_END + ret; +SYM_FUNC_END(__sm4_crypt_blk8) + +/* + * void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst, + * const u8 *src, int nblocks) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_crypt8) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (1..8 blocks) + * %rdx: src (1..8 blocks) + * %rcx: num blocks (1..8) + */ + FRAME_BEGIN + + cmpq $5, %rcx; + jb sm4_aesni_avx_crypt4; + vmovdqu (0 * 16)(%rdx), RA0; + vmovdqu (1 * 16)(%rdx), RA1; + vmovdqu (2 * 16)(%rdx), RA2; + vmovdqu (3 * 16)(%rdx), RA3; + vmovdqu (4 * 16)(%rdx), RB0; + vmovdqa RB0, RB1; + vmovdqa RB0, RB2; + vmovdqa RB0, RB3; + je .Lblk8_load_input_done; + vmovdqu (5 * 16)(%rdx), RB1; + cmpq $7, %rcx; + jb .Lblk8_load_input_done; + vmovdqu (6 * 16)(%rdx), RB2; + je .Lblk8_load_input_done; + vmovdqu (7 * 16)(%rdx), RB3; + +.Lblk8_load_input_done: + call __sm4_crypt_blk8; + + cmpq $6, %rcx; + vmovdqu RA0, (0 * 16)(%rsi); + vmovdqu RA1, (1 * 16)(%rsi); + vmovdqu RA2, (2 * 16)(%rsi); + vmovdqu RA3, (3 * 16)(%rsi); + vmovdqu RB0, (4 * 16)(%rsi); + jb .Lblk8_store_output_done; + vmovdqu RB1, (5 * 16)(%rsi); + je .Lblk8_store_output_done; + vmovdqu RB2, (6 * 16)(%rsi); + cmpq $7, %rcx; + je .Lblk8_store_output_done; + vmovdqu RB3, (7 * 16)(%rsi); + +.Lblk8_store_output_done: + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx_crypt8) + +/* + * void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_ctr_enc_blk8) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: iv (big endian, 128bit) + */ + FRAME_BEGIN + + /* load IV and byteswap */ + vmovdqu (%rcx), RA0; + + vmovdqa .Lbswap128_mask rRIP, RBSWAP; + vpshufb RBSWAP, RA0, RTMP0; /* be => le */ + + vpcmpeqd RNOT, RNOT, RNOT; + vpsrldq $8, RNOT, RNOT; /* low: -1, high: 0 */ + +#define inc_le128(x, minus_one, tmp) \ + vpcmpeqq minus_one, x, tmp; \ + vpsubq minus_one, x, x; \ + vpslldq $8, tmp, tmp; \ + vpsubq tmp, x, x; + + /* construct IVs */ + inc_le128(RTMP0, RNOT, RTMP2); /* +1 */ + vpshufb RBSWAP, RTMP0, RA1; + inc_le128(RTMP0, RNOT, RTMP2); /* +2 */ + vpshufb RBSWAP, RTMP0, RA2; + inc_le128(RTMP0, RNOT, RTMP2); /* +3 */ + vpshufb RBSWAP, RTMP0, RA3; + inc_le128(RTMP0, RNOT, RTMP2); /* +4 */ + vpshufb RBSWAP, RTMP0, RB0; + inc_le128(RTMP0, RNOT, RTMP2); /* +5 */ + vpshufb RBSWAP, RTMP0, RB1; + inc_le128(RTMP0, RNOT, RTMP2); /* +6 */ + vpshufb RBSWAP, RTMP0, RB2; + inc_le128(RTMP0, RNOT, RTMP2); /* +7 */ + vpshufb RBSWAP, RTMP0, RB3; + inc_le128(RTMP0, RNOT, RTMP2); /* +8 */ + vpshufb RBSWAP, RTMP0, RTMP1; + + /* store new IV */ + vmovdqu RTMP1, (%rcx); + + call __sm4_crypt_blk8; + + vpxor (0 * 16)(%rdx), RA0, RA0; + vpxor (1 * 16)(%rdx), RA1, RA1; + vpxor (2 * 16)(%rdx), RA2, RA2; + vpxor (3 * 16)(%rdx), RA3, RA3; + vpxor (4 * 16)(%rdx), RB0, RB0; + vpxor (5 * 16)(%rdx), RB1, RB1; + vpxor (6 * 16)(%rdx), RB2, RB2; + vpxor (7 * 
16)(%rdx), RB3, RB3; + + vmovdqu RA0, (0 * 16)(%rsi); + vmovdqu RA1, (1 * 16)(%rsi); + vmovdqu RA2, (2 * 16)(%rsi); + vmovdqu RA3, (3 * 16)(%rsi); + vmovdqu RB0, (4 * 16)(%rsi); + vmovdqu RB1, (5 * 16)(%rsi); + vmovdqu RB2, (6 * 16)(%rsi); + vmovdqu RB3, (7 * 16)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8) + +/* + * void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_cbc_dec_blk8) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: iv + */ + FRAME_BEGIN + + vmovdqu (0 * 16)(%rdx), RA0; + vmovdqu (1 * 16)(%rdx), RA1; + vmovdqu (2 * 16)(%rdx), RA2; + vmovdqu (3 * 16)(%rdx), RA3; + vmovdqu (4 * 16)(%rdx), RB0; + vmovdqu (5 * 16)(%rdx), RB1; + vmovdqu (6 * 16)(%rdx), RB2; + vmovdqu (7 * 16)(%rdx), RB3; + + call __sm4_crypt_blk8; + + vmovdqu (7 * 16)(%rdx), RNOT; + vpxor (%rcx), RA0, RA0; + vpxor (0 * 16)(%rdx), RA1, RA1; + vpxor (1 * 16)(%rdx), RA2, RA2; + vpxor (2 * 16)(%rdx), RA3, RA3; + vpxor (3 * 16)(%rdx), RB0, RB0; + vpxor (4 * 16)(%rdx), RB1, RB1; + vpxor (5 * 16)(%rdx), RB2, RB2; + vpxor (6 * 16)(%rdx), RB3, RB3; + vmovdqu RNOT, (%rcx); /* store new IV */ + + vmovdqu RA0, (0 * 16)(%rsi); + vmovdqu RA1, (1 * 16)(%rsi); + vmovdqu RA2, (2 * 16)(%rsi); + vmovdqu RA3, (3 * 16)(%rsi); + vmovdqu RB0, (4 * 16)(%rsi); + vmovdqu RB1, (5 * 16)(%rsi); + vmovdqu RB2, (6 * 16)(%rsi); + vmovdqu RB3, (7 * 16)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8) + +/* + * void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx_cfb_dec_blk8) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: iv + */ + FRAME_BEGIN + + /* Load input */ + vmovdqu (%rcx), RA0; + vmovdqu 0 * 16(%rdx), RA1; + vmovdqu 1 * 16(%rdx), RA2; + vmovdqu 2 * 16(%rdx), RA3; + vmovdqu 3 * 16(%rdx), RB0; + vmovdqu 4 * 16(%rdx), RB1; + vmovdqu 5 * 16(%rdx), RB2; + vmovdqu 6 * 16(%rdx), RB3; + + /* Update IV */ + vmovdqu 7 * 16(%rdx), RNOT; + vmovdqu RNOT, (%rcx); + + call __sm4_crypt_blk8; + + vpxor (0 * 16)(%rdx), RA0, RA0; + vpxor (1 * 16)(%rdx), RA1, RA1; + vpxor (2 * 16)(%rdx), RA2, RA2; + vpxor (3 * 16)(%rdx), RA3, RA3; + vpxor (4 * 16)(%rdx), RB0, RB0; + vpxor (5 * 16)(%rdx), RB1, RB1; + vpxor (6 * 16)(%rdx), RB2, RB2; + vpxor (7 * 16)(%rdx), RB3, RB3; + + vmovdqu RA0, (0 * 16)(%rsi); + vmovdqu RA1, (1 * 16)(%rsi); + vmovdqu RA2, (2 * 16)(%rsi); + vmovdqu RA3, (3 * 16)(%rsi); + vmovdqu RB0, (4 * 16)(%rsi); + vmovdqu RB1, (5 * 16)(%rsi); + vmovdqu RB2, (6 * 16)(%rsi); + vmovdqu RB3, (7 * 16)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx_cfb_dec_blk8) diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S new file mode 100644 index 000000000000..d2ffd7f76ee2 --- /dev/null +++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S @@ -0,0 +1,497 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SM4 Cipher Algorithm, AES-NI/AVX2 optimized. + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (C) 2018 Markku-Juhani O. Saarinen + * Copyright (C) 2020 Jussi Kivilinna + * Copyright (c) 2021 Tianjia Zhang + */ + +/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. 
Saarinen at: + * https://github.com/mjosaarinen/sm4ni + */ + +#include +#include + +#define rRIP (%rip) + +/* vector registers */ +#define RX0 %ymm0 +#define RX1 %ymm1 +#define MASK_4BIT %ymm2 +#define RTMP0 %ymm3 +#define RTMP1 %ymm4 +#define RTMP2 %ymm5 +#define RTMP3 %ymm6 +#define RTMP4 %ymm7 + +#define RA0 %ymm8 +#define RA1 %ymm9 +#define RA2 %ymm10 +#define RA3 %ymm11 + +#define RB0 %ymm12 +#define RB1 %ymm13 +#define RB2 %ymm14 +#define RB3 %ymm15 + +#define RNOT %ymm0 +#define RBSWAP %ymm1 + +#define RX0x %xmm0 +#define RX1x %xmm1 +#define MASK_4BITx %xmm2 + +#define RNOTx %xmm0 +#define RBSWAPx %xmm1 + +#define RTMP0x %xmm3 +#define RTMP1x %xmm4 +#define RTMP2x %xmm5 +#define RTMP3x %xmm6 +#define RTMP4x %xmm7 + + +/* helper macros */ + +/* Transpose four 32-bit words between 128-bit vector lanes. */ +#define transpose_4x4(x0, x1, x2, x3, t1, t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +/* post-SubByte transform. */ +#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \ + vpand x, mask4bit, tmp0; \ + vpandn x, mask4bit, x; \ + vpsrld $4, x, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + +/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by + * 'vaeslastenc' instruction. */ +#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \ + vpandn mask4bit, x, tmp0; \ + vpsrld $4, x, x; \ + vpand x, mask4bit, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + + +.section .rodata.cst164, "aM", @progbits, 164 +.align 16 + +/* + * Following four affine transform look-up tables are from work by + * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni + * + * These allow exposing SM4 S-Box from AES SubByte. + */ + +/* pre-SubByte affine transform, from SM4 field to AES field. */ +.Lpre_tf_lo_s: + .quad 0x9197E2E474720701, 0xC7C1B4B222245157 +.Lpre_tf_hi_s: + .quad 0xE240AB09EB49A200, 0xF052B91BF95BB012 + +/* post-SubByte affine transform, from AES field to SM4 field. 
*/ +.Lpost_tf_lo_s: + .quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82 +.Lpost_tf_hi_s: + .quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF + +/* For isolating SubBytes from AESENCLAST, inverse shift row */ +.Linv_shift_row: + .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b + .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 + +/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_8: + .byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e + .byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06 + +/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_16: + .byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01 + .byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09 + +/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */ +.Linv_shift_row_rol_24: + .byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04 + .byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c + +/* For CTR-mode IV byteswap */ +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +/* For input word byte-swap */ +.Lbswap32_mask: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 + +.align 4 +/* 4-bit mask */ +.L0f0f0f0f: + .long 0x0f0f0f0f + +.text +.align 16 + +.align 8 +SYM_FUNC_START_LOCAL(__sm4_crypt_blk16) + /* input: + * %rdi: round key array, CTX + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel + * plaintext blocks + * output: + * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel + * ciphertext blocks + */ + FRAME_BEGIN + + vbroadcasti128 .Lbswap32_mask rRIP, RTMP2; + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + vpshufb RTMP2, RB0, RB0; + vpshufb RTMP2, RB1, RB1; + vpshufb RTMP2, RB2, RB2; + vpshufb RTMP2, RB3, RB3; + + vpbroadcastd .L0f0f0f0f rRIP, MASK_4BIT; + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1); + +#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \ + vpbroadcastd (4*(round))(%rdi), RX0; \ + vbroadcasti128 .Lpre_tf_lo_s rRIP, RTMP4; \ + vbroadcasti128 .Lpre_tf_hi_s rRIP, RTMP1; \ + vmovdqa RX0, RX1; \ + vpxor s1, RX0, RX0; \ + vpxor s2, RX0, RX0; \ + vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \ + vbroadcasti128 .Lpost_tf_lo_s rRIP, RTMP2; \ + vbroadcasti128 .Lpost_tf_hi_s rRIP, RTMP3; \ + vpxor r1, RX1, RX1; \ + vpxor r2, RX1, RX1; \ + vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \ + \ + /* sbox, non-linear part */ \ + transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \ + transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \ + vextracti128 $1, RX0, RTMP4x; \ + vextracti128 $1, RX1, RTMP0x; \ + vaesenclast MASK_4BITx, RX0x, RX0x; \ + vaesenclast MASK_4BITx, RTMP4x, RTMP4x; \ + vaesenclast MASK_4BITx, RX1x, RX1x; \ + vaesenclast MASK_4BITx, RTMP0x, RTMP0x; \ + vinserti128 $1, RTMP4x, RX0, RX0; \ + vbroadcasti128 .Linv_shift_row rRIP, RTMP4; \ + vinserti128 $1, RTMP0x, RX1, RX1; \ + transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \ + transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \ + \ + /* linear part */ \ + vpshufb RTMP4, RX0, RTMP0; \ + vpxor RTMP0, s0, s0; /* s0 ^ x */ \ + vpshufb RTMP4, RX1, RTMP2; \ + vbroadcasti128 .Linv_shift_row_rol_8 rRIP, RTMP4; \ + vpxor RTMP2, r0, r0; /* r0 ^ x */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \ + vpshufb RTMP4, RX1, RTMP3; \ + vbroadcasti128 .Linv_shift_row_rol_16 rRIP, RTMP4; \ + vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \ + vpshufb RTMP4, 
RX0, RTMP1; \ + vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb RTMP4, RX1, RTMP3; \ + vbroadcasti128 .Linv_shift_row_rol_24 rRIP, RTMP4; \ + vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \ + vpshufb RTMP4, RX0, RTMP1; \ + vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \ + vpslld $2, RTMP0, RTMP1; \ + vpsrld $30, RTMP0, RTMP0; \ + vpxor RTMP0, s0, s0; \ + /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpxor RTMP1, s0, s0; \ + vpshufb RTMP4, RX1, RTMP3; \ + vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \ + vpslld $2, RTMP2, RTMP3; \ + vpsrld $30, RTMP2, RTMP2; \ + vpxor RTMP2, r0, r0; \ + /* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \ + vpxor RTMP3, r0, r0; + + leaq (32*4)(%rdi), %rax; +.align 16 +.Lroundloop_blk8: + ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3); + ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0); + ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1); + ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2); + leaq (4*4)(%rdi), %rdi; + cmpq %rax, %rdi; + jne .Lroundloop_blk8; + +#undef ROUND + + vbroadcasti128 .Lbswap128_mask rRIP, RTMP2; + + transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1); + transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1); + vpshufb RTMP2, RA0, RA0; + vpshufb RTMP2, RA1, RA1; + vpshufb RTMP2, RA2, RA2; + vpshufb RTMP2, RA3, RA3; + vpshufb RTMP2, RB0, RB0; + vpshufb RTMP2, RB1, RB1; + vpshufb RTMP2, RB2, RB2; + vpshufb RTMP2, RB3, RB3; + + FRAME_END + ret; +SYM_FUNC_END(__sm4_crypt_blk16) + +#define inc_le128(x, minus_one, tmp) \ + vpcmpeqq minus_one, x, tmp; \ + vpsubq minus_one, x, x; \ + vpslldq $8, tmp, tmp; \ + vpsubq tmp, x, x; + +/* + * void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv (big endian, 128bit) + */ + FRAME_BEGIN + + movq 8(%rcx), %rax; + bswapq %rax; + + vzeroupper; + + vbroadcasti128 .Lbswap128_mask rRIP, RTMP3; + vpcmpeqd RNOT, RNOT, RNOT; + vpsrldq $8, RNOT, RNOT; /* ab: -1:0 ; cd: -1:0 */ + vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */ + + /* load IV and byteswap */ + vmovdqu (%rcx), RTMP4x; + vpshufb RTMP3x, RTMP4x, RTMP4x; + vmovdqa RTMP4x, RTMP0x; + inc_le128(RTMP4x, RNOTx, RTMP1x); + vinserti128 $1, RTMP4x, RTMP0, RTMP0; + vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */ + + /* check need for handling 64-bit overflow and carry */ + cmpq $(0xffffffffffffffff - 16), %rax; + ja .Lhandle_ctr_carry; + + /* construct IVs */ + vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */ + vpshufb RTMP3, RTMP0, RA1; + vpsubq RTMP2, RTMP0, RTMP0; /* +5 ; +4 */ + vpshufb RTMP3, RTMP0, RA2; + vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */ + vpshufb RTMP3, RTMP0, RA3; + vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */ + vpshufb RTMP3, RTMP0, RB0; + vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */ + vpshufb RTMP3, RTMP0, RB1; + vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */ + vpshufb RTMP3, RTMP0, RB2; + vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */ + vpshufb RTMP3, RTMP0, RB3; + vpsubq RTMP2, RTMP0, RTMP0; /* +16 */ + vpshufb RTMP3x, RTMP0x, RTMP0x; + + jmp .Lctr_carry_done; + +.Lhandle_ctr_carry: + /* construct IVs */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RA1; /* +3 ; +2 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RA2; /* +5 ; +4 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RA3; 
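+	/*
+	 * Scalar sketch of what this carry path computes (illustrative
+	 * only, not part of the generated code): a 128-bit big-endian
+	 * counter bumped once per block.  The fast path above instead
+	 * adds 2 per 128-bit lane by subtracting the -2 constant with
+	 * vpsubq on byte-swapped counters, which is only valid while the
+	 * low 64 bits cannot wrap -- hence the
+	 * cmpq $(0xffffffffffffffff - 16) guard.
+	 *
+	 *	static void ctr128_inc_be(u8 ctr[16])
+	 *	{
+	 *		u64 lo = get_unaligned_be64(ctr + 8);
+	 *		u64 hi = get_unaligned_be64(ctr);
+	 *
+	 *		if (++lo == 0)		// carry into high qword
+	 *			hi++;
+	 *		put_unaligned_be64(hi, ctr);
+	 *		put_unaligned_be64(lo, ctr + 8);
+	 *	}
+	 */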
/* +7 ; +6 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB0; /* +9 ; +8 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB1; /* +11 ; +10 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB2; /* +13 ; +12 */ + inc_le128(RTMP0, RNOT, RTMP1); + inc_le128(RTMP0, RNOT, RTMP1); + vpshufb RTMP3, RTMP0, RB3; /* +15 ; +14 */ + inc_le128(RTMP0, RNOT, RTMP1); + vextracti128 $1, RTMP0, RTMP0x; + vpshufb RTMP3x, RTMP0x, RTMP0x; /* +16 */ + +.align 4 +.Lctr_carry_done: + /* store new IV */ + vmovdqu RTMP0x, (%rcx); + + call __sm4_crypt_blk16; + + vpxor (0 * 32)(%rdx), RA0, RA0; + vpxor (1 * 32)(%rdx), RA1, RA1; + vpxor (2 * 32)(%rdx), RA2, RA2; + vpxor (3 * 32)(%rdx), RA3, RA3; + vpxor (4 * 32)(%rdx), RB0, RB0; + vpxor (5 * 32)(%rdx), RB1, RB1; + vpxor (6 * 32)(%rdx), RB2, RB2; + vpxor (7 * 32)(%rdx), RB3, RB3; + + vmovdqu RA0, (0 * 32)(%rsi); + vmovdqu RA1, (1 * 32)(%rsi); + vmovdqu RA2, (2 * 32)(%rsi); + vmovdqu RA3, (3 * 32)(%rsi); + vmovdqu RB0, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RB2, (6 * 32)(%rsi); + vmovdqu RB3, (7 * 32)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16) + +/* + * void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv + */ + FRAME_BEGIN + + vzeroupper; + + vmovdqu (0 * 32)(%rdx), RA0; + vmovdqu (1 * 32)(%rdx), RA1; + vmovdqu (2 * 32)(%rdx), RA2; + vmovdqu (3 * 32)(%rdx), RA3; + vmovdqu (4 * 32)(%rdx), RB0; + vmovdqu (5 * 32)(%rdx), RB1; + vmovdqu (6 * 32)(%rdx), RB2; + vmovdqu (7 * 32)(%rdx), RB3; + + call __sm4_crypt_blk16; + + vmovdqu (%rcx), RNOTx; + vinserti128 $1, (%rdx), RNOT, RNOT; + vpxor RNOT, RA0, RA0; + vpxor (0 * 32 + 16)(%rdx), RA1, RA1; + vpxor (1 * 32 + 16)(%rdx), RA2, RA2; + vpxor (2 * 32 + 16)(%rdx), RA3, RA3; + vpxor (3 * 32 + 16)(%rdx), RB0, RB0; + vpxor (4 * 32 + 16)(%rdx), RB1, RB1; + vpxor (5 * 32 + 16)(%rdx), RB2, RB2; + vpxor (6 * 32 + 16)(%rdx), RB3, RB3; + vmovdqu (7 * 32 + 16)(%rdx), RNOTx; + vmovdqu RNOTx, (%rcx); /* store new IV */ + + vmovdqu RA0, (0 * 32)(%rsi); + vmovdqu RA1, (1 * 32)(%rsi); + vmovdqu RA2, (2 * 32)(%rsi); + vmovdqu RA3, (3 * 32)(%rsi); + vmovdqu RB0, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RB2, (6 * 32)(%rsi); + vmovdqu RB3, (7 * 32)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16) + +/* + * void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst, + * const u8 *src, u8 *iv) + */ +.align 8 +SYM_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16) + /* input: + * %rdi: round key array, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv + */ + FRAME_BEGIN + + vzeroupper; + + /* Load input */ + vmovdqu (%rcx), RNOTx; + vinserti128 $1, (%rdx), RNOT, RA0; + vmovdqu (0 * 32 + 16)(%rdx), RA1; + vmovdqu (1 * 32 + 16)(%rdx), RA2; + vmovdqu (2 * 32 + 16)(%rdx), RA3; + vmovdqu (3 * 32 + 16)(%rdx), RB0; + vmovdqu (4 * 32 + 16)(%rdx), RB1; + vmovdqu (5 * 32 + 16)(%rdx), RB2; + vmovdqu (6 * 32 + 16)(%rdx), RB3; + + /* Update IV */ + vmovdqu (7 * 32 + 16)(%rdx), RNOTx; + vmovdqu RNOTx, (%rcx); + + call __sm4_crypt_blk16; + + vpxor (0 * 32)(%rdx), RA0, RA0; + vpxor (1 * 32)(%rdx), RA1, RA1; + vpxor (2 * 32)(%rdx), RA2, RA2; + vpxor (3 * 32)(%rdx), RA3, RA3; + vpxor (4 * 32)(%rdx), RB0, RB0; + vpxor (5 * 
32)(%rdx), RB1, RB1; + vpxor (6 * 32)(%rdx), RB2, RB2; + vpxor (7 * 32)(%rdx), RB3, RB3; + + vmovdqu RA0, (0 * 32)(%rsi); + vmovdqu RA1, (1 * 32)(%rsi); + vmovdqu RA2, (2 * 32)(%rsi); + vmovdqu RA3, (3 * 32)(%rsi); + vmovdqu RB0, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RB2, (6 * 32)(%rsi); + vmovdqu RB3, (7 * 32)(%rsi); + + vzeroall; + FRAME_END + ret; +SYM_FUNC_END(sm4_aesni_avx2_cfb_dec_blk16) diff --git a/arch/x86/crypto/sm4-avx.h b/arch/x86/crypto/sm4-avx.h new file mode 100644 index 000000000000..1bceab7516aa --- /dev/null +++ b/arch/x86/crypto/sm4-avx.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef ASM_X86_SM4_AVX_H +#define ASM_X86_SM4_AVX_H + +#include +#include + +typedef void (*sm4_crypt_func)(const u32 *rk, u8 *dst, const u8 *src, u8 *iv); + +int sm4_avx_ecb_encrypt(struct skcipher_request *req); +int sm4_avx_ecb_decrypt(struct skcipher_request *req); + +int sm4_cbc_encrypt(struct skcipher_request *req); +int sm4_avx_cbc_decrypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func); + +int sm4_cfb_encrypt(struct skcipher_request *req); +int sm4_avx_cfb_decrypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func); + +int sm4_avx_ctr_crypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func); + +#endif diff --git a/arch/x86/crypto/sm4_aesni_avx2_glue.c b/arch/x86/crypto/sm4_aesni_avx2_glue.c new file mode 100644 index 000000000000..84bc718f49a3 --- /dev/null +++ b/arch/x86/crypto/sm4_aesni_avx2_glue.c @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm, AES-NI/AVX2 optimized. + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (c) 2021, Alibaba Group. 
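+ *
+ * Implementation note (informational): the 16-block path keeps all
+ * round state in 256-bit ymm registers, but plain AVX2 hardware has no
+ * 256-bit vaesenclast (that requires VAES), so every round extracts the
+ * high 128-bit lane with vextracti128, runs vaesenclast on both xmm
+ * halves and re-inserts the result -- see __sm4_crypt_blk16 in
+ * sm4-aesni-avx2-asm_64.S.
+ *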
+ * Copyright (c) 2021 Tianjia Zhang + */ + +#include +#include +#include +#include +#include +#include +#include +#include "sm4-avx.h" + +#define SM4_CRYPT16_BLOCK_SIZE (SM4_BLOCK_SIZE * 16) + +asmlinkage void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); +asmlinkage void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); +asmlinkage void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); + +static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_expandkey(ctx, key, key_len); +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + return sm4_avx_cbc_decrypt(req, SM4_CRYPT16_BLOCK_SIZE, + sm4_aesni_avx2_cbc_dec_blk16); +} + + +static int cfb_decrypt(struct skcipher_request *req) +{ + return sm4_avx_cfb_decrypt(req, SM4_CRYPT16_BLOCK_SIZE, + sm4_aesni_avx2_cfb_dec_blk16); +} + +static int ctr_crypt(struct skcipher_request *req) +{ + return sm4_avx_ctr_crypt(req, SM4_CRYPT16_BLOCK_SIZE, + sm4_aesni_avx2_ctr_enc_blk16); +} + +static struct skcipher_alg sm4_aesni_avx2_skciphers[] = { + { + .base = { + .cra_name = "__ecb(sm4)", + .cra_driver_name = "__ecb-sm4-aesni-avx2", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .walksize = 16 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_avx_ecb_encrypt, + .decrypt = sm4_avx_ecb_decrypt, + }, { + .base = { + .cra_name = "__cbc(sm4)", + .cra_driver_name = "__cbc-sm4-aesni-avx2", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .walksize = 16 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base = { + .cra_name = "__cfb(sm4)", + .cra_driver_name = "__cfb-sm4-aesni-avx2", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .walksize = 16 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_cfb_encrypt, + .decrypt = cfb_decrypt, + }, { + .base = { + .cra_name = "__ctr(sm4)", + .cra_driver_name = "__ctr-sm4-aesni-avx2", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .walksize = 16 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + } +}; + +static struct simd_skcipher_alg * +simd_sm4_aesni_avx2_skciphers[ARRAY_SIZE(sm4_aesni_avx2_skciphers)]; + +static int __init sm4_init(void) +{ + const char *feature_name; + + if (!boot_cpu_has(X86_FEATURE_AVX) || + !boot_cpu_has(X86_FEATURE_AVX2) || + !boot_cpu_has(X86_FEATURE_AES) || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) { + pr_info("AVX2 or AES-NI instructions are not detected.\n"); + return -ENODEV; + } + + if 
(!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + return simd_register_skciphers_compat(sm4_aesni_avx2_skciphers, + ARRAY_SIZE(sm4_aesni_avx2_skciphers), + simd_sm4_aesni_avx2_skciphers); +} + +static void __exit sm4_exit(void) +{ + simd_unregister_skciphers(sm4_aesni_avx2_skciphers, + ARRAY_SIZE(sm4_aesni_avx2_skciphers), + simd_sm4_aesni_avx2_skciphers); +} + +module_init(sm4_init); +module_exit(sm4_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tianjia Zhang "); +MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX2 optimized"); +MODULE_ALIAS_CRYPTO("sm4"); +MODULE_ALIAS_CRYPTO("sm4-aesni-avx2"); diff --git a/arch/x86/crypto/sm4_aesni_avx_glue.c b/arch/x86/crypto/sm4_aesni_avx_glue.c new file mode 100644 index 000000000000..7800f77d68ad --- /dev/null +++ b/arch/x86/crypto/sm4_aesni_avx_glue.c @@ -0,0 +1,487 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm, AES-NI/AVX optimized. + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (c) 2021, Alibaba Group. + * Copyright (c) 2021 Tianjia Zhang + */ + +#include +#include +#include +#include +#include +#include +#include +#include "sm4-avx.h" + +#define SM4_CRYPT8_BLOCK_SIZE (SM4_BLOCK_SIZE * 8) + +asmlinkage void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst, + const u8 *src, int nblocks); +asmlinkage void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst, + const u8 *src, int nblocks); +asmlinkage void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); +asmlinkage void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); +asmlinkage void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst, + const u8 *src, u8 *iv); + +static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_expandkey(ctx, key, key_len); +} + +static int ecb_do_crypt(struct skcipher_request *req, const u32 *rkey) +{ + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + kernel_fpu_begin(); + while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) { + sm4_aesni_avx_crypt8(rkey, dst, src, 8); + dst += SM4_CRYPT8_BLOCK_SIZE; + src += SM4_CRYPT8_BLOCK_SIZE; + nbytes -= SM4_CRYPT8_BLOCK_SIZE; + } + while (nbytes >= SM4_BLOCK_SIZE) { + unsigned int nblocks = min(nbytes >> 4, 4u); + sm4_aesni_avx_crypt4(rkey, dst, src, nblocks); + dst += nblocks * SM4_BLOCK_SIZE; + src += nblocks * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; + } + kernel_fpu_end(); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +int sm4_avx_ecb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return ecb_do_crypt(req, ctx->rkey_enc); +} +EXPORT_SYMBOL_GPL(sm4_avx_ecb_encrypt); + +int sm4_avx_ecb_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return ecb_do_crypt(req, ctx->rkey_dec); +} +EXPORT_SYMBOL_GPL(sm4_avx_ecb_decrypt); + +int sm4_cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = 
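	/*
	 * CBC encryption is inherently serial: each block's cipher input
	 * is P[i] ^ C[i-1], so unlike the decrypt path below there is
	 * nothing to batch into the 8-wide assembly.  This helper
	 * therefore keeps to the scalar sm4_crypt_block() and is exported
	 * so the AVX2 module can reuse it as-is.
	 */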
crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *iv = walk.iv; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + while (nbytes >= SM4_BLOCK_SIZE) { + crypto_xor_cpy(dst, src, iv, SM4_BLOCK_SIZE); + sm4_crypt_block(ctx->rkey_enc, dst, dst); + iv = dst; + src += SM4_BLOCK_SIZE; + dst += SM4_BLOCK_SIZE; + nbytes -= SM4_BLOCK_SIZE; + } + if (iv != walk.iv) + memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(sm4_cbc_encrypt); + +int sm4_avx_cbc_decrypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + kernel_fpu_begin(); + + while (nbytes >= bsize) { + func(ctx->rkey_dec, dst, src, walk.iv); + dst += bsize; + src += bsize; + nbytes -= bsize; + } + + while (nbytes >= SM4_BLOCK_SIZE) { + u8 keystream[SM4_BLOCK_SIZE * 8]; + u8 iv[SM4_BLOCK_SIZE]; + unsigned int nblocks = min(nbytes >> 4, 8u); + int i; + + sm4_aesni_avx_crypt8(ctx->rkey_dec, keystream, + src, nblocks); + + src += ((int)nblocks - 2) * SM4_BLOCK_SIZE; + dst += (nblocks - 1) * SM4_BLOCK_SIZE; + memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE); + + for (i = nblocks - 1; i > 0; i--) { + crypto_xor_cpy(dst, src, + &keystream[i * SM4_BLOCK_SIZE], + SM4_BLOCK_SIZE); + src -= SM4_BLOCK_SIZE; + dst -= SM4_BLOCK_SIZE; + } + crypto_xor_cpy(dst, walk.iv, keystream, SM4_BLOCK_SIZE); + memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + dst += nblocks * SM4_BLOCK_SIZE; + src += (nblocks + 1) * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; + } + + kernel_fpu_end(); + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(sm4_avx_cbc_decrypt); + +static int cbc_decrypt(struct skcipher_request *req) +{ + return sm4_avx_cbc_decrypt(req, SM4_CRYPT8_BLOCK_SIZE, + sm4_aesni_avx_cbc_dec_blk8); +} + +int sm4_cfb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + const u8 *iv = walk.iv; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + while (nbytes >= SM4_BLOCK_SIZE) { + sm4_crypt_block(ctx->rkey_enc, keystream, iv); + crypto_xor_cpy(dst, src, keystream, SM4_BLOCK_SIZE); + iv = dst; + src += SM4_BLOCK_SIZE; + dst += SM4_BLOCK_SIZE; + nbytes -= SM4_BLOCK_SIZE; + } + if (iv != walk.iv) + memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(sm4_cfb_encrypt); + +int sm4_avx_cfb_decrypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct 
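	/*
	 * Unlike CFB encryption above, CFB decryption parallelizes: every
	 * keystream block is E(C[i-1]) with C[-1] = IV, and all of the
	 * ciphertext is in hand up front.  As a scalar sketch of the
	 * recurrence this function implements (illustrative only):
	 *
	 *	P[0] = E(IV)     ^ C[0];
	 *	P[i] = E(C[i-1]) ^ C[i];	// i = 1 .. n-1
	 *	new_iv = C[n-1];
	 *
	 * which is why the bulk loop below can gather [IV, C0 .. Cn-2]
	 * into a keystream buffer and run the batched encrypt over it.
	 */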
skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + kernel_fpu_begin(); + + while (nbytes >= bsize) { + func(ctx->rkey_enc, dst, src, walk.iv); + dst += bsize; + src += bsize; + nbytes -= bsize; + } + + while (nbytes >= SM4_BLOCK_SIZE) { + u8 keystream[SM4_BLOCK_SIZE * 8]; + unsigned int nblocks = min(nbytes >> 4, 8u); + + memcpy(keystream, walk.iv, SM4_BLOCK_SIZE); + if (nblocks > 1) + memcpy(&keystream[SM4_BLOCK_SIZE], src, + (nblocks - 1) * SM4_BLOCK_SIZE); + memcpy(walk.iv, src + (nblocks - 1) * SM4_BLOCK_SIZE, + SM4_BLOCK_SIZE); + + sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream, + keystream, nblocks); + + crypto_xor_cpy(dst, src, keystream, + nblocks * SM4_BLOCK_SIZE); + dst += nblocks * SM4_BLOCK_SIZE; + src += nblocks * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; + } + + kernel_fpu_end(); + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(sm4_avx_cfb_decrypt); + +static int cfb_decrypt(struct skcipher_request *req) +{ + return sm4_avx_cfb_decrypt(req, SM4_CRYPT8_BLOCK_SIZE, + sm4_aesni_avx_cfb_dec_blk8); +} + +int sm4_avx_ctr_crypt(struct skcipher_request *req, + unsigned int bsize, sm4_crypt_func func) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + kernel_fpu_begin(); + + while (nbytes >= bsize) { + func(ctx->rkey_enc, dst, src, walk.iv); + dst += bsize; + src += bsize; + nbytes -= bsize; + } + + while (nbytes >= SM4_BLOCK_SIZE) { + u8 keystream[SM4_BLOCK_SIZE * 8]; + unsigned int nblocks = min(nbytes >> 4, 8u); + int i; + + for (i = 0; i < nblocks; i++) { + memcpy(&keystream[i * SM4_BLOCK_SIZE], + walk.iv, SM4_BLOCK_SIZE); + crypto_inc(walk.iv, SM4_BLOCK_SIZE); + } + sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream, + keystream, nblocks); + + crypto_xor_cpy(dst, src, keystream, + nblocks * SM4_BLOCK_SIZE); + dst += nblocks * SM4_BLOCK_SIZE; + src += nblocks * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; + } + + kernel_fpu_end(); + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + memcpy(keystream, walk.iv, SM4_BLOCK_SIZE); + crypto_inc(walk.iv, SM4_BLOCK_SIZE); + + sm4_crypt_block(ctx->rkey_enc, keystream, keystream); + + crypto_xor_cpy(dst, src, keystream, nbytes); + dst += nbytes; + src += nbytes; + nbytes = 0; + } + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(sm4_avx_ctr_crypt); + +static int ctr_crypt(struct skcipher_request *req) +{ + return sm4_avx_ctr_crypt(req, SM4_CRYPT8_BLOCK_SIZE, + sm4_aesni_avx_ctr_enc_blk8); +} + +static struct skcipher_alg sm4_aesni_avx_skciphers[] = { + { + .base = { + .cra_name = "__ecb(sm4)", + .cra_driver_name = "__ecb-sm4-aesni-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + 
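	/*
	 * Note on these descriptors: ECB/CBC are true block modes
	 * (cra_blocksize = SM4_BLOCK_SIZE), while the CFB/CTR entries
	 * advertise cra_blocksize = 1 because they behave as stream
	 * ciphers with a SM4_BLOCK_SIZE chunksize.  All four set
	 * .walksize to 8 * SM4_BLOCK_SIZE so the skcipher walk prefers
	 * handing the glue code chunks the 8-block assembly can consume
	 * whole.
	 */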
.max_keysize = SM4_KEY_SIZE, + .walksize = 8 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_avx_ecb_encrypt, + .decrypt = sm4_avx_ecb_decrypt, + }, { + .base = { + .cra_name = "__cbc(sm4)", + .cra_driver_name = "__cbc-sm4-aesni-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .walksize = 8 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base = { + .cra_name = "__cfb(sm4)", + .cra_driver_name = "__cfb-sm4-aesni-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .walksize = 8 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = sm4_cfb_encrypt, + .decrypt = cfb_decrypt, + }, { + .base = { + .cra_name = "__ctr(sm4)", + .cra_driver_name = "__ctr-sm4-aesni-avx", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .walksize = 8 * SM4_BLOCK_SIZE, + .setkey = sm4_skcipher_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + } +}; + +static struct simd_skcipher_alg * +simd_sm4_aesni_avx_skciphers[ARRAY_SIZE(sm4_aesni_avx_skciphers)]; + +static int __init sm4_init(void) +{ + const char *feature_name; + + if (!boot_cpu_has(X86_FEATURE_AVX) || + !boot_cpu_has(X86_FEATURE_AES) || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) { + pr_info("AVX or AES-NI instructions are not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + return simd_register_skciphers_compat(sm4_aesni_avx_skciphers, + ARRAY_SIZE(sm4_aesni_avx_skciphers), + simd_sm4_aesni_avx_skciphers); +} + +static void __exit sm4_exit(void) +{ + simd_unregister_skciphers(sm4_aesni_avx_skciphers, + ARRAY_SIZE(sm4_aesni_avx_skciphers), + simd_sm4_aesni_avx_skciphers); +} + +module_init(sm4_init); +module_exit(sm4_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tianjia Zhang "); +MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX optimized"); +MODULE_ALIAS_CRYPTO("sm4"); +MODULE_ALIAS_CRYPTO("sm4-aesni-avx"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 64b772c5d1c9..536df4b6b825 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1547,6 +1547,7 @@ config CRYPTO_SERPENT_AVX2_X86_64 config CRYPTO_SM4 tristate "SM4 cipher algorithm" select CRYPTO_ALGAPI + select CRYPTO_LIB_SM4 help SM4 cipher algorithms (OSCCA GB/T 32907-2016). @@ -1569,6 +1570,49 @@ config CRYPTO_SM4 If unsure, say N. +config CRYPTO_SM4_AESNI_AVX_X86_64 + tristate "SM4 cipher algorithm (x86_64/AES-NI/AVX)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_SIMD + select CRYPTO_ALGAPI + select CRYPTO_LIB_SM4 + help + SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX). 
+ + SM4 (GBT.32907-2016) is a cryptographic standard issued by the + Organization of State Commercial Administration of China (OSCCA) + as an authorized cryptographic algorithms for the use within China. + + This is SM4 optimized implementation using AES-NI/AVX/x86_64 + instruction set for block cipher. Through two affine transforms, + we can use the AES S-Box to simulate the SM4 S-Box to achieve the + effect of instruction acceleration. + + If unsure, say N. + +config CRYPTO_SM4_AESNI_AVX2_X86_64 + tristate "SM4 cipher algorithm (x86_64/AES-NI/AVX2)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_SIMD + select CRYPTO_ALGAPI + select CRYPTO_LIB_SM4 + select CRYPTO_SM4_AESNI_AVX_X86_64 + help + SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX2). + + SM4 (GBT.32907-2016) is a cryptographic standard issued by the + Organization of State Commercial Administration of China (OSCCA) + as an authorized cryptographic algorithms for the use within China. + + This is SM4 optimized implementation using AES-NI/AVX2/x86_64 + instruction set for block cipher. Through two affine transforms, + we can use the AES S-Box to simulate the SM4 S-Box to achieve the + effect of instruction acceleration. + + If unsure, say N. + config CRYPTO_TEA tristate "TEA, XTEA and XETA cipher algorithms" depends on CRYPTO_USER_API_ENABLE_OBSOLETE diff --git a/crypto/Makefile b/crypto/Makefile index 10526d4559b8..c633f15a0481 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -74,7 +74,6 @@ obj-$(CONFIG_CRYPTO_NULL2) += crypto_null.o obj-$(CONFIG_CRYPTO_MD4) += md4.o obj-$(CONFIG_CRYPTO_MD5) += md5.o obj-$(CONFIG_CRYPTO_RMD160) += rmd160.o -obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o diff --git a/crypto/ecc.h b/crypto/ecc.h index a006132646a4..1350e8eb6ac2 100644 --- a/crypto/ecc.h +++ b/crypto/ecc.h @@ -27,6 +27,7 @@ #define _CRYPTO_ECC_H #include +#include /* One digit is u64 qword. 
*/ #define ECC_CURVE_NIST_P192_DIGITS 3 @@ -46,13 +47,13 @@ * @out: Output array * @ndigits: Number of digits to copy */ -static inline void ecc_swap_digits(const u64 *in, u64 *out, unsigned int ndigits) +static inline void ecc_swap_digits(const void *in, u64 *out, unsigned int ndigits) { const __be64 *src = (__force __be64 *)in; int i; for (i = 0; i < ndigits; i++) - out[i] = be64_to_cpu(src[ndigits - 1 - i]); + out[i] = get_unaligned_be64(&src[ndigits - 1 - i]); } /** diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c index c72d72ad828e..be70e76d6d86 100644 --- a/crypto/sha512_generic.c +++ b/crypto/sha512_generic.c @@ -143,9 +143,6 @@ sha512_transform(u64 *state, const u8 *input) state[0] += a; state[1] += b; state[2] += c; state[3] += d; state[4] += e; state[5] += f; state[6] += g; state[7] += h; - - /* erase our data */ - a = b = c = d = e = f = g = h = t1 = t2 = 0; } static void sha512_generic_block_fn(struct sha512_state *sst, u8 const *src, diff --git a/crypto/skcipher.c b/crypto/skcipher.c index a15376245416..418211180cee 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -431,7 +431,7 @@ static int skcipher_copy_iv(struct skcipher_walk *walk) static int skcipher_walk_first(struct skcipher_walk *walk) { - if (WARN_ON_ONCE(in_irq())) + if (WARN_ON_ONCE(in_hardirq())) return -EDEADLK; walk->buffer = NULL; diff --git a/crypto/sm4_generic.c b/crypto/sm4_generic.c index 016dbc595705..4a6480a27fee 100644 --- a/crypto/sm4_generic.c +++ b/crypto/sm4_generic.c @@ -16,191 +16,43 @@ #include #include -static const u32 fk[4] = { - 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc -}; - -static const u8 sbox[256] = { - 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, - 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05, - 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3, - 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, - 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a, - 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62, - 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95, - 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6, - 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba, - 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8, - 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b, - 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35, - 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2, - 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87, - 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52, - 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e, - 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5, - 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1, - 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55, - 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3, - 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60, - 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f, - 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f, - 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51, - 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f, - 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8, - 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd, - 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0, - 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e, - 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84, - 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20, - 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48 -}; - -static const u32 ck[] = { - 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269, - 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9, - 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249, - 0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9, - 0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229, - 0x30373e45, 
0x4c535a61, 0x686f767d, 0x848b9299, - 0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209, - 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279 -}; - -static u32 sm4_t_non_lin_sub(u32 x) -{ - int i; - u8 *b = (u8 *)&x; - - for (i = 0; i < 4; ++i) - b[i] = sbox[b[i]]; - - return x; -} - -static u32 sm4_key_lin_sub(u32 x) -{ - return x ^ rol32(x, 13) ^ rol32(x, 23); - -} - -static u32 sm4_enc_lin_sub(u32 x) -{ - return x ^ rol32(x, 2) ^ rol32(x, 10) ^ rol32(x, 18) ^ rol32(x, 24); -} - -static u32 sm4_key_sub(u32 x) -{ - return sm4_key_lin_sub(sm4_t_non_lin_sub(x)); -} - -static u32 sm4_enc_sub(u32 x) -{ - return sm4_enc_lin_sub(sm4_t_non_lin_sub(x)); -} - -static u32 sm4_round(const u32 *x, const u32 rk) -{ - return x[0] ^ sm4_enc_sub(x[1] ^ x[2] ^ x[3] ^ rk); -} - - /** - * crypto_sm4_expand_key - Expands the SM4 key as described in GB/T 32907-2016 - * @ctx: The location where the computed key will be stored. - * @in_key: The supplied key. - * @key_len: The length of the supplied key. - * - * Returns 0 on success. The function fails only if an invalid key size (or - * pointer) is supplied. - */ -int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key, - unsigned int key_len) -{ - u32 rk[4], t; - const u32 *key = (u32 *)in_key; - int i; - - if (key_len != SM4_KEY_SIZE) - return -EINVAL; - - for (i = 0; i < 4; ++i) - rk[i] = get_unaligned_be32(&key[i]) ^ fk[i]; - - for (i = 0; i < 32; ++i) { - t = rk[0] ^ sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i]); - ctx->rkey_enc[i] = t; - rk[0] = rk[1]; - rk[1] = rk[2]; - rk[2] = rk[3]; - rk[3] = t; - } - - for (i = 0; i < 32; ++i) - ctx->rkey_dec[i] = ctx->rkey_enc[31 - i]; - - return 0; -} -EXPORT_SYMBOL_GPL(crypto_sm4_expand_key); - -/** - * crypto_sm4_set_key - Set the SM4 key. + * sm4_setkey - Set the SM4 key. * @tfm: The %crypto_tfm that is used in the context. * @in_key: The input key. * @key_len: The size of the key. * - * This function uses crypto_sm4_expand_key() to expand the key. - * &crypto_sm4_ctx _must_ be the private data embedded in @tfm which is + * This function uses sm4_expandkey() to expand the key. + * &sm4_ctx _must_ be the private data embedded in @tfm which is * retrieved with crypto_tfm_ctx(). 
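 *
 * A minimal usage sketch of the library interface this wrapper sits on
 * (editor's addition; struct sm4_ctx, sm4_expandkey() and
 * sm4_crypt_block() are the declarations introduced by this series, the
 * <crypto/sm4.h> header name is an assumption):
 *
 *	struct sm4_ctx c;
 *	u8 out[SM4_BLOCK_SIZE];
 *
 *	if (sm4_expandkey(&c, key, SM4_KEY_SIZE) == 0)
 *		sm4_crypt_block(c.rkey_enc, out, in);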
* * Return: 0 on success; -EINVAL on failure (only happens for bad key lengths) */ -int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int sm4_setkey(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { - struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); + struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); - return crypto_sm4_expand_key(ctx, in_key, key_len); -} -EXPORT_SYMBOL_GPL(crypto_sm4_set_key); - -static void sm4_do_crypt(const u32 *rk, u32 *out, const u32 *in) -{ - u32 x[4], i, t; - - for (i = 0; i < 4; ++i) - x[i] = get_unaligned_be32(&in[i]); - - for (i = 0; i < 32; ++i) { - t = sm4_round(x, rk[i]); - x[0] = x[1]; - x[1] = x[2]; - x[2] = x[3]; - x[3] = t; - } - - for (i = 0; i < 4; ++i) - put_unaligned_be32(x[3 - i], &out[i]); + return sm4_expandkey(ctx, in_key, key_len); } /* encrypt a block of text */ -void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +static void sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); + const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); - sm4_do_crypt(ctx->rkey_enc, (u32 *)out, (u32 *)in); + sm4_crypt_block(ctx->rkey_enc, out, in); } -EXPORT_SYMBOL_GPL(crypto_sm4_encrypt); /* decrypt a block of text */ -void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +static void sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); + const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); - sm4_do_crypt(ctx->rkey_dec, (u32 *)out, (u32 *)in); + sm4_crypt_block(ctx->rkey_dec, out, in); } -EXPORT_SYMBOL_GPL(crypto_sm4_decrypt); static struct crypto_alg sm4_alg = { .cra_name = "sm4", @@ -208,15 +60,15 @@ static struct crypto_alg sm4_alg = { .cra_priority = 100, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = SM4_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_sm4_ctx), + .cra_ctxsize = sizeof(struct sm4_ctx), .cra_module = THIS_MODULE, .cra_u = { .cipher = { .cia_min_keysize = SM4_KEY_SIZE, .cia_max_keysize = SM4_KEY_SIZE, - .cia_setkey = crypto_sm4_set_key, - .cia_encrypt = crypto_sm4_encrypt, - .cia_decrypt = crypto_sm4_decrypt + .cia_setkey = sm4_setkey, + .cia_encrypt = sm4_encrypt, + .cia_decrypt = sm4_decrypt } } }; diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index f8d06da78e4f..82b0400985a5 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -77,7 +77,7 @@ static const char *check[] = { NULL }; -static const int block_sizes[] = { 16, 64, 256, 1024, 1420, 4096, 0 }; +static const int block_sizes[] = { 16, 64, 128, 256, 1024, 1420, 4096, 0 }; static const int aead_sizes[] = { 16, 64, 256, 512, 1024, 1420, 4096, 8192, 0 }; #define XBUFSIZE 8 @@ -290,6 +290,11 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs, } ret = crypto_aead_setauthsize(tfm, authsize); + if (ret) { + pr_err("alg: aead: Failed to setauthsize for %s: %d\n", algo, + ret); + goto out_free_tfm; + } for (i = 0; i < num_mb; ++i) if (testmgr_alloc_buf(data[i].xbuf)) { @@ -315,7 +320,7 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs, for (i = 0; i < num_mb; ++i) { data[i].req = aead_request_alloc(tfm, GFP_KERNEL); if (!data[i].req) { - pr_err("alg: skcipher: Failed to allocate request for %s\n", + pr_err("alg: aead: Failed to allocate request for %s\n", algo); while (i--) aead_request_free(data[i].req); @@ -567,13 +572,19 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, sgout = &sg[9]; tfm = crypto_alloc_aead(algo, 0, 0); - if 
(IS_ERR(tfm)) { pr_err("alg: aead: Failed to load transform for %s: %ld\n", algo, PTR_ERR(tfm)); goto out_notfm; } + ret = crypto_aead_setauthsize(tfm, authsize); + if (ret) { + pr_err("alg: aead: Failed to setauthsize for %s: %d\n", algo, + ret); + goto out_noreq; + } + crypto_init_wait(&wait); printk(KERN_INFO "\ntesting speed of %s (%s) %s\n", algo, get_driver_name(crypto_aead, tfm), e); @@ -611,8 +622,13 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, break; } } + ret = crypto_aead_setkey(tfm, key, *keysize); - ret = crypto_aead_setauthsize(tfm, authsize); + if (ret) { + pr_err("setkey() failed flags=%x: %d\n", + crypto_aead_get_flags(tfm), ret); + goto out; + } iv_len = crypto_aead_ivsize(tfm); if (iv_len) @@ -622,15 +638,8 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, printk(KERN_INFO "test %u (%d bit key, %d byte blocks): ", i, *keysize * 8, bs); - memset(tvmem[0], 0xff, PAGE_SIZE); - if (ret) { - pr_err("setkey() failed flags=%x\n", - crypto_aead_get_flags(tfm)); - goto out; - } - sg_init_aead(sg, xbuf, bs + (enc ? 0 : authsize), assoc, aad_size); @@ -1907,6 +1916,14 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) ret += tcrypt_test("streebog512"); break; + case 55: + ret += tcrypt_test("gcm(sm4)"); + break; + + case 56: + ret += tcrypt_test("ccm(sm4)"); + break; + case 100: ret += tcrypt_test("hmac(md5)"); break; @@ -1998,6 +2015,15 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) case 157: ret += tcrypt_test("authenc(hmac(sha1),ecb(cipher_null))"); break; + + case 158: + ret += tcrypt_test("cbcmac(sm4)"); + break; + + case 159: + ret += tcrypt_test("cmac(sm4)"); + break; + case 181: ret += tcrypt_test("authenc(hmac(sha1),cbc(des))"); break; @@ -2031,6 +2057,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) case 191: ret += tcrypt_test("ecb(sm4)"); ret += tcrypt_test("cbc(sm4)"); + ret += tcrypt_test("cfb(sm4)"); ret += tcrypt_test("ctr(sm4)"); break; case 200: @@ -2289,6 +2316,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) speed_template_16); test_cipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0, speed_template_16); + test_cipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0, + speed_template_16); + test_cipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0, + speed_template_16); test_cipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0, speed_template_16); test_cipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0, @@ -2322,6 +2353,34 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) NULL, 0, 16, 8, speed_template_16); break; + case 222: + test_aead_speed("gcm(sm4)", ENCRYPT, sec, + NULL, 0, 16, 8, speed_template_16); + test_aead_speed("gcm(sm4)", DECRYPT, sec, + NULL, 0, 16, 8, speed_template_16); + break; + + case 223: + test_aead_speed("rfc4309(ccm(sm4))", ENCRYPT, sec, + NULL, 0, 16, 16, aead_speed_template_19); + test_aead_speed("rfc4309(ccm(sm4))", DECRYPT, sec, + NULL, 0, 16, 16, aead_speed_template_19); + break; + + case 224: + test_mb_aead_speed("gcm(sm4)", ENCRYPT, sec, NULL, 0, 16, 8, + speed_template_16, num_mb); + test_mb_aead_speed("gcm(sm4)", DECRYPT, sec, NULL, 0, 16, 8, + speed_template_16, num_mb); + break; + + case 225: + test_mb_aead_speed("rfc4309(ccm(sm4))", ENCRYPT, sec, NULL, 0, + 16, 16, aead_speed_template_19, num_mb); + test_mb_aead_speed("rfc4309(ccm(sm4))", DECRYPT, sec, NULL, 0, + 16, 16, aead_speed_template_19, num_mb); + break; + case 300: if (alg) { 
test_hash_speed(alg, sec, generic_hash_speed_template); @@ -2757,6 +2816,25 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) speed_template_8_32); break; + case 518: + test_acipher_speed("ecb(sm4)", ENCRYPT, sec, NULL, 0, + speed_template_16); + test_acipher_speed("ecb(sm4)", DECRYPT, sec, NULL, 0, + speed_template_16); + test_acipher_speed("cbc(sm4)", ENCRYPT, sec, NULL, 0, + speed_template_16); + test_acipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0, + speed_template_16); + test_acipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0, + speed_template_16); + test_acipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0, + speed_template_16); + test_acipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0, + speed_template_16); + test_acipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0, + speed_template_16); + break; + case 600: test_mb_skcipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0, speed_template_16_24_32, num_mb); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index c978e41f11a1..70f69f0910c9 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -4450,6 +4450,12 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(aes_cbcmac_tv_template) } + }, { + .alg = "cbcmac(sm4)", + .test = alg_test_hash, + .suite = { + .hash = __VECS(sm4_cbcmac_tv_template) + } }, { .alg = "ccm(aes)", .generic_driver = "ccm_base(ctr(aes-generic),cbcmac(aes-generic))", @@ -4461,6 +4467,16 @@ static const struct alg_test_desc alg_test_descs[] = { .einval_allowed = 1, } } + }, { + .alg = "ccm(sm4)", + .generic_driver = "ccm_base(ctr(sm4-generic),cbcmac(sm4-generic))", + .test = alg_test_aead, + .suite = { + .aead = { + ____VECS(sm4_ccm_tv_template), + .einval_allowed = 1, + } + } }, { .alg = "cfb(aes)", .test = alg_test_skcipher, @@ -4494,6 +4510,12 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(des3_ede_cmac64_tv_template) } + }, { + .alg = "cmac(sm4)", + .test = alg_test_hash, + .suite = { + .hash = __VECS(sm4_cmac128_tv_template) + } }, { .alg = "compress_null", .test = alg_test_null, @@ -4967,6 +4989,13 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .aead = __VECS(aes_gcm_tv_template) } + }, { + .alg = "gcm(sm4)", + .generic_driver = "gcm_base(ctr(sm4-generic),ghash-generic)", + .test = alg_test_aead, + .suite = { + .aead = __VECS(sm4_gcm_tv_template) + } }, { .alg = "ghash", .test = alg_test_hash, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 3ed6ab34ab51..e6fca34b5b25 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -13328,6 +13328,154 @@ static const struct cipher_testvec sm4_cfb_tv_template[] = { } }; +static const struct aead_testvec sm4_gcm_tv_template[] = { + { /* From https://datatracker.ietf.org/doc/html/rfc8998#appendix-A.1 */ + .key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF" + "\xFE\xDC\xBA\x98\x76\x54\x32\x10", + .klen = 16, + .iv = "\x00\x00\x12\x34\x56\x78\x00\x00" + "\x00\x00\xAB\xCD", + .ptext = "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA" + "\xBB\xBB\xBB\xBB\xBB\xBB\xBB\xBB" + "\xCC\xCC\xCC\xCC\xCC\xCC\xCC\xCC" + "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD" + "\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE" + "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" + "\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE" + "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA", + .plen = 64, + .assoc = "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF" + "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF" + "\xAB\xAD\xDA\xD2", + .alen = 20, + .ctext = "\x17\xF3\x99\xF0\x8C\x67\xD5\xEE" + "\x19\xD0\xDC\x99\x69\xC4\xBB\x7D" + "\x5F\xD4\x6F\xD3\x75\x64\x89\x06" + "\x91\x57\xB2\x82\xBB\x20\x07\x35" + 
"\xD8\x27\x10\xCA\x5C\x22\xF0\xCC" + "\xFA\x7C\xBF\x93\xD4\x96\xAC\x15" + "\xA5\x68\x34\xCB\xCF\x98\xC3\x97" + "\xB4\x02\x4A\x26\x91\x23\x3B\x8D" + "\x83\xDE\x35\x41\xE4\xC2\xB5\x81" + "\x77\xE0\x65\xA9\xBF\x7B\x62\xEC", + .clen = 80, + } +}; + +static const struct aead_testvec sm4_ccm_tv_template[] = { + { /* From https://datatracker.ietf.org/doc/html/rfc8998#appendix-A.2 */ + .key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF" + "\xFE\xDC\xBA\x98\x76\x54\x32\x10", + .klen = 16, + .iv = "\x02\x00\x00\x12\x34\x56\x78\x00" + "\x00\x00\x00\xAB\xCD\x00\x00\x00", + .ptext = "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA" + "\xBB\xBB\xBB\xBB\xBB\xBB\xBB\xBB" + "\xCC\xCC\xCC\xCC\xCC\xCC\xCC\xCC" + "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD" + "\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE" + "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" + "\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE" + "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA", + .plen = 64, + .assoc = "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF" + "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF" + "\xAB\xAD\xDA\xD2", + .alen = 20, + .ctext = "\x48\xAF\x93\x50\x1F\xA6\x2A\xDB" + "\xCD\x41\x4C\xCE\x60\x34\xD8\x95" + "\xDD\xA1\xBF\x8F\x13\x2F\x04\x20" + "\x98\x66\x15\x72\xE7\x48\x30\x94" + "\xFD\x12\xE5\x18\xCE\x06\x2C\x98" + "\xAC\xEE\x28\xD9\x5D\xF4\x41\x6B" + "\xED\x31\xA2\xF0\x44\x76\xC1\x8B" + "\xB4\x0C\x84\xA7\x4B\x97\xDC\x5B" + "\x16\x84\x2D\x4F\xA1\x86\xF5\x6A" + "\xB3\x32\x56\x97\x1F\xA1\x10\xF4", + .clen = 80, + } +}; + +static const struct hash_testvec sm4_cbcmac_tv_template[] = { + { + .key = "\xff\xee\xdd\xcc\xbb\xaa\x99\x88" + "\x77\x66\x55\x44\x33\x22\x11\x00", + .plaintext = "\x01\x23\x45\x67\x89\xab\xcd\xef" + "\xfe\xdc\xba\x98\x76\x54\x32\x10", + .digest = "\x97\xb4\x75\x8f\x84\x92\x3d\x3f" + "\x86\x81\x0e\x0e\xea\x14\x6d\x73", + .psize = 16, + .ksize = 16, + }, { + .key = "\x01\x23\x45\x67\x89\xab\xcd\xef" + "\xfe\xdc\xBA\x98\x76\x54\x32\x10", + .plaintext = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb" + "\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xee", + .digest = "\xc7\xdb\x17\x71\xa1\x5c\x0d\x22" + "\xa3\x39\x3a\x31\x88\x91\x49\xa1", + .psize = 33, + .ksize = 16, + }, { + .key = "\x01\x23\x45\x67\x89\xab\xcd\xef" + "\xfe\xdc\xBA\x98\x76\x54\x32\x10", + .plaintext = "\xfb\xd1\xbe\x92\x7e\x50\x3f\x16" + "\xf9\xdd\xbe\x91\x73\x53\x37\x1a" + "\xfe\xdd\xba\x97\x7e\x53\x3c\x1c" + "\xfe\xd7\xbf\x9c\x75\x5f\x3e\x11" + "\xf0\xd8\xbc\x96\x73\x5c\x34\x11" + "\xf5\xdb\xb1\x99\x7a\x5a\x32\x1f" + "\xf6\xdf\xb4\x95\x7f\x5f\x3b\x17" + "\xfd\xdb\xb1\x9b\x76\x5c\x37", + .digest = "\x9b\x07\x88\x7f\xd5\x95\x23\x12" + "\x64\x0a\x66\x7f\x4e\x25\xca\xd0", + .psize = 63, + .ksize = 16, + } +}; + +static const struct hash_testvec sm4_cmac128_tv_template[] = { + { + .key = "\xff\xee\xdd\xcc\xbb\xaa\x99\x88" + "\x77\x66\x55\x44\x33\x22\x11\x00", + .plaintext = "\x01\x23\x45\x67\x89\xab\xcd\xef" + "\xfe\xdc\xba\x98\x76\x54\x32\x10", + .digest = "\x00\xd4\x63\xb4\x9a\xf3\x52\xe2" + "\x74\xa9\x00\x55\x13\x54\x2a\xd1", + .psize = 16, + .ksize = 16, + }, { + .key = "\x01\x23\x45\x67\x89\xab\xcd\xef" + "\xfe\xdc\xBA\x98\x76\x54\x32\x10", + .plaintext = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb" + "\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xee", + .digest = "\x8a\x8a\xe9\xc0\xc8\x97\x0e\x85" + "\x21\x57\x02\x10\x1a\xbf\x9c\xc6", + .psize = 33, + .ksize = 16, + }, { + .key = "\x01\x23\x45\x67\x89\xab\xcd\xef" + "\xfe\xdc\xBA\x98\x76\x54\x32\x10", + .plaintext = "\xfb\xd1\xbe\x92\x7e\x50\x3f\x16" + "\xf9\xdd\xbe\x91\x73\x53\x37\x1a" + 
"\xfe\xdd\xba\x97\x7e\x53\x3c\x1c" + "\xfe\xd7\xbf\x9c\x75\x5f\x3e\x11" + "\xf0\xd8\xbc\x96\x73\x5c\x34\x11" + "\xf5\xdb\xb1\x99\x7a\x5a\x32\x1f" + "\xf6\xdf\xb4\x95\x7f\x5f\x3b\x17" + "\xfd\xdb\xb1\x9b\x76\x5c\x37", + .digest = "\x5f\x14\xc9\xa9\x20\xb2\xb4\xf0" + "\x76\xe0\xd8\xd6\xdc\x4f\xe1\xbc", + .psize = 63, + .ksize = 16, + } +}; + /* Cast6 test vectors from RFC 2612 */ static const struct cipher_testvec cast6_tv_template[] = { { diff --git a/crypto/wp512.c b/crypto/wp512.c index bf79fbb2340f..5e820afa3c78 100644 --- a/crypto/wp512.c +++ b/crypto/wp512.c @@ -775,7 +775,7 @@ static const u64 rc[WHIRLPOOL_ROUNDS] = { 0xca2dbf07ad5a8333ULL, }; -/** +/* * The core Whirlpool transform. */ diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 3f166c8a4099..239eca4d6805 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -524,6 +524,20 @@ config HW_RANDOM_XIPHERA To compile this driver as a module, choose M here: the module will be called xiphera-trng. +config HW_RANDOM_ARM_SMCCC_TRNG + tristate "Arm SMCCC TRNG firmware interface support" + depends on HAVE_ARM_SMCCC_DISCOVERY + default HW_RANDOM + help + Say 'Y' to enable the True Random Number Generator driver using + the Arm SMCCC TRNG firmware interface. This reads entropy from + higher exception levels (firmware, hypervisor). Uses SMCCC for + communicating with the firmware: + https://developer.arm.com/documentation/den0098/latest/ + + To compile this driver as a module, choose M here: the + module will be called arm_smccc_trng. + endif # HW_RANDOM config UML_RANDOM diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile index 8933fada74f2..a5a1c765a394 100644 --- a/drivers/char/hw_random/Makefile +++ b/drivers/char/hw_random/Makefile @@ -45,3 +45,4 @@ obj-$(CONFIG_HW_RANDOM_OPTEE) += optee-rng.o obj-$(CONFIG_HW_RANDOM_NPCM) += npcm-rng.o obj-$(CONFIG_HW_RANDOM_CCTRNG) += cctrng.o obj-$(CONFIG_HW_RANDOM_XIPHERA) += xiphera-trng.o +obj-$(CONFIG_HW_RANDOM_ARM_SMCCC_TRNG) += arm_smccc_trng.o diff --git a/drivers/char/hw_random/amd-rng.c b/drivers/char/hw_random/amd-rng.c index d8d4ef5214a1..c22d4184bb61 100644 --- a/drivers/char/hw_random/amd-rng.c +++ b/drivers/char/hw_random/amd-rng.c @@ -124,7 +124,7 @@ static struct hwrng amd_rng = { .read = amd_rng_read, }; -static int __init mod_init(void) +static int __init amd_rng_mod_init(void) { int err; struct pci_dev *pdev = NULL; @@ -188,7 +188,7 @@ static int __init mod_init(void) return err; } -static void __exit mod_exit(void) +static void __exit amd_rng_mod_exit(void) { struct amd768_priv *priv; @@ -203,8 +203,8 @@ static void __exit mod_exit(void) kfree(priv); } -module_init(mod_init); -module_exit(mod_exit); +module_init(amd_rng_mod_init); +module_exit(amd_rng_mod_exit); MODULE_AUTHOR("The Linux Kernel team"); MODULE_DESCRIPTION("H/W RNG driver for AMD chipsets"); diff --git a/drivers/char/hw_random/arm_smccc_trng.c b/drivers/char/hw_random/arm_smccc_trng.c new file mode 100644 index 000000000000..b24ac39a903b --- /dev/null +++ b/drivers/char/hw_random/arm_smccc_trng.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Randomness driver for the ARM SMCCC TRNG Firmware Interface + * https://developer.arm.com/documentation/den0098/latest/ + * + * Copyright (C) 2020 Arm Ltd. + * + * The ARM TRNG firmware interface specifies a protocol to read entropy + * from a higher exception level, to abstract from any machine specific + * implemenations and allow easier use in hypervisors. 
+ * + * The firmware interface is realised using the SMCCC specification. + */ + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_ARM64 +#define ARM_SMCCC_TRNG_RND ARM_SMCCC_TRNG_RND64 +#define MAX_BITS_PER_CALL (3 * 64UL) +#else +#define ARM_SMCCC_TRNG_RND ARM_SMCCC_TRNG_RND32 +#define MAX_BITS_PER_CALL (3 * 32UL) +#endif + +/* We don't want to allow the firmware to stall us forever. */ +#define SMCCC_TRNG_MAX_TRIES 20 + +#define SMCCC_RET_TRNG_INVALID_PARAMETER -2 +#define SMCCC_RET_TRNG_NO_ENTROPY -3 + +static int copy_from_registers(char *buf, struct arm_smccc_res *res, + size_t bytes) +{ + unsigned int chunk, copied; + + if (bytes == 0) + return 0; + + chunk = min(bytes, sizeof(long)); + memcpy(buf, &res->a3, chunk); + copied = chunk; + if (copied >= bytes) + return copied; + + chunk = min((bytes - copied), sizeof(long)); + memcpy(&buf[copied], &res->a2, chunk); + copied += chunk; + if (copied >= bytes) + return copied; + + chunk = min((bytes - copied), sizeof(long)); + memcpy(&buf[copied], &res->a1, chunk); + + return copied + chunk; +} + +static int smccc_trng_read(struct hwrng *rng, void *data, size_t max, bool wait) +{ + struct arm_smccc_res res; + u8 *buf = data; + unsigned int copied = 0; + int tries = 0; + + while (copied < max) { + size_t bits = min_t(size_t, (max - copied) * BITS_PER_BYTE, + MAX_BITS_PER_CALL); + + arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND, bits, &res); + if ((int)res.a0 < 0) + return (int)res.a0; + + switch ((int)res.a0) { + case SMCCC_RET_SUCCESS: + copied += copy_from_registers(buf + copied, &res, + bits / BITS_PER_BYTE); + tries = 0; + break; + case SMCCC_RET_TRNG_NO_ENTROPY: + if (!wait) + return copied; + tries++; + if (tries >= SMCCC_TRNG_MAX_TRIES) + return copied; + cond_resched(); + break; + } + } + + return copied; +} + +static int smccc_trng_probe(struct platform_device *pdev) +{ + struct hwrng *trng; + + trng = devm_kzalloc(&pdev->dev, sizeof(*trng), GFP_KERNEL); + if (!trng) + return -ENOMEM; + + trng->name = "smccc_trng"; + trng->read = smccc_trng_read; + + platform_set_drvdata(pdev, trng); + + return devm_hwrng_register(&pdev->dev, trng); +} + +static struct platform_driver smccc_trng_driver = { + .driver = { + .name = "smccc_trng", + }, + .probe = smccc_trng_probe, +}; +module_platform_driver(smccc_trng_driver); + +MODULE_ALIAS("platform:smccc_trng"); +MODULE_AUTHOR("Andre Przywara"); +MODULE_LICENSE("GPL"); diff --git a/drivers/char/hw_random/geode-rng.c b/drivers/char/hw_random/geode-rng.c index e1d421a36a13..138ce434f86b 100644 --- a/drivers/char/hw_random/geode-rng.c +++ b/drivers/char/hw_random/geode-rng.c @@ -83,7 +83,7 @@ static struct hwrng geode_rng = { }; -static int __init mod_init(void) +static int __init geode_rng_init(void) { int err = -ENODEV; struct pci_dev *pdev = NULL; @@ -124,7 +124,7 @@ static int __init mod_init(void) goto out; } -static void __exit mod_exit(void) +static void __exit geode_rng_exit(void) { void __iomem *mem = (void __iomem *)geode_rng.priv; @@ -132,8 +132,8 @@ static void __exit mod_exit(void) iounmap(mem); } -module_init(mod_init); -module_exit(mod_exit); +module_init(geode_rng_init); +module_exit(geode_rng_exit); MODULE_DESCRIPTION("H/W RNG driver for AMD Geode LX CPUs"); MODULE_LICENSE("GPL"); diff --git a/drivers/char/hw_random/intel-rng.c b/drivers/char/hw_random/intel-rng.c index d740b8814bf3..7b171cb3b825 100644 --- a/drivers/char/hw_random/intel-rng.c +++ b/drivers/char/hw_random/intel-rng.c @@ -325,7 +325,7 @@ PFX "RNG, try using the 'no_fwh_detect' option.\n"; } -static 
int __init mod_init(void) +static int __init intel_rng_mod_init(void) { int err = -ENODEV; int i; @@ -403,7 +403,7 @@ static int __init mod_init(void) } -static void __exit mod_exit(void) +static void __exit intel_rng_mod_exit(void) { void __iomem *mem = (void __iomem *)intel_rng.priv; @@ -411,8 +411,8 @@ static void __exit mod_exit(void) iounmap(mem); } -module_init(mod_init); -module_exit(mod_exit); +module_init(intel_rng_mod_init); +module_exit(intel_rng_mod_exit); MODULE_DESCRIPTION("H/W RNG driver for Intel chipsets"); MODULE_LICENSE("GPL"); diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c index 39943bc3651a..7444cc146e86 100644 --- a/drivers/char/hw_random/via-rng.c +++ b/drivers/char/hw_random/via-rng.c @@ -192,7 +192,7 @@ static struct hwrng via_rng = { }; -static int __init mod_init(void) +static int __init via_rng_mod_init(void) { int err; @@ -209,13 +209,13 @@ static int __init mod_init(void) out: return err; } -module_init(mod_init); +module_init(via_rng_mod_init); -static void __exit mod_exit(void) +static void __exit via_rng_mod_exit(void) { hwrng_unregister(&via_rng); } -module_exit(mod_exit); +module_exit(via_rng_mod_exit); static struct x86_cpu_id __maybe_unused via_rng_cpu_id[] = { X86_MATCH_FEATURE(X86_FEATURE_XSTORE, NULL), diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c index cd1baee424a1..b3a9bbfb8831 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c @@ -26,8 +26,7 @@ void sun8i_ce_prng_exit(struct crypto_tfm *tfm) { struct sun8i_ce_rng_tfm_ctx *ctx = crypto_tfm_ctx(tfm); - memzero_explicit(ctx->seed, ctx->slen); - kfree(ctx->seed); + kfree_sensitive(ctx->seed); ctx->seed = NULL; ctx->slen = 0; } @@ -38,8 +37,7 @@ int sun8i_ce_prng_seed(struct crypto_rng *tfm, const u8 *seed, struct sun8i_ce_rng_tfm_ctx *ctx = crypto_rng_ctx(tfm); if (ctx->seed && ctx->slen != slen) { - memzero_explicit(ctx->seed, ctx->slen); - kfree(ctx->seed); + kfree_sensitive(ctx->seed); ctx->slen = 0; ctx->seed = NULL; } @@ -157,9 +155,8 @@ int sun8i_ce_prng_generate(struct crypto_rng *tfm, const u8 *src, memcpy(dst, d, dlen); memcpy(ctx->seed, d + dlen, ctx->slen); } - memzero_explicit(d, todo); err_iv: - kfree(d); + kfree_sensitive(d); err_mem: return err; } diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c index 5b7af4498bd5..19cd2e52f89d 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c @@ -95,9 +95,8 @@ static int sun8i_ce_trng_read(struct hwrng *rng, void *data, size_t max, bool wa memcpy(data, d, max); err = max; } - memzero_explicit(d, todo); err_dst: - kfree(d); + kfree_sensitive(d); return err; } diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c index 3191527928e4..246a6782674c 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c @@ -20,8 +20,7 @@ int sun8i_ss_prng_seed(struct crypto_rng *tfm, const u8 *seed, struct sun8i_ss_rng_tfm_ctx *ctx = crypto_rng_ctx(tfm); if (ctx->seed && ctx->slen != slen) { - memzero_explicit(ctx->seed, ctx->slen); - kfree(ctx->seed); + kfree_sensitive(ctx->seed); ctx->slen = 0; ctx->seed = NULL; } @@ -48,8 +47,7 @@ void sun8i_ss_prng_exit(struct crypto_tfm *tfm) { struct sun8i_ss_rng_tfm_ctx *ctx = crypto_tfm_ctx(tfm); - 
memzero_explicit(ctx->seed, ctx->slen); - kfree(ctx->seed); + kfree_sensitive(ctx->seed); ctx->seed = NULL; ctx->slen = 0; } @@ -167,9 +165,8 @@ int sun8i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src, /* Update seed */ memcpy(ctx->seed, d + dlen, ctx->slen); } - memzero_explicit(d, todo); err_free: - kfree(d); + kfree_sensitive(d); return err; } diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index b1d286004295..9391ccc03382 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -143,6 +143,7 @@ struct atmel_aes_xts_ctx { struct atmel_aes_base_ctx base; u32 key2[AES_KEYSIZE_256 / sizeof(u32)]; + struct crypto_skcipher *fallback_tfm; }; #if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC) @@ -155,6 +156,7 @@ struct atmel_aes_authenc_ctx { struct atmel_aes_reqctx { unsigned long mode; u8 lastc[AES_BLOCK_SIZE]; + struct skcipher_request fallback_req; }; #if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC) @@ -418,24 +420,15 @@ static inline size_t atmel_aes_padlen(size_t len, size_t block_size) return len ? block_size - len : 0; } -static struct atmel_aes_dev *atmel_aes_find_dev(struct atmel_aes_base_ctx *ctx) +static struct atmel_aes_dev *atmel_aes_dev_alloc(struct atmel_aes_base_ctx *ctx) { - struct atmel_aes_dev *aes_dd = NULL; - struct atmel_aes_dev *tmp; + struct atmel_aes_dev *aes_dd; spin_lock_bh(&atmel_aes.lock); - if (!ctx->dd) { - list_for_each_entry(tmp, &atmel_aes.dev_list, list) { - aes_dd = tmp; - break; - } - ctx->dd = aes_dd; - } else { - aes_dd = ctx->dd; - } - + /* One AES IP per SoC. */ + aes_dd = list_first_entry_or_null(&atmel_aes.dev_list, + struct atmel_aes_dev, list); spin_unlock_bh(&atmel_aes.lock); - return aes_dd; } @@ -967,7 +960,6 @@ static int atmel_aes_handle_queue(struct atmel_aes_dev *dd, ctx = crypto_tfm_ctx(areq->tfm); dd->areq = areq; - dd->ctx = ctx; start_async = (areq != new_areq); dd->is_async = start_async; @@ -1083,12 +1075,48 @@ static int atmel_aes_ctr_start(struct atmel_aes_dev *dd) return atmel_aes_ctr_transfer(dd); } +static int atmel_aes_xts_fallback(struct skcipher_request *req, bool enc) +{ + struct atmel_aes_reqctx *rctx = skcipher_request_ctx(req); + struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx( + crypto_skcipher_reqtfm(req)); + + skcipher_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + skcipher_request_set_callback(&rctx->fallback_req, req->base.flags, + req->base.complete, req->base.data); + skcipher_request_set_crypt(&rctx->fallback_req, req->src, req->dst, + req->cryptlen, req->iv); + + return enc ? crypto_skcipher_encrypt(&rctx->fallback_req) : + crypto_skcipher_decrypt(&rctx->fallback_req); +} + static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode) { struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); struct atmel_aes_base_ctx *ctx = crypto_skcipher_ctx(skcipher); struct atmel_aes_reqctx *rctx; - struct atmel_aes_dev *dd; + u32 opmode = mode & AES_FLAGS_OPMODE_MASK; + + if (opmode == AES_FLAGS_XTS) { + if (req->cryptlen < XTS_BLOCK_SIZE) + return -EINVAL; + + if (!IS_ALIGNED(req->cryptlen, XTS_BLOCK_SIZE)) + return atmel_aes_xts_fallback(req, + mode & AES_FLAGS_ENCRYPT); + } + + /* + * ECB, CBC, CFB, OFB or CTR modes require the plaintext and ciphertext + * to have a positive integer length.
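+ *
+ * (Editor's illustration of the XTS dispatch above: a 15-byte request
+ * is rejected with -EINVAL because it is shorter than XTS_BLOCK_SIZE,
+ * a 20-byte request is handed to the skcipher fallback because it is
+ * not block-aligned, and a 32-byte request is processed by the
+ * hardware.)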
+ */ + if (!req->cryptlen && opmode != AES_FLAGS_XTS) + return 0; + + if ((opmode == AES_FLAGS_ECB || opmode == AES_FLAGS_CBC) && + !IS_ALIGNED(req->cryptlen, crypto_skcipher_blocksize(skcipher))) + return -EINVAL; switch (mode & AES_FLAGS_OPMODE_MASK) { case AES_FLAGS_CFB8: @@ -1113,14 +1141,10 @@ static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode) } ctx->is_aead = false; - dd = atmel_aes_find_dev(ctx); - if (!dd) - return -ENODEV; - rctx = skcipher_request_ctx(req); rctx->mode = mode; - if ((mode & AES_FLAGS_OPMODE_MASK) != AES_FLAGS_ECB && + if (opmode != AES_FLAGS_ECB && !(mode & AES_FLAGS_ENCRYPT) && req->src == req->dst) { unsigned int ivsize = crypto_skcipher_ivsize(skcipher); @@ -1130,7 +1154,7 @@ static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode) ivsize, 0); } - return atmel_aes_handle_queue(dd, &req->base); + return atmel_aes_handle_queue(ctx->dd, &req->base); } static int atmel_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, @@ -1242,8 +1266,15 @@ static int atmel_aes_ctr_decrypt(struct skcipher_request *req) static int atmel_aes_init_tfm(struct crypto_skcipher *tfm) { struct atmel_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct atmel_aes_dev *dd; + + dd = atmel_aes_dev_alloc(&ctx->base); + if (!dd) + return -ENODEV; crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx)); + ctx->base.dd = dd; + ctx->base.dd->ctx = &ctx->base; ctx->base.start = atmel_aes_start; return 0; @@ -1252,8 +1283,15 @@ static int atmel_aes_init_tfm(struct crypto_skcipher *tfm) static int atmel_aes_ctr_init_tfm(struct crypto_skcipher *tfm) { struct atmel_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct atmel_aes_dev *dd; + + dd = atmel_aes_dev_alloc(&ctx->base); + if (!dd) + return -ENODEV; crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx)); + ctx->base.dd = dd; + ctx->base.dd->ctx = &ctx->base; ctx->base.start = atmel_aes_ctr_start; return 0; @@ -1290,7 +1328,7 @@ static struct skcipher_alg aes_algs[] = { { .base.cra_name = "ofb(aes)", .base.cra_driver_name = "atmel-ofb-aes", - .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_blocksize = 1, .base.cra_ctxsize = sizeof(struct atmel_aes_ctx), .init = atmel_aes_init_tfm, @@ -1691,20 +1729,15 @@ static int atmel_aes_gcm_crypt(struct aead_request *req, { struct atmel_aes_base_ctx *ctx; struct atmel_aes_reqctx *rctx; - struct atmel_aes_dev *dd; ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); ctx->block_size = AES_BLOCK_SIZE; ctx->is_aead = true; - dd = atmel_aes_find_dev(ctx); - if (!dd) - return -ENODEV; - rctx = aead_request_ctx(req); rctx->mode = AES_FLAGS_GCM | mode; - return atmel_aes_handle_queue(dd, &req->base); + return atmel_aes_handle_queue(ctx->dd, &req->base); } static int atmel_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key, @@ -1742,8 +1775,15 @@ static int atmel_aes_gcm_decrypt(struct aead_request *req) static int atmel_aes_gcm_init(struct crypto_aead *tfm) { struct atmel_aes_gcm_ctx *ctx = crypto_aead_ctx(tfm); + struct atmel_aes_dev *dd; + + dd = atmel_aes_dev_alloc(&ctx->base); + if (!dd) + return -ENODEV; crypto_aead_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx)); + ctx->base.dd = dd; + ctx->base.dd->ctx = &ctx->base; ctx->base.start = atmel_aes_gcm_start; return 0; @@ -1819,12 +1859,8 @@ static int atmel_aes_xts_process_data(struct atmel_aes_dev *dd) * the order of the ciphered tweak bytes needs to be reversed before * writing them into the ODATARx registers.
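 * (Editor's example: the swap() loop below reverses the 16-byte tweak
 * in place, exchanging byte 0 with byte 15, byte 1 with byte 14, and so
 * on through the middle pair, bytes 7 and 8.)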
*/ - for (i = 0; i < AES_BLOCK_SIZE/2; ++i) { - u8 tmp = tweak_bytes[AES_BLOCK_SIZE - 1 - i]; - - tweak_bytes[AES_BLOCK_SIZE - 1 - i] = tweak_bytes[i]; - tweak_bytes[i] = tmp; - } + for (i = 0; i < AES_BLOCK_SIZE/2; ++i) + swap(tweak_bytes[i], tweak_bytes[AES_BLOCK_SIZE - 1 - i]); /* Process the data. */ atmel_aes_write_ctrl(dd, use_dma, NULL); @@ -1849,6 +1885,13 @@ static int atmel_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, if (err) return err; + crypto_skcipher_clear_flags(ctx->fallback_tfm, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctx->fallback_tfm, tfm->base.crt_flags & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen); + if (err) + return err; + memcpy(ctx->base.key, key, keylen/2); memcpy(ctx->key2, key + keylen/2, keylen/2); ctx->base.keylen = keylen/2; @@ -1869,18 +1912,40 @@ static int atmel_aes_xts_decrypt(struct skcipher_request *req) static int atmel_aes_xts_init_tfm(struct crypto_skcipher *tfm) { struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct atmel_aes_dev *dd; + const char *tfm_name = crypto_tfm_alg_name(&tfm->base); - crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx)); + dd = atmel_aes_dev_alloc(&ctx->base); + if (!dd) + return -ENODEV; + + ctx->fallback_tfm = crypto_alloc_skcipher(tfm_name, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->fallback_tfm)) + return PTR_ERR(ctx->fallback_tfm); + + crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx) + + crypto_skcipher_reqsize(ctx->fallback_tfm)); + ctx->base.dd = dd; + ctx->base.dd->ctx = &ctx->base; ctx->base.start = atmel_aes_xts_start; return 0; } +static void atmel_aes_xts_exit_tfm(struct crypto_skcipher *tfm) +{ + struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + + crypto_free_skcipher(ctx->fallback_tfm); +} + static struct skcipher_alg aes_xts_alg = { .base.cra_name = "xts(aes)", .base.cra_driver_name = "atmel-xts-aes", .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct atmel_aes_xts_ctx), + .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK, .min_keysize = 2 * AES_MIN_KEY_SIZE, .max_keysize = 2 * AES_MAX_KEY_SIZE, @@ -1889,6 +1954,7 @@ static struct skcipher_alg aes_xts_alg = { .encrypt = atmel_aes_xts_encrypt, .decrypt = atmel_aes_xts_decrypt, .init = atmel_aes_xts_init_tfm, + .exit = atmel_aes_xts_exit_tfm, }; #if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC) @@ -2075,6 +2141,11 @@ static int atmel_aes_authenc_init_tfm(struct crypto_aead *tfm, { struct atmel_aes_authenc_ctx *ctx = crypto_aead_ctx(tfm); unsigned int auth_reqsize = atmel_sha_authenc_get_reqsize(); + struct atmel_aes_dev *dd; + + dd = atmel_aes_dev_alloc(&ctx->base); + if (!dd) + return -ENODEV; ctx->auth = atmel_sha_authenc_spawn(auth_mode); if (IS_ERR(ctx->auth)) @@ -2082,6 +2153,8 @@ static int atmel_aes_authenc_init_tfm(struct crypto_aead *tfm, crypto_aead_set_reqsize(tfm, (sizeof(struct atmel_aes_authenc_reqctx) + auth_reqsize)); + ctx->base.dd = dd; + ctx->base.dd->ctx = &ctx->base; ctx->base.start = atmel_aes_authenc_start; return 0; @@ -2127,7 +2200,6 @@ static int atmel_aes_authenc_crypt(struct aead_request *req, struct atmel_aes_base_ctx *ctx = crypto_aead_ctx(tfm); u32 authsize = crypto_aead_authsize(tfm); bool enc = (mode & AES_FLAGS_ENCRYPT); - struct atmel_aes_dev *dd; /* Compute text length. 
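 * (Editor's note: for decryption the authentication tag is part of
 * req->cryptlen, so the plaintext length works out to
 * req->cryptlen - authsize, and the sanity check below rejects requests
 * shorter than the tag; for encryption the text length is req->cryptlen
 * itself.)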
*/ if (!enc && req->cryptlen < authsize) @@ -2146,11 +2218,7 @@ static int atmel_aes_authenc_crypt(struct aead_request *req, ctx->block_size = AES_BLOCK_SIZE; ctx->is_aead = true; - dd = atmel_aes_find_dev(ctx); - if (!dd) - return -ENODEV; - - return atmel_aes_handle_queue(dd, &req->base); + return atmel_aes_handle_queue(ctx->dd, &req->base); } static int atmel_aes_authenc_cbc_aes_encrypt(struct aead_request *req) @@ -2358,7 +2426,7 @@ static void atmel_aes_unregister_algs(struct atmel_aes_dev *dd) static void atmel_aes_crypto_alg_init(struct crypto_alg *alg) { - alg->cra_flags = CRYPTO_ALG_ASYNC; + alg->cra_flags |= CRYPTO_ALG_ASYNC; alg->cra_alignmask = 0xf; alg->cra_priority = ATMEL_AES_PRIORITY; alg->cra_module = THIS_MODULE; diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c index 6f01c51e3c37..e30786ec9f2d 100644 --- a/drivers/crypto/atmel-tdes.c +++ b/drivers/crypto/atmel-tdes.c @@ -196,23 +196,15 @@ static void atmel_tdes_write_n(struct atmel_tdes_dev *dd, u32 offset, atmel_tdes_write(dd, offset, *value); } -static struct atmel_tdes_dev *atmel_tdes_find_dev(struct atmel_tdes_ctx *ctx) +static struct atmel_tdes_dev *atmel_tdes_dev_alloc(void) { - struct atmel_tdes_dev *tdes_dd = NULL; - struct atmel_tdes_dev *tmp; + struct atmel_tdes_dev *tdes_dd; spin_lock_bh(&atmel_tdes.lock); - if (!ctx->dd) { - list_for_each_entry(tmp, &atmel_tdes.dev_list, list) { - tdes_dd = tmp; - break; - } - ctx->dd = tdes_dd; - } else { - tdes_dd = ctx->dd; - } + /* One TDES IP per SoC. */ + tdes_dd = list_first_entry_or_null(&atmel_tdes.dev_list, + struct atmel_tdes_dev, list); spin_unlock_bh(&atmel_tdes.lock); - return tdes_dd; } @@ -320,7 +312,7 @@ static int atmel_tdes_crypt_pdc_stop(struct atmel_tdes_dev *dd) dd->buf_out, dd->buflen, dd->dma_size, 1); if (count != dd->dma_size) { err = -EINVAL; - pr_err("not all data converted: %zu\n", count); + dev_dbg(dd->dev, "not all data converted: %zu\n", count); } } @@ -337,24 +329,24 @@ static int atmel_tdes_buff_init(struct atmel_tdes_dev *dd) dd->buflen &= ~(DES_BLOCK_SIZE - 1); if (!dd->buf_in || !dd->buf_out) { - dev_err(dd->dev, "unable to alloc pages.\n"); + dev_dbg(dd->dev, "unable to alloc pages.\n"); goto err_alloc; } /* MAP here */ dd->dma_addr_in = dma_map_single(dd->dev, dd->buf_in, dd->buflen, DMA_TO_DEVICE); - if (dma_mapping_error(dd->dev, dd->dma_addr_in)) { - dev_err(dd->dev, "dma %zd bytes error\n", dd->buflen); - err = -EINVAL; + err = dma_mapping_error(dd->dev, dd->dma_addr_in); + if (err) { + dev_dbg(dd->dev, "dma %zd bytes error\n", dd->buflen); goto err_map_in; } dd->dma_addr_out = dma_map_single(dd->dev, dd->buf_out, dd->buflen, DMA_FROM_DEVICE); - if (dma_mapping_error(dd->dev, dd->dma_addr_out)) { - dev_err(dd->dev, "dma %zd bytes error\n", dd->buflen); - err = -EINVAL; + err = dma_mapping_error(dd->dev, dd->dma_addr_out); + if (err) { + dev_dbg(dd->dev, "dma %zd bytes error\n", dd->buflen); goto err_map_out; } @@ -367,8 +359,6 @@ static int atmel_tdes_buff_init(struct atmel_tdes_dev *dd) err_alloc: free_page((unsigned long)dd->buf_out); free_page((unsigned long)dd->buf_in); - if (err) - pr_err("error: %d\n", err); return err; } @@ -520,14 +510,14 @@ static int atmel_tdes_crypt_start(struct atmel_tdes_dev *dd) err = dma_map_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); if (!err) { - dev_err(dd->dev, "dma_map_sg() error\n"); + dev_dbg(dd->dev, "dma_map_sg() error\n"); return -EINVAL; } err = dma_map_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE); if (!err) { - dev_err(dd->dev, "dma_map_sg() error\n"); + dev_dbg(dd->dev, 
"dma_map_sg() error\n"); dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); return -EINVAL; @@ -646,7 +636,6 @@ static int atmel_tdes_handle_queue(struct atmel_tdes_dev *dd, rctx->mode &= TDES_FLAGS_MODE_MASK; dd->flags = (dd->flags & ~TDES_FLAGS_MODE_MASK) | rctx->mode; dd->ctx = ctx; - ctx->dd = dd; err = atmel_tdes_write_ctrl(dd); if (!err) @@ -679,7 +668,7 @@ static int atmel_tdes_crypt_dma_stop(struct atmel_tdes_dev *dd) dd->buf_out, dd->buflen, dd->dma_size, 1); if (count != dd->dma_size) { err = -EINVAL; - pr_err("not all data converted: %zu\n", count); + dev_dbg(dd->dev, "not all data converted: %zu\n", count); } } } @@ -691,11 +680,15 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode) struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); struct atmel_tdes_ctx *ctx = crypto_skcipher_ctx(skcipher); struct atmel_tdes_reqctx *rctx = skcipher_request_ctx(req); + struct device *dev = ctx->dd->dev; + + if (!req->cryptlen) + return 0; switch (mode & TDES_FLAGS_OPMODE_MASK) { case TDES_FLAGS_CFB8: if (!IS_ALIGNED(req->cryptlen, CFB8_BLOCK_SIZE)) { - pr_err("request size is not exact amount of CFB8 blocks\n"); + dev_dbg(dev, "request size is not exact amount of CFB8 blocks\n"); return -EINVAL; } ctx->block_size = CFB8_BLOCK_SIZE; @@ -703,7 +696,7 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode) case TDES_FLAGS_CFB16: if (!IS_ALIGNED(req->cryptlen, CFB16_BLOCK_SIZE)) { - pr_err("request size is not exact amount of CFB16 blocks\n"); + dev_dbg(dev, "request size is not exact amount of CFB16 blocks\n"); return -EINVAL; } ctx->block_size = CFB16_BLOCK_SIZE; @@ -711,7 +704,7 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode) case TDES_FLAGS_CFB32: if (!IS_ALIGNED(req->cryptlen, CFB32_BLOCK_SIZE)) { - pr_err("request size is not exact amount of CFB32 blocks\n"); + dev_dbg(dev, "request size is not exact amount of CFB32 blocks\n"); return -EINVAL; } ctx->block_size = CFB32_BLOCK_SIZE; @@ -719,7 +712,7 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode) default: if (!IS_ALIGNED(req->cryptlen, DES_BLOCK_SIZE)) { - pr_err("request size is not exact amount of DES blocks\n"); + dev_dbg(dev, "request size is not exact amount of DES blocks\n"); return -EINVAL; } ctx->block_size = DES_BLOCK_SIZE; @@ -897,14 +890,13 @@ static int atmel_tdes_ofb_decrypt(struct skcipher_request *req) static int atmel_tdes_init_tfm(struct crypto_skcipher *tfm) { struct atmel_tdes_ctx *ctx = crypto_skcipher_ctx(tfm); - struct atmel_tdes_dev *dd; + + ctx->dd = atmel_tdes_dev_alloc(); + if (!ctx->dd) + return -ENODEV; crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_tdes_reqctx)); - dd = atmel_tdes_find_dev(ctx); - if (!dd) - return -ENODEV; - return 0; } @@ -999,7 +991,7 @@ static struct skcipher_alg tdes_algs[] = { { .base.cra_name = "ofb(des)", .base.cra_driver_name = "atmel-ofb-des", - .base.cra_blocksize = DES_BLOCK_SIZE, + .base.cra_blocksize = 1, .base.cra_alignmask = 0x7, .min_keysize = DES_KEY_SIZE, diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 91808402e0bf..2ecb0e1f65d8 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -300,6 +300,9 @@ static int __sev_platform_shutdown_locked(int *error) struct sev_device *sev = psp_master->sev_data; int ret; + if (sev->state == SEV_STATE_UNINIT) + return 0; + ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error); if (ret) return ret; @@ -1019,6 +1022,20 @@ int sev_dev_init(struct 
psp_device *psp) return ret; } +static void sev_firmware_shutdown(struct sev_device *sev) +{ + sev_platform_shutdown(NULL); + + if (sev_es_tmr) { + /* The TMR area was encrypted, flush it from the cache */ + wbinvd_on_all_cpus(); + + free_pages((unsigned long)sev_es_tmr, + get_order(SEV_ES_TMR_SIZE)); + sev_es_tmr = NULL; + } +} + void sev_dev_destroy(struct psp_device *psp) { struct sev_device *sev = psp->sev_data; @@ -1026,6 +1043,8 @@ void sev_dev_destroy(struct psp_device *psp) if (!sev) return; + sev_firmware_shutdown(sev); + if (sev->misc) kref_put(&misc_dev->refcount, sev_exit); @@ -1056,21 +1075,6 @@ void sev_pci_init(void) if (sev_get_api_version()) goto err; - /* - * If platform is not in UNINIT state then firmware upgrade and/or - * platform INIT command will fail. These command require UNINIT state. - * - * In a normal boot we should never run into case where the firmware - * is not in UNINIT state on boot. But in case of kexec boot, a reboot - * may not go through a typical shutdown sequence and may leave the - * firmware in INIT or WORKING state. - */ - - if (sev->state != SEV_STATE_UNINIT) { - sev_platform_shutdown(NULL); - sev->state = SEV_STATE_UNINIT; - } - if (sev_version_greater_or_equal(0, 15) && sev_update_firmware(sev->dev) == 0) sev_get_api_version(); @@ -1115,17 +1119,10 @@ void sev_pci_init(void) void sev_pci_exit(void) { - if (!psp_master->sev_data) + struct sev_device *sev = psp_master->sev_data; + + if (!sev) return; - sev_platform_shutdown(NULL); - - if (sev_es_tmr) { - /* The TMR area was encrypted, flush it from the cache */ - wbinvd_on_all_cpus(); - - free_pages((unsigned long)sev_es_tmr, - get_order(SEV_ES_TMR_SIZE)); - sev_es_tmr = NULL; - } + sev_firmware_shutdown(sev); } diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c index 6fb6ba35f89d..88c672ad27e4 100644 --- a/drivers/crypto/ccp/sp-pci.c +++ b/drivers/crypto/ccp/sp-pci.c @@ -241,6 +241,17 @@ static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return ret; } +static void sp_pci_shutdown(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + struct sp_device *sp = dev_get_drvdata(dev); + + if (!sp) + return; + + sp_destroy(sp); +} + static void sp_pci_remove(struct pci_dev *pdev) { struct device *dev = &pdev->dev; @@ -349,6 +360,12 @@ static const struct sp_dev_vdata dev_vdata[] = { #endif #ifdef CONFIG_CRYPTO_DEV_SP_PSP .psp_vdata = &pspv3, +#endif + }, + { /* 5 */ + .bar = 2, +#ifdef CONFIG_CRYPTO_DEV_SP_PSP + .psp_vdata = &pspv2, #endif }, }; @@ -359,6 +376,7 @@ static const struct pci_device_id sp_pci_table[] = { { PCI_VDEVICE(AMD, 0x1486), (kernel_ulong_t)&dev_vdata[3] }, { PCI_VDEVICE(AMD, 0x15DF), (kernel_ulong_t)&dev_vdata[4] }, { PCI_VDEVICE(AMD, 0x1649), (kernel_ulong_t)&dev_vdata[4] }, + { PCI_VDEVICE(AMD, 0x14CA), (kernel_ulong_t)&dev_vdata[5] }, /* Last entry must be zero */ { 0, } }; @@ -371,6 +389,7 @@ static struct pci_driver sp_pci_driver = { .id_table = sp_pci_table, .probe = sp_pci_probe, .remove = sp_pci_remove, + .shutdown = sp_pci_shutdown, .driver.pm = &sp_pci_pm_ops, }; diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 8b0640fb04be..65a641396c07 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include "hpre.h" @@ -81,6 +82,16 @@ #define HPRE_PREFETCH_DISABLE BIT(30) #define HPRE_SVA_DISABLE_READY (BIT(4) | BIT(8)) +/* clock gate */ +#define 
HPRE_CLKGATE_CTL 0x301a10 +#define HPRE_PEH_CFG_AUTO_GATE 0x301a2c +#define HPRE_CLUSTER_DYN_CTL 0x302010 +#define HPRE_CORE_SHB_CFG 0x302088 +#define HPRE_CLKGATE_CTL_EN BIT(0) +#define HPRE_PEH_CFG_AUTO_GATE_EN BIT(0) +#define HPRE_CLUSTER_DYN_CTL_EN BIT(0) +#define HPRE_CORE_GATE_EN (BIT(30) | BIT(31)) + #define HPRE_AM_OOO_SHUTDOWN_ENB 0x301044 #define HPRE_AM_OOO_SHUTDOWN_ENABLE BIT(0) #define HPRE_WR_MSI_PORT BIT(2) @@ -417,12 +428,63 @@ static void hpre_close_sva_prefetch(struct hisi_qm *qm) pci_err(qm->pdev, "failed to close sva prefetch\n"); } +static void hpre_enable_clock_gate(struct hisi_qm *qm) +{ + u32 val; + + if (qm->ver < QM_HW_V3) + return; + + val = readl(qm->io_base + HPRE_CLKGATE_CTL); + val |= HPRE_CLKGATE_CTL_EN; + writel(val, qm->io_base + HPRE_CLKGATE_CTL); + + val = readl(qm->io_base + HPRE_PEH_CFG_AUTO_GATE); + val |= HPRE_PEH_CFG_AUTO_GATE_EN; + writel(val, qm->io_base + HPRE_PEH_CFG_AUTO_GATE); + + val = readl(qm->io_base + HPRE_CLUSTER_DYN_CTL); + val |= HPRE_CLUSTER_DYN_CTL_EN; + writel(val, qm->io_base + HPRE_CLUSTER_DYN_CTL); + + val = readl_relaxed(qm->io_base + HPRE_CORE_SHB_CFG); + val |= HPRE_CORE_GATE_EN; + writel(val, qm->io_base + HPRE_CORE_SHB_CFG); +} + +static void hpre_disable_clock_gate(struct hisi_qm *qm) +{ + u32 val; + + if (qm->ver < QM_HW_V3) + return; + + val = readl(qm->io_base + HPRE_CLKGATE_CTL); + val &= ~HPRE_CLKGATE_CTL_EN; + writel(val, qm->io_base + HPRE_CLKGATE_CTL); + + val = readl(qm->io_base + HPRE_PEH_CFG_AUTO_GATE); + val &= ~HPRE_PEH_CFG_AUTO_GATE_EN; + writel(val, qm->io_base + HPRE_PEH_CFG_AUTO_GATE); + + val = readl(qm->io_base + HPRE_CLUSTER_DYN_CTL); + val &= ~HPRE_CLUSTER_DYN_CTL_EN; + writel(val, qm->io_base + HPRE_CLUSTER_DYN_CTL); + + val = readl_relaxed(qm->io_base + HPRE_CORE_SHB_CFG); + val &= ~HPRE_CORE_GATE_EN; + writel(val, qm->io_base + HPRE_CORE_SHB_CFG); +} + static int hpre_set_user_domain_and_cache(struct hisi_qm *qm) { struct device *dev = &qm->pdev->dev; u32 val; int ret; + /* disable dynamic clock gate before sram init */ + hpre_disable_clock_gate(qm); + writel(HPRE_QM_USR_CFG_MASK, qm->io_base + QM_ARUSER_M_CFG_ENABLE); writel(HPRE_QM_USR_CFG_MASK, qm->io_base + QM_AWUSER_M_CFG_ENABLE); writel_relaxed(HPRE_QM_AXI_CFG_MASK, qm->io_base + QM_AXI_M_CFG); @@ -473,6 +535,8 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm) /* Config data buffer pasid needed by Kunpeng 920 */ hpre_config_pasid(qm); + hpre_enable_clock_gate(qm); + return ret; } @@ -595,10 +659,15 @@ static ssize_t hpre_ctrl_debug_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { struct hpre_debugfs_file *file = filp->private_data; + struct hisi_qm *qm = hpre_file_to_qm(file); char tbuf[HPRE_DBGFS_VAL_MAX_LEN]; u32 val; int ret; + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + spin_lock_irq(&file->lock); switch (file->type) { case HPRE_CLEAR_ENABLE: val = hpre_clear_enable_read(file); break; case HPRE_CLUSTER_CTRL: val = hpre_cluster_inqry_read(file); break; default: - spin_unlock_irq(&file->lock); - return -EINVAL; + goto err_input; } spin_unlock_irq(&file->lock); + + hisi_qm_put_dfx_access(qm); ret = snprintf(tbuf, HPRE_DBGFS_VAL_MAX_LEN, "%u\n", val); return simple_read_from_buffer(buf, count, pos, tbuf, ret); + +err_input: + spin_unlock_irq(&file->lock); + hisi_qm_put_dfx_access(qm); + return -EINVAL; } static ssize_t hpre_ctrl_debug_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct hpre_debugfs_file *file = filp->private_data;
+ struct hisi_qm *qm = hpre_file_to_qm(file); char tbuf[HPRE_DBGFS_VAL_MAX_LEN]; unsigned long val; int len, ret; @@ -639,6 +715,10 @@ static ssize_t hpre_ctrl_debug_write(struct file *filp, const char __user *buf, if (kstrtoul(tbuf, 0, &val)) return -EFAULT; + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + spin_lock_irq(&file->lock); switch (file->type) { case HPRE_CLEAR_ENABLE: @@ -655,12 +735,12 @@ static ssize_t hpre_ctrl_debug_write(struct file *filp, const char __user *buf, ret = -EINVAL; goto err_input; } - spin_unlock_irq(&file->lock); - return count; + ret = count; err_input: spin_unlock_irq(&file->lock); + hisi_qm_put_dfx_access(qm); return ret; } @@ -700,6 +780,24 @@ static int hpre_debugfs_atomic64_set(void *data, u64 val) DEFINE_DEBUGFS_ATTRIBUTE(hpre_atomic64_ops, hpre_debugfs_atomic64_get, hpre_debugfs_atomic64_set, "%llu\n"); +static int hpre_com_regs_show(struct seq_file *s, void *unused) +{ + hisi_qm_regs_dump(s, s->private); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(hpre_com_regs); + +static int hpre_cluster_regs_show(struct seq_file *s, void *unused) +{ + hisi_qm_regs_dump(s, s->private); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(hpre_cluster_regs); + static int hpre_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir, enum hpre_ctrl_dbgfs_file type, int indx) { @@ -737,8 +835,11 @@ static int hpre_pf_comm_regs_debugfs_init(struct hisi_qm *qm) regset->regs = hpre_com_dfx_regs; regset->nregs = ARRAY_SIZE(hpre_com_dfx_regs); regset->base = qm->io_base; + regset->dev = dev; + + debugfs_create_file("regs", 0444, qm->debug.debug_root, + regset, &hpre_com_regs_fops); - debugfs_create_regset32("regs", 0444, qm->debug.debug_root, regset); return 0; } @@ -764,8 +865,10 @@ static int hpre_cluster_debugfs_init(struct hisi_qm *qm) regset->regs = hpre_cluster_dfx_regs; regset->nregs = ARRAY_SIZE(hpre_cluster_dfx_regs); regset->base = qm->io_base + hpre_cluster_offsets[i]; + regset->dev = dev; - debugfs_create_regset32("regs", 0444, tmp_d, regset); + debugfs_create_file("regs", 0444, tmp_d, regset, + &hpre_cluster_regs_fops); ret = hpre_create_debugfs_file(qm, tmp_d, HPRE_CLUSTER_CTRL, i + HPRE_CLUSTER_CTRL); if (ret) @@ -1017,6 +1120,8 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_with_alg_register; } + hisi_qm_pm_init(qm); + return 0; err_with_alg_register: @@ -1040,6 +1145,7 @@ static void hpre_remove(struct pci_dev *pdev) struct hisi_qm *qm = pci_get_drvdata(pdev); int ret; + hisi_qm_pm_uninit(qm); hisi_qm_wait_task_finish(qm, &hpre_devices); hisi_qm_alg_unregister(qm, &hpre_devices); if (qm->fun_type == QM_HW_PF && qm->vfs_num) { @@ -1062,6 +1168,10 @@ static void hpre_remove(struct pci_dev *pdev) hisi_qm_uninit(qm); } +static const struct dev_pm_ops hpre_pm_ops = { + SET_RUNTIME_PM_OPS(hisi_qm_suspend, hisi_qm_resume, NULL) +}; + static const struct pci_error_handlers hpre_err_handler = { .error_detected = hisi_qm_dev_err_detected, .slot_reset = hisi_qm_dev_slot_reset, @@ -1078,6 +1188,7 @@ static struct pci_driver hpre_pci_driver = { hisi_qm_sriov_configure : NULL, .err_handler = &hpre_err_handler, .shutdown = hisi_qm_dev_shutdown, + .driver.pm = &hpre_pm_ops, }; static void hpre_register_debugfs(void) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 1d67f94a1d56..369562d34d66 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -4,12 +4,12 @@ #include #include #include -#include #include #include #include #include #include +#include #include #include 
#include @@ -270,6 +270,8 @@ #define QM_QOS_MAX_CIR_S 11 #define QM_QOS_VAL_MAX_LEN 32 +#define QM_AUTOSUSPEND_DELAY 3000 + #define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \ (((hop_num) << QM_CQ_HOP_NUM_SHIFT) | \ ((pg_sz) << QM_CQ_PAGE_SIZE_SHIFT) | \ @@ -734,6 +736,34 @@ static u32 qm_get_irq_num_v3(struct hisi_qm *qm) return QM_IRQ_NUM_VF_V3; } +static int qm_pm_get_sync(struct hisi_qm *qm) +{ + struct device *dev = &qm->pdev->dev; + int ret; + + if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3) + return 0; + + ret = pm_runtime_resume_and_get(dev); + if (ret < 0) { + dev_err(dev, "failed to get_sync(%d).\n", ret); + return ret; + } + + return 0; +} + +static void qm_pm_put_sync(struct hisi_qm *qm) +{ + struct device *dev = &qm->pdev->dev; + + if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3) + return; + + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); +} + static struct hisi_qp *qm_to_hisi_qp(struct hisi_qm *qm, struct qm_eqe *eqe) { u16 cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK; @@ -1173,16 +1203,13 @@ static struct hisi_qm *file_to_qm(struct debugfs_file *file) return container_of(debug, struct hisi_qm, debug); } -static u32 current_q_read(struct debugfs_file *file) +static u32 current_q_read(struct hisi_qm *qm) { - struct hisi_qm *qm = file_to_qm(file); - return readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) >> QM_DFX_QN_SHIFT; } -static int current_q_write(struct debugfs_file *file, u32 val) +static int current_q_write(struct hisi_qm *qm, u32 val) { - struct hisi_qm *qm = file_to_qm(file); u32 tmp; if (val >= qm->debug.curr_qm_qp_num) @@ -1199,18 +1226,14 @@ static int current_q_write(struct debugfs_file *file, u32 val) return 0; } -static u32 clear_enable_read(struct debugfs_file *file) +static u32 clear_enable_read(struct hisi_qm *qm) { - struct hisi_qm *qm = file_to_qm(file); - return readl(qm->io_base + QM_DFX_CNT_CLR_CE); } /* rd_clr_ctrl 1 enable read clear, otherwise 0 disable it */ -static int clear_enable_write(struct debugfs_file *file, u32 rd_clr_ctrl) +static int clear_enable_write(struct hisi_qm *qm, u32 rd_clr_ctrl) { - struct hisi_qm *qm = file_to_qm(file); - if (rd_clr_ctrl > 1) return -EINVAL; @@ -1219,16 +1242,13 @@ static int clear_enable_write(struct debugfs_file *file, u32 rd_clr_ctrl) return 0; } -static u32 current_qm_read(struct debugfs_file *file) +static u32 current_qm_read(struct hisi_qm *qm) { - struct hisi_qm *qm = file_to_qm(file); - return readl(qm->io_base + QM_DFX_MB_CNT_VF); } -static int current_qm_write(struct debugfs_file *file, u32 val) +static int current_qm_write(struct hisi_qm *qm, u32 val) { - struct hisi_qm *qm = file_to_qm(file); u32 tmp; if (val > qm->vfs_num) @@ -1259,29 +1279,39 @@ static ssize_t qm_debug_read(struct file *filp, char __user *buf, { struct debugfs_file *file = filp->private_data; enum qm_debug_file index = file->index; + struct hisi_qm *qm = file_to_qm(file); char tbuf[QM_DBG_TMP_BUF_LEN]; u32 val; int ret; + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + mutex_lock(&file->lock); switch (index) { case CURRENT_QM: - val = current_qm_read(file); + val = current_qm_read(qm); break; case CURRENT_Q: - val = current_q_read(file); + val = current_q_read(qm); break; case CLEAR_ENABLE: - val = clear_enable_read(file); + val = clear_enable_read(qm); break; default: - mutex_unlock(&file->lock); - return -EINVAL; + goto err_input; } mutex_unlock(&file->lock); + hisi_qm_put_dfx_access(qm); ret = scnprintf(tbuf, QM_DBG_TMP_BUF_LEN, "%u\n", val); return simple_read_from_buffer(buf, count, 
pos, tbuf, ret); + +err_input: + mutex_unlock(&file->lock); + hisi_qm_put_dfx_access(qm); + return -EINVAL; } static ssize_t qm_debug_write(struct file *filp, const char __user *buf, @@ -1289,6 +1319,7 @@ static ssize_t qm_debug_write(struct file *filp, const char __user *buf, { struct debugfs_file *file = filp->private_data; enum qm_debug_file index = file->index; + struct hisi_qm *qm = file_to_qm(file); unsigned long val; char tbuf[QM_DBG_TMP_BUF_LEN]; int len, ret; @@ -1308,22 +1339,28 @@ static ssize_t qm_debug_write(struct file *filp, const char __user *buf, if (kstrtoul(tbuf, 0, &val)) return -EFAULT; + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + mutex_lock(&file->lock); switch (index) { case CURRENT_QM: - ret = current_qm_write(file, val); + ret = current_qm_write(qm, val); break; case CURRENT_Q: - ret = current_q_write(file, val); + ret = current_q_write(qm, val); break; case CLEAR_ENABLE: - ret = clear_enable_write(file, val); + ret = clear_enable_write(qm, val); break; default: ret = -EINVAL; } mutex_unlock(&file->lock); + hisi_qm_put_dfx_access(qm); + if (ret) return ret; @@ -1337,13 +1374,8 @@ static const struct file_operations qm_debug_fops = { .write = qm_debug_write, }; -struct qm_dfx_registers { - char *reg_name; - u64 reg_offset; -}; - #define CNT_CYC_REGS_NUM 10 -static struct qm_dfx_registers qm_dfx_regs[] = { +static const struct debugfs_reg32 qm_dfx_regs[] = { /* XXX_CNT are reading clear register */ {"QM_ECC_1BIT_CNT ", 0x104000ull}, {"QM_ECC_MBIT_CNT ", 0x104008ull}, @@ -1369,31 +1401,59 @@ static struct qm_dfx_registers qm_dfx_regs[] = { {"QM_DFX_FF_ST5 ", 0x1040dcull}, {"QM_DFX_FF_ST6 ", 0x1040e0ull}, {"QM_IN_IDLE_ST ", 0x1040e4ull}, - { NULL, 0} }; -static struct qm_dfx_registers qm_vf_dfx_regs[] = { +static const struct debugfs_reg32 qm_vf_dfx_regs[] = { {"QM_DFX_FUNS_ACTIVE_ST ", 0x200ull}, - { NULL, 0} }; +/** + * hisi_qm_regs_dump() - Dump registers's value. + * @s: debugfs file handle. + * @regset: accelerator registers information. + * + * Dump accelerator registers. + */ +void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset) +{ + struct pci_dev *pdev = to_pci_dev(regset->dev); + struct hisi_qm *qm = pci_get_drvdata(pdev); + const struct debugfs_reg32 *regs = regset->regs; + int regs_len = regset->nregs; + int i, ret; + u32 val; + + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return; + + for (i = 0; i < regs_len; i++) { + val = readl(regset->base + regs[i].offset); + seq_printf(s, "%s= 0x%08x\n", regs[i].name, val); + } + + hisi_qm_put_dfx_access(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_regs_dump); + static int qm_regs_show(struct seq_file *s, void *unused) { struct hisi_qm *qm = s->private; - struct qm_dfx_registers *regs; - u32 val; + struct debugfs_regset32 regset; - if (qm->fun_type == QM_HW_PF) - regs = qm_dfx_regs; - else - regs = qm_vf_dfx_regs; - - while (regs->reg_name) { - val = readl(qm->io_base + regs->reg_offset); - seq_printf(s, "%s= 0x%08x\n", regs->reg_name, val); - regs++; + if (qm->fun_type == QM_HW_PF) { + regset.regs = qm_dfx_regs; + regset.nregs = ARRAY_SIZE(qm_dfx_regs); + } else { + regset.regs = qm_vf_dfx_regs; + regset.nregs = ARRAY_SIZE(qm_vf_dfx_regs); } + regset.base = qm->io_base; + regset.dev = &qm->pdev->dev; + + hisi_qm_regs_dump(s, ®set); + return 0; } @@ -1823,16 +1883,24 @@ static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer, if (*pos) return 0; + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + /* Judge if the instance is being reset. 
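Converting qm_dfx_regs to the generic struct debugfs_reg32 above is what lets a single routine, hisi_qm_regs_dump(), serve both the PF and VF tables: the sentinel {NULL, 0} entries give way to ARRAY_SIZE()-based counts carried in a struct debugfs_regset32. A sketch of what the dump amounts to, with an illustrative two-entry table:

#include <linux/debugfs.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/seq_file.h>

static const struct debugfs_reg32 example_regs[] = {
        {"EXAMPLE_CNT ", 0x104000ull},
        {"EXAMPLE_ST  ", 0x104008ull},
};

static void example_regs_dump(struct seq_file *s, void __iomem *base)
{
        struct debugfs_regset32 regset = {
                .regs  = example_regs,
                .nregs = ARRAY_SIZE(example_regs),
                .base  = base,
        };
        int i;

        for (i = 0; i < regset.nregs; i++)
                seq_printf(s, "%s= 0x%08x\n", regset.regs[i].name,
                           readl(regset.base + regset.regs[i].offset));
}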
*/ if (unlikely(atomic_read(&qm->status.flags) == QM_STOP)) return 0; - if (count > QM_DBG_WRITE_LEN) - return -ENOSPC; + if (count > QM_DBG_WRITE_LEN) { + ret = -ENOSPC; + goto put_dfx_access; + } cmd_buf = memdup_user_nul(buffer, count); - if (IS_ERR(cmd_buf)) - return PTR_ERR(cmd_buf); + if (IS_ERR(cmd_buf)) { + ret = PTR_ERR(cmd_buf); + goto put_dfx_access; + } cmd_buf_tmp = strchr(cmd_buf, '\n'); if (cmd_buf_tmp) { @@ -1843,12 +1911,16 @@ static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer, ret = qm_cmd_write_dump(qm, cmd_buf); if (ret) { kfree(cmd_buf); - return ret; + goto put_dfx_access; } kfree(cmd_buf); - return count; + ret = count; + +put_dfx_access: + hisi_qm_put_dfx_access(qm); + return ret; } static const struct file_operations qm_cmd_fops = { @@ -2445,11 +2517,19 @@ static struct hisi_qp *qm_create_qp_nolock(struct hisi_qm *qm, u8 alg_type) struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type) { struct hisi_qp *qp; + int ret; + + ret = qm_pm_get_sync(qm); + if (ret) + return ERR_PTR(ret); down_write(&qm->qps_lock); qp = qm_create_qp_nolock(qm, alg_type); up_write(&qm->qps_lock); + if (IS_ERR(qp)) + qm_pm_put_sync(qm); + return qp; } EXPORT_SYMBOL_GPL(hisi_qm_create_qp); @@ -2475,6 +2555,8 @@ void hisi_qm_release_qp(struct hisi_qp *qp) idr_remove(&qm->qp_idr, qp->qp_id); up_write(&qm->qps_lock); + + qm_pm_put_sync(qm); } EXPORT_SYMBOL_GPL(hisi_qm_release_qp); @@ -3200,6 +3282,10 @@ static void hisi_qm_pre_init(struct hisi_qm *qm) init_rwsem(&qm->qps_lock); qm->qp_in_used = 0; qm->misc_ctl = false; + if (qm->fun_type == QM_HW_PF && qm->ver > QM_HW_V2) { + if (!acpi_device_power_manageable(ACPI_COMPANION(&pdev->dev))) + dev_info(&pdev->dev, "_PS0 and _PR0 are not defined"); + } } static void qm_cmd_uninit(struct hisi_qm *qm) @@ -4057,10 +4143,15 @@ static ssize_t qm_algqos_read(struct file *filp, char __user *buf, u32 qos_val, ir; int ret; + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + /* Mailbox and reset cannot be operated at the same time */ if (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) { pci_err(qm->pdev, "dev resetting, read alg qos failed!\n"); - return -EAGAIN; + ret = -EAGAIN; + goto err_put_dfx_access; } if (qm->fun_type == QM_HW_PF) { @@ -4079,6 +4170,8 @@ static ssize_t qm_algqos_read(struct file *filp, char __user *buf, err_get_status: clear_bit(QM_RESETTING, &qm->misc_ctl); +err_put_dfx_access: + hisi_qm_put_dfx_access(qm); return ret; } @@ -4159,15 +4252,23 @@ static ssize_t qm_algqos_write(struct file *filp, const char __user *buf, fun_index = device * 8 + function; - ret = qm_func_shaper_enable(qm, fun_index, val); + ret = qm_pm_get_sync(qm); if (ret) { - pci_err(qm->pdev, "failed to enable function shaper!\n"); ret = -EINVAL; goto err_get_status; } - ret = count; + ret = qm_func_shaper_enable(qm, fun_index, val); + if (ret) { + pci_err(qm->pdev, "failed to enable function shaper!\n"); + ret = -EINVAL; + goto err_put_sync; + } + ret = count; + +err_put_sync: + qm_pm_put_sync(qm); err_get_status: clear_bit(QM_RESETTING, &qm->misc_ctl); return ret; @@ -4245,7 +4346,7 @@ EXPORT_SYMBOL_GPL(hisi_qm_debug_init); */ void hisi_qm_debug_regs_clear(struct hisi_qm *qm) { - struct qm_dfx_registers *regs; + const struct debugfs_reg32 *regs; int i; /* clear current_qm */ @@ -4264,7 +4365,7 @@ void hisi_qm_debug_regs_clear(struct hisi_qm *qm) regs = qm_dfx_regs; for (i = 0; i < CNT_CYC_REGS_NUM; i++) { - readl(qm->io_base + regs->reg_offset); + readl(qm->io_base + regs->offset); regs++; } @@ -4287,19 +4388,23 @@ int 
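hisi_qm_create_qp() and hisi_qm_release_qp() above tie a runtime-PM reference to the lifetime of each queue pair: taken before creation, handed back at once if creation fails, otherwise held until release so the device cannot autosuspend while queues exist. The shape of that pairing, with hypothetical example_* types:

#include <linux/err.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>

struct example_qp {
        struct device *dev;
};

static struct example_qp *example_create_qp(struct device *dev)
{
        struct example_qp *qp;
        int ret;

        ret = pm_runtime_resume_and_get(dev);
        if (ret < 0)
                return ERR_PTR(ret);

        qp = kzalloc(sizeof(*qp), GFP_KERNEL);
        if (!qp) {
                /* Creation failed: give the reference back immediately. */
                pm_runtime_put_autosuspend(dev);
                return ERR_PTR(-ENOMEM);
        }
        qp->dev = dev;
        return qp;                      /* reference held until release */
}

static void example_release_qp(struct example_qp *qp)
{
        struct device *dev = qp->dev;

        kfree(qp);
        pm_runtime_mark_last_busy(dev);
        pm_runtime_put_autosuspend(dev);
}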
hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs) struct hisi_qm *qm = pci_get_drvdata(pdev); int pre_existing_vfs, num_vfs, total_vfs, ret; + ret = qm_pm_get_sync(qm); + if (ret) + return ret; + total_vfs = pci_sriov_get_totalvfs(pdev); pre_existing_vfs = pci_num_vf(pdev); if (pre_existing_vfs) { pci_err(pdev, "%d VFs already enabled. Please disable pre-enabled VFs!\n", pre_existing_vfs); - return 0; + goto err_put_sync; } num_vfs = min_t(int, max_vfs, total_vfs); ret = qm_vf_q_assign(qm, num_vfs); if (ret) { pci_err(pdev, "Can't assign queues for VF!\n"); - return ret; + goto err_put_sync; } qm->vfs_num = num_vfs; @@ -4308,12 +4413,16 @@ int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs) if (ret) { pci_err(pdev, "Can't enable VF!\n"); qm_clear_vft_config(qm); - return ret; + goto err_put_sync; } pci_info(pdev, "VF enabled, vfs_num(=%d)!\n", num_vfs); return num_vfs; + +err_put_sync: + qm_pm_put_sync(qm); + return ret; } EXPORT_SYMBOL_GPL(hisi_qm_sriov_enable); @@ -4328,6 +4437,7 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen) { struct hisi_qm *qm = pci_get_drvdata(pdev); int total_vfs = pci_sriov_get_totalvfs(qm->pdev); + int ret; if (pci_vfs_assigned(pdev)) { pci_err(pdev, "Failed to disable VFs as VFs are assigned!\n"); @@ -4343,8 +4453,13 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen) pci_disable_sriov(pdev); /* clear vf function shaper configure array */ memset(qm->factor + 1, 0, sizeof(struct qm_shaper_factor) * total_vfs); + ret = qm_clear_vft_config(qm); + if (ret) + return ret; - return qm_clear_vft_config(qm); + qm_pm_put_sync(qm); + + return 0; } EXPORT_SYMBOL_GPL(hisi_qm_sriov_disable); @@ -5164,11 +5279,18 @@ static void hisi_qm_controller_reset(struct work_struct *rst_work) struct hisi_qm *qm = container_of(rst_work, struct hisi_qm, rst_work); int ret; + ret = qm_pm_get_sync(qm); + if (ret) { + clear_bit(QM_RST_SCHED, &qm->misc_ctl); + return; + } + /* reset pcie device controller */ ret = qm_controller_reset(qm); if (ret) dev_err(&qm->pdev->dev, "controller reset failed (%d)\n", ret); + qm_pm_put_sync(qm); } static void qm_pf_reset_vf_prepare(struct hisi_qm *qm, @@ -5680,6 +5802,194 @@ int hisi_qm_init(struct hisi_qm *qm) } EXPORT_SYMBOL_GPL(hisi_qm_init); +/** + * hisi_qm_get_dfx_access() - Try to get dfx access. + * @qm: pointer to accelerator device. + * + * Try to get dfx access, then user can get message. + * + * If device is in suspended, return failure, otherwise + * bump up the runtime PM usage counter. + */ +int hisi_qm_get_dfx_access(struct hisi_qm *qm) +{ + struct device *dev = &qm->pdev->dev; + + if (pm_runtime_suspended(dev)) { + dev_info(dev, "can not read/write - device in suspended.\n"); + return -EAGAIN; + } + + return qm_pm_get_sync(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_get_dfx_access); + +/** + * hisi_qm_put_dfx_access() - Put dfx access. + * @qm: pointer to accelerator device. + * + * Put dfx access, drop runtime PM usage counter. + */ +void hisi_qm_put_dfx_access(struct hisi_qm *qm) +{ + qm_pm_put_sync(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_put_dfx_access); + +/** + * hisi_qm_pm_init() - Initialize qm runtime PM. + * @qm: pointer to accelerator device. + * + * Function that initialize qm runtime PM. 
+ */ +void hisi_qm_pm_init(struct hisi_qm *qm) +{ + struct device *dev = &qm->pdev->dev; + + if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3) + return; + + pm_runtime_set_autosuspend_delay(dev, QM_AUTOSUSPEND_DELAY); + pm_runtime_use_autosuspend(dev); + pm_runtime_put_noidle(dev); +} +EXPORT_SYMBOL_GPL(hisi_qm_pm_init); + +/** + * hisi_qm_pm_uninit() - Uninitialize qm runtime PM. + * @qm: pointer to accelerator device. + * + * Function that uninitialize qm runtime PM. + */ +void hisi_qm_pm_uninit(struct hisi_qm *qm) +{ + struct device *dev = &qm->pdev->dev; + + if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3) + return; + + pm_runtime_get_noresume(dev); + pm_runtime_dont_use_autosuspend(dev); +} +EXPORT_SYMBOL_GPL(hisi_qm_pm_uninit); + +static int qm_prepare_for_suspend(struct hisi_qm *qm) +{ + struct pci_dev *pdev = qm->pdev; + int ret; + u32 val; + + ret = qm->ops->set_msi(qm, false); + if (ret) { + pci_err(pdev, "failed to disable MSI before suspending!\n"); + return ret; + } + + /* shutdown OOO register */ + writel(ACC_MASTER_GLOBAL_CTRL_SHUTDOWN, + qm->io_base + ACC_MASTER_GLOBAL_CTRL); + + ret = readl_relaxed_poll_timeout(qm->io_base + ACC_MASTER_TRANS_RETURN, + val, + (val == ACC_MASTER_TRANS_RETURN_RW), + POLL_PERIOD, POLL_TIMEOUT); + if (ret) { + pci_emerg(pdev, "Bus lock! Please reset system.\n"); + return ret; + } + + ret = qm_set_pf_mse(qm, false); + if (ret) + pci_err(pdev, "failed to disable MSE before suspending!\n"); + + return ret; +} + +static int qm_rebuild_for_resume(struct hisi_qm *qm) +{ + struct pci_dev *pdev = qm->pdev; + int ret; + + ret = qm_set_pf_mse(qm, true); + if (ret) { + pci_err(pdev, "failed to enable MSE after resuming!\n"); + return ret; + } + + ret = qm->ops->set_msi(qm, true); + if (ret) { + pci_err(pdev, "failed to enable MSI after resuming!\n"); + return ret; + } + + ret = qm_dev_hw_init(qm); + if (ret) { + pci_err(pdev, "failed to init device after resuming\n"); + return ret; + } + + qm_cmd_init(qm); + hisi_qm_dev_err_init(qm); + + return 0; +} + +/** + * hisi_qm_suspend() - Runtime suspend of given device. + * @dev: device to suspend. + * + * Function that suspend the device. + */ +int hisi_qm_suspend(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct hisi_qm *qm = pci_get_drvdata(pdev); + int ret; + + pci_info(pdev, "entering suspended state\n"); + + ret = hisi_qm_stop(qm, QM_NORMAL); + if (ret) { + pci_err(pdev, "failed to stop qm(%d)\n", ret); + return ret; + } + + ret = qm_prepare_for_suspend(qm); + if (ret) + pci_err(pdev, "failed to prepare suspended(%d)\n", ret); + + return ret; +} +EXPORT_SYMBOL_GPL(hisi_qm_suspend); + +/** + * hisi_qm_resume() - Runtime resume of given device. + * @dev: device to resume. + * + * Function that resume the device. 
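hisi_qm_pm_init() above ends in pm_runtime_put_noidle() and hisi_qm_pm_uninit() opens with pm_runtime_get_noresume(): the put drops the usage-count reference the PCI core holds on the device by default, which is what lets the autosuspend timer ever expire, and the get re-takes it so the device stays awake once the driver detaches. A sketch of the balanced pair, assuming such a pre-existing reference:

#include <linux/pm_runtime.h>

#define EXAMPLE_AUTOSUSPEND_DELAY_MS    3000

static void example_pm_init(struct device *dev)
{
        pm_runtime_set_autosuspend_delay(dev, EXAMPLE_AUTOSUSPEND_DELAY_MS);
        pm_runtime_use_autosuspend(dev);
        /* Drop the reference held since probe so the delay can expire. */
        pm_runtime_put_noidle(dev);
}

static void example_pm_uninit(struct device *dev)
{
        /* Re-take the reference; the device must not suspend past here. */
        pm_runtime_get_noresume(dev);
        pm_runtime_dont_use_autosuspend(dev);
}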
+ */ +int hisi_qm_resume(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct hisi_qm *qm = pci_get_drvdata(pdev); + int ret; + + pci_info(pdev, "resuming from suspend state\n"); + + ret = qm_rebuild_for_resume(qm); + if (ret) { + pci_err(pdev, "failed to rebuild resume(%d)\n", ret); + return ret; + } + + ret = hisi_qm_start(qm); + if (ret) + pci_err(pdev, "failed to start qm(%d)\n", ret); + + return 0; +} +EXPORT_SYMBOL_GPL(hisi_qm_resume); + MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Zhou Wang "); MODULE_DESCRIPTION("HiSilicon Accelerator queue manager driver"); diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h index 035eaf8c442d..3068093229a5 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/drivers/crypto/hisilicon/qm.h @@ -4,6 +4,7 @@ #define HISI_ACC_QM_H #include +#include #include #include #include @@ -430,4 +431,11 @@ void hisi_qm_dev_shutdown(struct pci_dev *pdev); void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list); int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list); void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list); +int hisi_qm_resume(struct device *dev); +int hisi_qm_suspend(struct device *dev); +void hisi_qm_pm_uninit(struct hisi_qm *qm); +void hisi_qm_pm_init(struct hisi_qm *qm); +int hisi_qm_get_dfx_access(struct hisi_qm *qm); +void hisi_qm_put_dfx_access(struct hisi_qm *qm); +void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset); #endif diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index 018415b9840a..d97cf02b1df7 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -157,11 +157,6 @@ struct sec_ctx { struct device *dev; }; -enum sec_endian { - SEC_LE = 0, - SEC_32BE, - SEC_64BE -}; enum sec_debug_file_index { SEC_CLEAR_ENABLE, diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 490db7bccf61..90551bf38b52 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -57,10 +58,16 @@ #define SEC_MEM_START_INIT_REG 0x301100 #define SEC_MEM_INIT_DONE_REG 0x301104 +/* clock gating */ #define SEC_CONTROL_REG 0x301200 -#define SEC_TRNG_EN_SHIFT 8 +#define SEC_DYNAMIC_GATE_REG 0x30121c +#define SEC_CORE_AUTO_GATE 0x30212c +#define SEC_DYNAMIC_GATE_EN 0x7bff +#define SEC_CORE_AUTO_GATE_EN GENMASK(3, 0) #define SEC_CLK_GATE_ENABLE BIT(3) #define SEC_CLK_GATE_DISABLE (~BIT(3)) + +#define SEC_TRNG_EN_SHIFT 8 #define SEC_AXI_SHUTDOWN_ENABLE BIT(12) #define SEC_AXI_SHUTDOWN_DISABLE 0xFFFFEFFF @@ -312,31 +319,20 @@ static const struct pci_device_id sec_dev_ids[] = { }; MODULE_DEVICE_TABLE(pci, sec_dev_ids); -static u8 sec_get_endian(struct hisi_qm *qm) +static void sec_set_endian(struct hisi_qm *qm) { u32 reg; - /* - * As for VF, it is a wrong way to get endian setting by - * reading a register of the engine - */ - if (qm->pdev->is_virtfn) { - dev_err_ratelimited(&qm->pdev->dev, - "cannot access a register in VF!\n"); - return SEC_LE; - } reg = readl_relaxed(qm->io_base + SEC_CONTROL_REG); - /* BD little endian mode */ - if (!(reg & BIT(0))) - return SEC_LE; + reg &= ~(BIT(1) | BIT(0)); + if (!IS_ENABLED(CONFIG_64BIT)) + reg |= BIT(1); - /* BD 32-bits big endian mode */ - else if (!(reg & BIT(1))) - return SEC_32BE; - /* BD 64-bits big endian mode */ - else - return SEC_64BE; + if 
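One wrinkle in hisi_qm_resume() above: a failure of hisi_qm_start() is logged but the function still returns 0, so the PM core treats the resume as successful. A variant that also propagates that last error, assuming the same qm.c context (qm_rebuild_for_resume() and hisi_qm_start() as in the patch):

static int example_qm_resume(struct device *dev)
{
        struct pci_dev *pdev = to_pci_dev(dev);
        struct hisi_qm *qm = pci_get_drvdata(pdev);
        int ret;

        ret = qm_rebuild_for_resume(qm);
        if (ret) {
                pci_err(pdev, "failed to rebuild resume(%d)\n", ret);
                return ret;
        }

        ret = hisi_qm_start(qm);
        if (ret)
                pci_err(pdev, "failed to start qm(%d)\n", ret);

        return ret;     /* report the start failure to the PM core too */
}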
(!IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) + reg |= BIT(0); + + writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG); } static void sec_open_sva_prefetch(struct hisi_qm *qm) @@ -378,15 +374,43 @@ static void sec_close_sva_prefetch(struct hisi_qm *qm) pci_err(qm->pdev, "failed to close sva prefetch\n"); } +static void sec_enable_clock_gate(struct hisi_qm *qm) +{ + u32 val; + + if (qm->ver < QM_HW_V3) + return; + + val = readl_relaxed(qm->io_base + SEC_CONTROL_REG); + val |= SEC_CLK_GATE_ENABLE; + writel_relaxed(val, qm->io_base + SEC_CONTROL_REG); + + val = readl(qm->io_base + SEC_DYNAMIC_GATE_REG); + val |= SEC_DYNAMIC_GATE_EN; + writel(val, qm->io_base + SEC_DYNAMIC_GATE_REG); + + val = readl(qm->io_base + SEC_CORE_AUTO_GATE); + val |= SEC_CORE_AUTO_GATE_EN; + writel(val, qm->io_base + SEC_CORE_AUTO_GATE); +} + +static void sec_disable_clock_gate(struct hisi_qm *qm) +{ + u32 val; + + /* Kunpeng920 needs to close clock gating */ + val = readl_relaxed(qm->io_base + SEC_CONTROL_REG); + val &= SEC_CLK_GATE_DISABLE; + writel_relaxed(val, qm->io_base + SEC_CONTROL_REG); +} + static int sec_engine_init(struct hisi_qm *qm) { int ret; u32 reg; - /* disable clock gate control */ - reg = readl_relaxed(qm->io_base + SEC_CONTROL_REG); - reg &= SEC_CLK_GATE_DISABLE; - writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG); + /* disable clock gate control before mem init */ + sec_disable_clock_gate(qm); writel_relaxed(0x1, qm->io_base + SEC_MEM_START_INIT_REG); @@ -429,9 +453,9 @@ static int sec_engine_init(struct hisi_qm *qm) qm->io_base + SEC_BD_ERR_CHK_EN_REG3); /* config endian */ - reg = readl_relaxed(qm->io_base + SEC_CONTROL_REG); - reg |= sec_get_endian(qm); - writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG); + sec_set_endian(qm); + + sec_enable_clock_gate(qm); return 0; } @@ -533,17 +557,14 @@ static void sec_hw_error_disable(struct hisi_qm *qm) writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_NFE_REG); } -static u32 sec_clear_enable_read(struct sec_debug_file *file) +static u32 sec_clear_enable_read(struct hisi_qm *qm) { - struct hisi_qm *qm = file->qm; - return readl(qm->io_base + SEC_CTRL_CNT_CLR_CE) & SEC_CTRL_CNT_CLR_CE_BIT; } -static int sec_clear_enable_write(struct sec_debug_file *file, u32 val) +static int sec_clear_enable_write(struct hisi_qm *qm, u32 val) { - struct hisi_qm *qm = file->qm; u32 tmp; if (val != 1 && val) @@ -561,24 +582,34 @@ static ssize_t sec_debug_read(struct file *filp, char __user *buf, { struct sec_debug_file *file = filp->private_data; char tbuf[SEC_DBGFS_VAL_MAX_LEN]; + struct hisi_qm *qm = file->qm; u32 val; int ret; + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + spin_lock_irq(&file->lock); switch (file->index) { case SEC_CLEAR_ENABLE: - val = sec_clear_enable_read(file); + val = sec_clear_enable_read(qm); break; default: - spin_unlock_irq(&file->lock); - return -EINVAL; + goto err_input; } spin_unlock_irq(&file->lock); - ret = snprintf(tbuf, SEC_DBGFS_VAL_MAX_LEN, "%u\n", val); + hisi_qm_put_dfx_access(qm); + ret = snprintf(tbuf, SEC_DBGFS_VAL_MAX_LEN, "%u\n", val); return simple_read_from_buffer(buf, count, pos, tbuf, ret); + +err_input: + spin_unlock_irq(&file->lock); + hisi_qm_put_dfx_access(qm); + return -EINVAL; } static ssize_t sec_debug_write(struct file *filp, const char __user *buf, @@ -586,6 +617,7 @@ static ssize_t sec_debug_write(struct file *filp, const char __user *buf, { struct sec_debug_file *file = filp->private_data; char tbuf[SEC_DBGFS_VAL_MAX_LEN]; + struct hisi_qm *qm = file->qm; unsigned long val; int len, ret; @@ -604,11 +636,15 
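sec_set_endian() above derives the descriptor endianness from the kernel configuration instead of reading it back from the engine, which the removed sec_get_endian() could never do from a VF. IS_ENABLED() folds to a compile-time constant, so the whole function reduces to one fixed read-modify-write; sketched against an illustrative control register:

#include <linux/bits.h>
#include <linux/io.h>
#include <linux/kconfig.h>
#include <linux/types.h>

static void example_set_endian(void __iomem *base, unsigned long ctrl_off)
{
        u32 val = readl_relaxed(base + ctrl_off);

        val &= ~(BIT(1) | BIT(0));      /* clear both mode bits first */
        if (!IS_ENABLED(CONFIG_64BIT))
                val |= BIT(1);          /* 32-bit kernel */
        if (!IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
                val |= BIT(0);          /* big-endian kernel */

        writel_relaxed(val, base + ctrl_off);
}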
@@ static ssize_t sec_debug_write(struct file *filp, const char __user *buf, if (kstrtoul(tbuf, 0, &val)) return -EFAULT; + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + spin_lock_irq(&file->lock); switch (file->index) { case SEC_CLEAR_ENABLE: - ret = sec_clear_enable_write(file, val); + ret = sec_clear_enable_write(qm, val); if (ret) goto err_input; break; @@ -617,12 +653,11 @@ static ssize_t sec_debug_write(struct file *filp, const char __user *buf, goto err_input; } - spin_unlock_irq(&file->lock); - - return count; + ret = count; err_input: spin_unlock_irq(&file->lock); + hisi_qm_put_dfx_access(qm); return ret; } @@ -653,6 +688,15 @@ static int sec_debugfs_atomic64_set(void *data, u64 val) DEFINE_DEBUGFS_ATTRIBUTE(sec_atomic64_ops, sec_debugfs_atomic64_get, sec_debugfs_atomic64_set, "%lld\n"); +static int sec_regs_show(struct seq_file *s, void *unused) +{ + hisi_qm_regs_dump(s, s->private); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(sec_regs); + static int sec_core_debug_init(struct hisi_qm *qm) { struct sec_dev *sec = container_of(qm, struct sec_dev, qm); @@ -671,9 +715,10 @@ static int sec_core_debug_init(struct hisi_qm *qm) regset->regs = sec_dfx_regs; regset->nregs = ARRAY_SIZE(sec_dfx_regs); regset->base = qm->io_base; + regset->dev = dev; if (qm->pdev->device == SEC_PF_PCI_DEVICE_ID) - debugfs_create_regset32("regs", 0444, tmp_d, regset); + debugfs_create_file("regs", 0444, tmp_d, regset, &sec_regs_fops); for (i = 0; i < ARRAY_SIZE(sec_dfx_labels); i++) { atomic64_t *data = (atomic64_t *)((uintptr_t)dfx + @@ -981,10 +1026,13 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_alg_unregister; } + hisi_qm_pm_init(qm); + return 0; err_alg_unregister: - hisi_qm_alg_unregister(qm, &sec_devices); + if (qm->qp_num >= ctx_q_num) + hisi_qm_alg_unregister(qm, &sec_devices); err_qm_stop: sec_debugfs_exit(qm); hisi_qm_stop(qm, QM_NORMAL); @@ -999,6 +1047,7 @@ static void sec_remove(struct pci_dev *pdev) { struct hisi_qm *qm = pci_get_drvdata(pdev); + hisi_qm_pm_uninit(qm); hisi_qm_wait_task_finish(qm, &sec_devices); if (qm->qp_num >= ctx_q_num) hisi_qm_alg_unregister(qm, &sec_devices); @@ -1018,6 +1067,10 @@ static void sec_remove(struct pci_dev *pdev) sec_qm_uninit(qm); } +static const struct dev_pm_ops sec_pm_ops = { + SET_RUNTIME_PM_OPS(hisi_qm_suspend, hisi_qm_resume, NULL) +}; + static const struct pci_error_handlers sec_err_handler = { .error_detected = hisi_qm_dev_err_detected, .slot_reset = hisi_qm_dev_slot_reset, @@ -1033,6 +1086,7 @@ static struct pci_driver sec_pci_driver = { .err_handler = &sec_err_handler, .sriov_configure = hisi_qm_sriov_configure, .shutdown = hisi_qm_dev_shutdown, + .driver.pm = &sec_pm_ops, }; static void sec_register_debugfs(void) diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index f8482ceebf2a..7148201ce76e 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -107,6 +108,14 @@ #define HZIP_DELAY_1_US 1 #define HZIP_POLL_TIMEOUT_US 1000 +/* clock gating */ +#define HZIP_PEH_CFG_AUTO_GATE 0x3011A8 +#define HZIP_PEH_CFG_AUTO_GATE_EN BIT(0) +#define HZIP_CORE_GATED_EN GENMASK(15, 8) +#define HZIP_CORE_GATED_OOO_EN BIT(29) +#define HZIP_CLOCK_GATED_EN (HZIP_CORE_GATED_EN | \ + HZIP_CORE_GATED_OOO_EN) + static const char hisi_zip_name[] = "hisi_zip"; static struct dentry *hzip_debugfs_root; @@ -312,6 +321,22 @@ static void 
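debugfs_create_regset32() reads the registers with no hook to wake the device first, which is why sec_core_debug_init() above switches to a plain debugfs file whose show routine funnels through hisi_qm_regs_dump(). The pieces involved, with illustrative names; DEFINE_SHOW_ATTRIBUTE() generates example_regs_fops from the _show function via single_open(), handing the data pointer given at creation time to s->private:

#include <linux/debugfs.h>
#include <linux/seq_file.h>

static int example_regs_show(struct seq_file *s, void *unused)
{
        /* s->private is the regset handed to debugfs_create_file(). */
        hisi_qm_regs_dump(s, s->private);
        return 0;
}
DEFINE_SHOW_ATTRIBUTE(example_regs);

/* Registration, e.g. in the driver's debugfs init:
 *      debugfs_create_file("regs", 0444, parent, regset,
 *                          &example_regs_fops);
 */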
hisi_zip_close_sva_prefetch(struct hisi_qm *qm) pci_err(qm->pdev, "failed to close sva prefetch\n"); } +static void hisi_zip_enable_clock_gate(struct hisi_qm *qm) +{ + u32 val; + + if (qm->ver < QM_HW_V3) + return; + + val = readl(qm->io_base + HZIP_CLOCK_GATE_CTRL); + val |= HZIP_CLOCK_GATED_EN; + writel(val, qm->io_base + HZIP_CLOCK_GATE_CTRL); + + val = readl(qm->io_base + HZIP_PEH_CFG_AUTO_GATE); + val |= HZIP_PEH_CFG_AUTO_GATE_EN; + writel(val, qm->io_base + HZIP_PEH_CFG_AUTO_GATE); +} + static int hisi_zip_set_user_domain_and_cache(struct hisi_qm *qm) { void __iomem *base = qm->io_base; @@ -359,6 +384,8 @@ static int hisi_zip_set_user_domain_and_cache(struct hisi_qm *qm) CQC_CACHE_WB_ENABLE | FIELD_PREP(SQC_CACHE_WB_THRD, 1) | FIELD_PREP(CQC_CACHE_WB_THRD, 1), base + QM_CACHE_CTL); + hisi_zip_enable_clock_gate(qm); + return 0; } @@ -423,17 +450,14 @@ static inline struct hisi_qm *file_to_qm(struct ctrl_debug_file *file) return &hisi_zip->qm; } -static u32 clear_enable_read(struct ctrl_debug_file *file) +static u32 clear_enable_read(struct hisi_qm *qm) { - struct hisi_qm *qm = file_to_qm(file); - return readl(qm->io_base + HZIP_SOFT_CTRL_CNT_CLR_CE) & HZIP_SOFT_CTRL_CNT_CLR_CE_BIT; } -static int clear_enable_write(struct ctrl_debug_file *file, u32 val) +static int clear_enable_write(struct hisi_qm *qm, u32 val) { - struct hisi_qm *qm = file_to_qm(file); u32 tmp; if (val != 1 && val != 0) @@ -450,22 +474,33 @@ static ssize_t hisi_zip_ctrl_debug_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { struct ctrl_debug_file *file = filp->private_data; + struct hisi_qm *qm = file_to_qm(file); char tbuf[HZIP_BUF_SIZE]; u32 val; int ret; + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + spin_lock_irq(&file->lock); switch (file->index) { case HZIP_CLEAR_ENABLE: - val = clear_enable_read(file); + val = clear_enable_read(qm); break; default: - spin_unlock_irq(&file->lock); - return -EINVAL; + goto err_input; } spin_unlock_irq(&file->lock); + + hisi_qm_put_dfx_access(qm); ret = scnprintf(tbuf, sizeof(tbuf), "%u\n", val); return simple_read_from_buffer(buf, count, pos, tbuf, ret); + +err_input: + spin_unlock_irq(&file->lock); + hisi_qm_put_dfx_access(qm); + return -EINVAL; } static ssize_t hisi_zip_ctrl_debug_write(struct file *filp, @@ -473,6 +508,7 @@ static ssize_t hisi_zip_ctrl_debug_write(struct file *filp, size_t count, loff_t *pos) { struct ctrl_debug_file *file = filp->private_data; + struct hisi_qm *qm = file_to_qm(file); char tbuf[HZIP_BUF_SIZE]; unsigned long val; int len, ret; @@ -491,10 +527,14 @@ static ssize_t hisi_zip_ctrl_debug_write(struct file *filp, if (kstrtoul(tbuf, 0, &val)) return -EFAULT; + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + spin_lock_irq(&file->lock); switch (file->index) { case HZIP_CLEAR_ENABLE: - ret = clear_enable_write(file, val); + ret = clear_enable_write(qm, val); if (ret) goto err_input; break; @@ -502,12 +542,12 @@ static ssize_t hisi_zip_ctrl_debug_write(struct file *filp, ret = -EINVAL; goto err_input; } - spin_unlock_irq(&file->lock); - return count; + ret = count; err_input: spin_unlock_irq(&file->lock); + hisi_qm_put_dfx_access(qm); return ret; } @@ -538,6 +578,15 @@ static int zip_debugfs_atomic64_get(void *data, u64 *val) DEFINE_DEBUGFS_ATTRIBUTE(zip_atomic64_ops, zip_debugfs_atomic64_get, zip_debugfs_atomic64_set, "%llu\n"); +static int hisi_zip_regs_show(struct seq_file *s, void *unused) +{ + hisi_qm_regs_dump(s, s->private); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(hisi_zip_regs); + static int 
hisi_zip_core_debug_init(struct hisi_qm *qm) { struct device *dev = &qm->pdev->dev; @@ -560,9 +609,11 @@ static int hisi_zip_core_debug_init(struct hisi_qm *qm) regset->regs = hzip_dfx_regs; regset->nregs = ARRAY_SIZE(hzip_dfx_regs); regset->base = qm->io_base + core_offsets[i]; + regset->dev = dev; tmp_d = debugfs_create_dir(buf, qm->debug.debug_root); - debugfs_create_regset32("regs", 0444, tmp_d, regset); + debugfs_create_file("regs", 0444, tmp_d, regset, + &hisi_zip_regs_fops); } return 0; @@ -898,6 +949,8 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_qm_alg_unregister; } + hisi_qm_pm_init(qm); + return 0; err_qm_alg_unregister: @@ -920,6 +973,7 @@ static void hisi_zip_remove(struct pci_dev *pdev) { struct hisi_qm *qm = pci_get_drvdata(pdev); + hisi_qm_pm_uninit(qm); hisi_qm_wait_task_finish(qm, &zip_devices); hisi_qm_alg_unregister(qm, &zip_devices); @@ -932,6 +986,10 @@ static void hisi_zip_remove(struct pci_dev *pdev) hisi_zip_qm_uninit(qm); } +static const struct dev_pm_ops hisi_zip_pm_ops = { + SET_RUNTIME_PM_OPS(hisi_qm_suspend, hisi_qm_resume, NULL) +}; + static const struct pci_error_handlers hisi_zip_err_handler = { .error_detected = hisi_qm_dev_err_detected, .slot_reset = hisi_qm_dev_slot_reset, @@ -948,6 +1006,7 @@ static struct pci_driver hisi_zip_pci_driver = { hisi_qm_sriov_configure : NULL, .err_handler = &hisi_zip_err_handler, .shutdown = hisi_qm_dev_shutdown, + .driver.pm = &hisi_zip_pm_ops, }; static void hisi_zip_register_debugfs(void) diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index d6a7784d2988..d19e5ffb5104 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -170,15 +170,19 @@ static struct dcp *global_sdcp; static int mxs_dcp_start_dma(struct dcp_async_ctx *actx) { + int dma_err; struct dcp *sdcp = global_sdcp; const int chan = actx->chan; uint32_t stat; unsigned long ret; struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan]; dma_addr_t desc_phys = dma_map_single(sdcp->dev, desc, sizeof(*desc), DMA_TO_DEVICE); + dma_err = dma_mapping_error(sdcp->dev, desc_phys); + if (dma_err) + return dma_err; + reinit_completion(&sdcp->completion[chan]); /* Clear status register.
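SEC and ZIP wire the shared QM helpers into runtime PM identically, through a dev_pm_ops attached to the PCI driver. Just that glue, sketched (the example_* driver is illustrative and its remaining fields are omitted):

#include <linux/pci.h>
#include <linux/pm.h>

static const struct dev_pm_ops example_pm_ops = {
        SET_RUNTIME_PM_OPS(hisi_qm_suspend, hisi_qm_resume, NULL)
};

static struct pci_driver example_pci_driver = {
        .name           = "example",
        /* .id_table, .probe, .remove omitted for brevity */
        .driver.pm      = &example_pm_ops,
};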
*/ @@ -216,18 +220,29 @@ static int mxs_dcp_start_dma(struct dcp_async_ctx *actx) static int mxs_dcp_run_aes(struct dcp_async_ctx *actx, struct skcipher_request *req, int init) { + dma_addr_t key_phys, src_phys, dst_phys; struct dcp *sdcp = global_sdcp; struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan]; struct dcp_aes_req_ctx *rctx = skcipher_request_ctx(req); int ret; - dma_addr_t key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key, - 2 * AES_KEYSIZE_128, - DMA_TO_DEVICE); - dma_addr_t src_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_in_buf, - DCP_BUF_SZ, DMA_TO_DEVICE); - dma_addr_t dst_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_out_buf, - DCP_BUF_SZ, DMA_FROM_DEVICE); + key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key, + 2 * AES_KEYSIZE_128, DMA_TO_DEVICE); + ret = dma_mapping_error(sdcp->dev, key_phys); + if (ret) + return ret; + + src_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_in_buf, + DCP_BUF_SZ, DMA_TO_DEVICE); + ret = dma_mapping_error(sdcp->dev, src_phys); + if (ret) + goto err_src; + + dst_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_out_buf, + DCP_BUF_SZ, DMA_FROM_DEVICE); + ret = dma_mapping_error(sdcp->dev, dst_phys); + if (ret) + goto err_dst; if (actx->fill % AES_BLOCK_SIZE) { dev_err(sdcp->dev, "Invalid block size!\n"); @@ -265,10 +280,12 @@ static int mxs_dcp_run_aes(struct dcp_async_ctx *actx, ret = mxs_dcp_start_dma(actx); aes_done_run: + dma_unmap_single(sdcp->dev, dst_phys, DCP_BUF_SZ, DMA_FROM_DEVICE); +err_dst: + dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE); +err_src: dma_unmap_single(sdcp->dev, key_phys, 2 * AES_KEYSIZE_128, DMA_TO_DEVICE); - dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE); - dma_unmap_single(sdcp->dev, dst_phys, DCP_BUF_SZ, DMA_FROM_DEVICE); return ret; } @@ -283,21 +300,20 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) struct scatterlist *dst = req->dst; struct scatterlist *src = req->src; - const int nents = sg_nents(req->src); + int dst_nents = sg_nents(dst); const int out_off = DCP_BUF_SZ; uint8_t *in_buf = sdcp->coh->aes_in_buf; uint8_t *out_buf = sdcp->coh->aes_out_buf; - uint8_t *out_tmp, *src_buf, *dst_buf = NULL; uint32_t dst_off = 0; + uint8_t *src_buf = NULL; uint32_t last_out_len = 0; uint8_t *key = sdcp->coh->aes_key; int ret = 0; - int split = 0; - unsigned int i, len, clen, rem = 0, tlen = 0; + unsigned int i, len, clen, tlen = 0; int init = 0; bool limit_hit = false; @@ -315,7 +331,7 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) memset(key + AES_KEYSIZE_128, 0, AES_KEYSIZE_128); } - for_each_sg(req->src, src, nents, i) { + for_each_sg(req->src, src, sg_nents(src), i) { src_buf = sg_virt(src); len = sg_dma_len(src); tlen += len; @@ -340,34 +356,17 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) * submit the buffer. 
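mxs_dcp_run_aes() above adopts the canonical map/check/unwind ladder: each dma_map_single() is immediately checked with dma_mapping_error(), and a failure unmaps only the mappings that already succeeded, in reverse order. The skeleton, independent of this driver:

#include <linux/dma-mapping.h>

static int example_map_pair(struct device *dev, void *in, void *out,
                            size_t len, dma_addr_t *in_phys,
                            dma_addr_t *out_phys)
{
        int ret;

        *in_phys = dma_map_single(dev, in, len, DMA_TO_DEVICE);
        ret = dma_mapping_error(dev, *in_phys);
        if (ret)
                return ret;

        *out_phys = dma_map_single(dev, out, len, DMA_FROM_DEVICE);
        ret = dma_mapping_error(dev, *out_phys);
        if (ret)
                goto err_unmap_in;

        return 0;

err_unmap_in:
        dma_unmap_single(dev, *in_phys, len, DMA_TO_DEVICE);
        return ret;
}

The reordered err_dst/err_src labels in the hunk above mirror this: the unwind path doubles as the success path's cleanup, so mappings are always released in reverse order of creation.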
*/ if (actx->fill == out_off || sg_is_last(src) || - limit_hit) { + limit_hit) { ret = mxs_dcp_run_aes(actx, req, init); if (ret) return ret; init = 0; - out_tmp = out_buf; + sg_pcopy_from_buffer(dst, dst_nents, out_buf, + actx->fill, dst_off); + dst_off += actx->fill; last_out_len = actx->fill; - while (dst && actx->fill) { - if (!split) { - dst_buf = sg_virt(dst); - dst_off = 0; - } - rem = min(sg_dma_len(dst) - dst_off, - actx->fill); - - memcpy(dst_buf + dst_off, out_tmp, rem); - out_tmp += rem; - dst_off += rem; - actx->fill -= rem; - - if (dst_off == sg_dma_len(dst)) { - dst = sg_next(dst); - split = 0; - } else { - split = 1; - } - } + actx->fill = 0; } } while (len); @@ -557,6 +556,10 @@ static int mxs_dcp_run_sha(struct ahash_request *req) dma_addr_t buf_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_in_buf, DCP_BUF_SZ, DMA_TO_DEVICE); + ret = dma_mapping_error(sdcp->dev, buf_phys); + if (ret) + return ret; + /* Fill in the DMA descriptor. */ desc->control0 = MXS_DCP_CONTROL0_DECR_SEMAPHORE | MXS_DCP_CONTROL0_INTERRUPT | @@ -589,6 +592,10 @@ static int mxs_dcp_run_sha(struct ahash_request *req) if (rctx->fini) { digest_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_out_buf, DCP_SHA_PAY_SZ, DMA_FROM_DEVICE); + ret = dma_mapping_error(sdcp->dev, digest_phys); + if (ret) + goto done_run; + desc->control0 |= MXS_DCP_CONTROL0_HASH_TERM; desc->payload = digest_phys; } diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 0dd4c6b157de..9b968ac4ee7b 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -1175,9 +1175,9 @@ static int omap_aes_probe(struct platform_device *pdev) spin_lock_init(&dd->lock); INIT_LIST_HEAD(&dd->list); - spin_lock(&list_lock); + spin_lock_bh(&list_lock); list_add_tail(&dd->list, &dev_list); - spin_unlock(&list_lock); + spin_unlock_bh(&list_lock); /* Initialize crypto engine */ dd->engine = crypto_engine_alloc_init(dev, 1); @@ -1264,9 +1264,9 @@ static int omap_aes_remove(struct platform_device *pdev) if (!dd) return -ENODEV; - spin_lock(&list_lock); + spin_lock_bh(&list_lock); list_del(&dd->list); - spin_unlock(&list_lock); + spin_unlock_bh(&list_lock); for (i = dd->pdata->algs_info_size - 1; i >= 0; i--) for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) { diff --git a/drivers/crypto/omap-crypto.c b/drivers/crypto/omap-crypto.c index 31bdb1d76d11..a4cc6bf146ec 100644 --- a/drivers/crypto/omap-crypto.c +++ b/drivers/crypto/omap-crypto.c @@ -210,7 +210,7 @@ void omap_crypto_cleanup(struct scatterlist *sg, struct scatterlist *orig, buf = sg_virt(sg); pages = get_order(len); - if (orig && (flags & OMAP_CRYPTO_COPY_MASK)) + if (orig && (flags & OMAP_CRYPTO_DATA_COPIED)) omap_crypto_copy_data(sg, orig, offset, len); if (flags & OMAP_CRYPTO_DATA_COPIED) diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c index bc8631363d72..be77656864e3 100644 --- a/drivers/crypto/omap-des.c +++ b/drivers/crypto/omap-des.c @@ -1033,9 +1033,9 @@ static int omap_des_probe(struct platform_device *pdev) INIT_LIST_HEAD(&dd->list); - spin_lock(&list_lock); + spin_lock_bh(&list_lock); list_add_tail(&dd->list, &dev_list); - spin_unlock(&list_lock); + spin_unlock_bh(&list_lock); /* Initialize des crypto engine */ dd->engine = crypto_engine_alloc_init(dev, 1); @@ -1094,9 +1094,9 @@ static int omap_des_remove(struct platform_device *pdev) if (!dd) return -ENODEV; - spin_lock(&list_lock); + spin_lock_bh(&list_lock); list_del(&dd->list); - spin_unlock(&list_lock); + spin_unlock_bh(&list_lock); for (i = dd->pdata->algs_info_size - 
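The same mxs-dcp rewrite drops the open-coded destination walk (the dst_buf/split/rem bookkeeping) in favour of sg_pcopy_from_buffer(), which copies from a linear buffer into a scatterlist starting at an arbitrary byte offset. Its use in isolation:

#include <linux/bug.h>
#include <linux/scatterlist.h>

static void example_copy_out(struct scatterlist *dst, const void *out_buf,
                             size_t fill, off_t dst_off)
{
        size_t copied;

        /* Copy 'fill' bytes from out_buf into dst, starting at byte
         * dst_off of the scatterlist. */
        copied = sg_pcopy_from_buffer(dst, sg_nents(dst), out_buf,
                                      fill, dst_off);
        WARN_ON(copied != fill);
}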
1; i >= 0; i--) for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index dd53ad9987b0..f6bf53c00b61 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -105,7 +105,6 @@ #define FLAGS_FINAL 1 #define FLAGS_DMA_ACTIVE 2 #define FLAGS_OUTPUT_READY 3 -#define FLAGS_INIT 4 #define FLAGS_CPU 5 #define FLAGS_DMA_READY 6 #define FLAGS_AUTO_XOR 7 @@ -368,24 +367,6 @@ static void omap_sham_copy_ready_hash(struct ahash_request *req) hash[i] = le32_to_cpup((__le32 *)in + i); } -static int omap_sham_hw_init(struct omap_sham_dev *dd) -{ - int err; - - err = pm_runtime_resume_and_get(dd->dev); - if (err < 0) { - dev_err(dd->dev, "failed to get sync: %d\n", err); - return err; - } - - if (!test_bit(FLAGS_INIT, &dd->flags)) { - set_bit(FLAGS_INIT, &dd->flags); - dd->err = 0; - } - - return 0; -} - static void omap_sham_write_ctrl_omap2(struct omap_sham_dev *dd, size_t length, int final, int dma) { @@ -1093,11 +1074,14 @@ static int omap_sham_hash_one_req(struct crypto_engine *engine, void *areq) dev_dbg(dd->dev, "hash-one: op: %u, total: %u, digcnt: %zd, final: %d", ctx->op, ctx->total, ctx->digcnt, final); - dd->req = req; - - err = omap_sham_hw_init(dd); - if (err) + err = pm_runtime_resume_and_get(dd->dev); + if (err < 0) { + dev_err(dd->dev, "failed to get sync: %d\n", err); return err; + } + + dd->err = 0; + dd->req = req; if (ctx->digcnt) dd->pdata->copy_hash(req, 0); @@ -1736,7 +1720,7 @@ static void omap_sham_done_task(unsigned long data) if (test_and_clear_bit(FLAGS_OUTPUT_READY, &dd->flags)) goto finish; } else if (test_bit(FLAGS_DMA_READY, &dd->flags)) { - if (test_and_clear_bit(FLAGS_DMA_ACTIVE, &dd->flags)) { + if (test_bit(FLAGS_DMA_ACTIVE, &dd->flags)) { omap_sham_update_dma_stop(dd); if (dd->err) { err = dd->err; @@ -2129,7 +2113,6 @@ static int omap_sham_probe(struct platform_device *pdev) dd->fallback_sz = OMAP_SHA_DMA_THRESHOLD; pm_runtime_enable(dev); - pm_runtime_irq_safe(dev); err = pm_runtime_get_sync(dev); if (err < 0) { @@ -2144,9 +2127,9 @@ static int omap_sham_probe(struct platform_device *pdev) (rev & dd->pdata->major_mask) >> dd->pdata->major_shift, (rev & dd->pdata->minor_mask) >> dd->pdata->minor_shift); - spin_lock(&sham.lock); + spin_lock_bh(&sham.lock); list_add_tail(&dd->list, &sham.dev_list); - spin_unlock(&sham.lock); + spin_unlock_bh(&sham.lock); dd->engine = crypto_engine_alloc_init(dev, 1); if (!dd->engine) { @@ -2194,10 +2177,11 @@ static int omap_sham_probe(struct platform_device *pdev) err_engine_start: crypto_engine_exit(dd->engine); err_engine: - spin_lock(&sham.lock); + spin_lock_bh(&sham.lock); list_del(&dd->list); - spin_unlock(&sham.lock); + spin_unlock_bh(&sham.lock); err_pm: + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); if (!dd->polling_mode) dma_release_channel(dd->dma_lch); @@ -2215,9 +2199,9 @@ static int omap_sham_remove(struct platform_device *pdev) dd = platform_get_drvdata(pdev); if (!dd) return -ENODEV; - spin_lock(&sham.lock); + spin_lock_bh(&sham.lock); list_del(&dd->list); - spin_unlock(&sham.lock); + spin_unlock_bh(&sham.lock); for (i = dd->pdata->algs_info_size - 1; i >= 0; i--) for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) { crypto_unregister_ahash( @@ -2225,6 +2209,7 @@ static int omap_sham_remove(struct platform_device *pdev) dd->pdata->algs_info[i].registered--; } tasklet_kill(&dd->done_task); + pm_runtime_dont_use_autosuspend(&pdev->dev); pm_runtime_disable(&pdev->dev); if (!dd->polling_mode) @@ 
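The omap probe/remove hunks above convert the driver-list locks from spin_lock() to spin_lock_bh(). The usual reason for such a change, which fits these drivers, is that the same lock is also taken on the request path, which can run in softirq context; a process-context holder must then disable bottom halves, or a softirq interrupting it on the same CPU could deadlock trying to take the lock. Illustrative shape:

#include <linux/list.h>
#include <linux/spinlock.h>

struct example_dev {
        struct list_head list;
};

static LIST_HEAD(example_dev_list);
static DEFINE_SPINLOCK(example_list_lock);

/* Process context (probe/remove): block softirqs while holding the lock. */
static void example_register(struct example_dev *dd)
{
        spin_lock_bh(&example_list_lock);
        list_add_tail(&dd->list, &example_dev_list);
        spin_unlock_bh(&example_list_lock);
}

/* Softirq context may then take the same lock without deadlocking. */
static struct example_dev *example_find_dev(void)
{
        struct example_dev *dd;

        spin_lock(&example_list_lock);
        dd = list_first_entry_or_null(&example_dev_list,
                                      struct example_dev, list);
        spin_unlock(&example_list_lock);
        return dd;
}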
-2235,32 +2220,11 @@ static int omap_sham_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int omap_sham_suspend(struct device *dev) -{ - pm_runtime_put_sync(dev); - return 0; -} - -static int omap_sham_resume(struct device *dev) -{ - int err = pm_runtime_resume_and_get(dev); - if (err < 0) { - dev_err(dev, "failed to get sync: %d\n", err); - return err; - } - return 0; -} -#endif - -static SIMPLE_DEV_PM_OPS(omap_sham_pm_ops, omap_sham_suspend, omap_sham_resume); - static struct platform_driver omap_sham_driver = { .probe = omap_sham_probe, .remove = omap_sham_remove, .driver = { .name = "omap-sham", - .pm = &omap_sham_pm_ops, .of_match_table = omap_sham_of_match, }, }; diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c index 3524ddd48930..33d8e50dcbda 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -161,7 +161,7 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev) ADF_CSR_WR(addr, ADF_4XXX_SMIAPF_MASK_OFFSET, 0); } -static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev) +static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev) { return 0; } @@ -210,21 +210,21 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data) hw_data->fw_mmp_name = ADF_4XXX_MMP; hw_data->init_admin_comms = adf_init_admin_comms; hw_data->exit_admin_comms = adf_exit_admin_comms; - hw_data->disable_iov = adf_disable_sriov; hw_data->send_admin_init = adf_send_admin_init; hw_data->init_arb = adf_init_arb; hw_data->exit_arb = adf_exit_arb; hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; - hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; hw_data->reset_device = adf_reset_flr; - hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; hw_data->admin_ae_mask = ADF_4XXX_ADMIN_AE_MASK; hw_data->uof_get_num_objs = uof_get_num_objs; hw_data->uof_get_name = uof_get_name; hw_data->uof_get_ae_mask = uof_get_ae_mask; hw_data->set_msix_rttable = set_msix_default_rttable; hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer; + hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms; + hw_data->disable_iov = adf_disable_sriov; + hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; adf_gen4_init_hw_csr_ops(&hw_data->csr_ops); } diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c b/drivers/crypto/qat/qat_4xxx/adf_drv.c index a8805c815d16..359fb7989dfb 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c @@ -221,16 +221,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Set DMA identifier */ - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) { - dev_err(&pdev->dev, "No usable DMA configuration.\n"); - ret = -EFAULT; - goto out_err; - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - } - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) { + dev_err(&pdev->dev, "No usable DMA configuration.\n"); + goto out_err; } /* Get accelerator capabilities mask */ diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c index 1dd64af22bea..3027c01bc89e 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c @@ -111,11 +111,6 @@ static u32 
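From here on, every QAT probe routine replaces the nested pci_set_dma_mask()/pci_set_consistent_dma_mask() fallback dance with one dma_set_mask_and_coherent() call; the 32-bit retry is dropped because on current kernels a wider mask request does not fail where the narrower one would have succeeded. The replacement pattern in isolation (the 48-bit width is what the hunks below use for the older parts; the 4xxx hunk above keeps 64):

#include <linux/dma-mapping.h>
#include <linux/pci.h>

static int example_set_dma_mask(struct pci_dev *pdev)
{
        int ret;

        /* Sets the streaming and the coherent DMA mask in one call. */
        ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
        if (ret)
                dev_err(&pdev->dev, "No usable DMA configuration\n");

        return ret;
}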
get_pf2vf_offset(u32 i) return ADF_C3XXX_PF2VF_OFFSET(i); } -static u32 get_vintmsk_offset(u32 i) -{ - return ADF_C3XXX_VINTMSK_OFFSET(i); -} - static void adf_enable_error_correction(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_device = accel_dev->hw_device; @@ -159,8 +154,10 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev) ADF_C3XXX_SMIA1_MASK); } -static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev) +static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev) { + spin_lock_init(&accel_dev->pf.vf2pf_ints_lock); + return 0; } @@ -193,8 +190,6 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data) hw_data->get_sram_bar_id = get_sram_bar_id; hw_data->get_etr_bar_id = get_etr_bar_id; hw_data->get_misc_bar_id = get_misc_bar_id; - hw_data->get_pf2vf_offset = get_pf2vf_offset; - hw_data->get_vintmsk_offset = get_vintmsk_offset; hw_data->get_admin_info = adf_gen2_get_admin_info; hw_data->get_arb_info = adf_gen2_get_arb_info; hw_data->get_sku = get_sku; @@ -203,16 +198,18 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data) hw_data->init_admin_comms = adf_init_admin_comms; hw_data->exit_admin_comms = adf_exit_admin_comms; hw_data->configure_iov_threads = configure_iov_threads; - hw_data->disable_iov = adf_disable_sriov; hw_data->send_admin_init = adf_send_admin_init; hw_data->init_arb = adf_init_arb; hw_data->exit_arb = adf_exit_arb; hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; - hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; hw_data->reset_device = adf_reset_flr; - hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer; + hw_data->get_pf2vf_offset = get_pf2vf_offset; + hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms; + hw_data->disable_iov = adf_disable_sriov; + hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; + adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); } diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h index fece8e38025a..86ee02a86789 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h +++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h @@ -29,7 +29,6 @@ #define ADF_C3XXX_ERRSSMSH_EN BIT(3) #define ADF_C3XXX_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04)) -#define ADF_C3XXX_VINTMSK_OFFSET(i) (0x3A000 + 0x200 + ((i) * 0x04)) /* AE to function mapping */ #define ADF_C3XXX_AE2FUNC_MAP_GRP_A_NUM_REGS 48 diff --git a/drivers/crypto/qat/qat_c3xxx/adf_drv.c b/drivers/crypto/qat/qat_c3xxx/adf_drv.c index 7fb3343ae8b0..cc6e75dc60de 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_drv.c @@ -159,17 +159,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* set dma identifier */ - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) { - dev_err(&pdev->dev, "No usable DMA configuration\n"); - ret = -EFAULT; - goto out_err_disable; - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - } - - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); + if (ret) { + dev_err(&pdev->dev, "No usable DMA configuration\n"); + goto out_err_disable; } if (pci_request_regions(pdev, ADF_C3XXX_DEVICE_NAME)) { @@ -208,12 +201,12 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if 
(pci_save_state(pdev)) { dev_err(&pdev->dev, "Failed to save pci state\n"); ret = -ENOMEM; - goto out_err_free_reg; + goto out_err_disable_aer; } ret = qat_crypto_dev_config(accel_dev); if (ret) - goto out_err_free_reg; + goto out_err_disable_aer; ret = adf_dev_init(accel_dev); if (ret) @@ -229,6 +222,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adf_dev_stop(accel_dev); out_err_dev_shutdown: adf_dev_shutdown(accel_dev); +out_err_disable_aer: + adf_disable_aer(accel_dev); out_err_free_reg: pci_release_regions(accel_pci_dev->pci_dev); out_err_disable: diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c index 15f6b9bdfb22..3e69b520e82f 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c @@ -52,11 +52,6 @@ static u32 get_pf2vf_offset(u32 i) return ADF_C3XXXIOV_PF2VF_OFFSET; } -static u32 get_vintmsk_offset(u32 i) -{ - return ADF_C3XXXIOV_VINTMSK_OFFSET; -} - static int adf_vf_int_noop(struct adf_accel_dev *accel_dev) { return 0; @@ -81,10 +76,10 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data) hw_data->enable_error_correction = adf_vf_void_noop; hw_data->init_admin_comms = adf_vf_int_noop; hw_data->exit_admin_comms = adf_vf_void_noop; - hw_data->send_admin_init = adf_vf2pf_init; + hw_data->send_admin_init = adf_vf2pf_notify_init; hw_data->init_arb = adf_vf_int_noop; hw_data->exit_arb = adf_vf_void_noop; - hw_data->disable_iov = adf_vf2pf_shutdown; + hw_data->disable_iov = adf_vf2pf_notify_shutdown; hw_data->get_accel_mask = get_accel_mask; hw_data->get_ae_mask = get_ae_mask; hw_data->get_num_accels = get_num_accels; @@ -92,11 +87,10 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data) hw_data->get_etr_bar_id = get_etr_bar_id; hw_data->get_misc_bar_id = get_misc_bar_id; hw_data->get_pf2vf_offset = get_pf2vf_offset; - hw_data->get_vintmsk_offset = get_vintmsk_offset; hw_data->get_sku = get_sku; hw_data->enable_ints = adf_vf_void_noop; - hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms; - hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; + hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms; + hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; hw_data->dev_class->instances++; adf_devmgr_update_class_index(hw_data); adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.h b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.h index 7945a9cd1c60..f5de4ce66014 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.h +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.h @@ -13,7 +13,6 @@ #define ADF_C3XXXIOV_ETR_BAR 0 #define ADF_C3XXXIOV_ETR_MAX_BANKS 1 #define ADF_C3XXXIOV_PF2VF_OFFSET 0x200 -#define ADF_C3XXXIOV_VINTMSK_OFFSET 0x208 void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data); void adf_clean_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data); diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c index 067ca5e17d38..1df1b868978d 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c @@ -141,17 +141,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* set dma identifier */ - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) { - dev_err(&pdev->dev, "No usable DMA configuration\n"); - ret = -EFAULT; - goto 
out_err_disable; - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - } - - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); + if (ret) { + dev_err(&pdev->dev, "No usable DMA configuration\n"); + goto out_err_disable; } if (pci_request_regions(pdev, ADF_C3XXXVF_DEVICE_NAME)) { @@ -218,6 +211,7 @@ static void adf_remove(struct pci_dev *pdev) pr_err("QAT: Driver removal failed\n"); return; } + adf_flush_vf_wq(accel_dev); adf_dev_stop(accel_dev); adf_dev_shutdown(accel_dev); adf_cleanup_accel(accel_dev); diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c index 30337390513c..b023c80873bb 100644 --- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c +++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c @@ -113,11 +113,6 @@ static u32 get_pf2vf_offset(u32 i) return ADF_C62X_PF2VF_OFFSET(i); } -static u32 get_vintmsk_offset(u32 i) -{ - return ADF_C62X_VINTMSK_OFFSET(i); -} - static void adf_enable_error_correction(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_device = accel_dev->hw_device; @@ -161,8 +156,10 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev) ADF_C62X_SMIA1_MASK); } -static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev) +static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev) { + spin_lock_init(&accel_dev->pf.vf2pf_ints_lock); + return 0; } @@ -195,8 +192,6 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data) hw_data->get_sram_bar_id = get_sram_bar_id; hw_data->get_etr_bar_id = get_etr_bar_id; hw_data->get_misc_bar_id = get_misc_bar_id; - hw_data->get_pf2vf_offset = get_pf2vf_offset; - hw_data->get_vintmsk_offset = get_vintmsk_offset; hw_data->get_admin_info = adf_gen2_get_admin_info; hw_data->get_arb_info = adf_gen2_get_arb_info; hw_data->get_sku = get_sku; @@ -205,16 +200,18 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data) hw_data->init_admin_comms = adf_init_admin_comms; hw_data->exit_admin_comms = adf_exit_admin_comms; hw_data->configure_iov_threads = configure_iov_threads; - hw_data->disable_iov = adf_disable_sriov; hw_data->send_admin_init = adf_send_admin_init; hw_data->init_arb = adf_init_arb; hw_data->exit_arb = adf_exit_arb; hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; - hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; hw_data->reset_device = adf_reset_flr; - hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer; + hw_data->get_pf2vf_offset = get_pf2vf_offset; + hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms; + hw_data->disable_iov = adf_disable_sriov; + hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; + adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); } diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h index 53d3cb577f5b..e6664bd20c91 100644 --- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h +++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h @@ -30,7 +30,6 @@ #define ADF_C62X_ERRSSMSH_EN BIT(3) #define ADF_C62X_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04)) -#define ADF_C62X_VINTMSK_OFFSET(i) (0x3A000 + 0x200 + ((i) * 0x04)) /* AE to function mapping */ #define ADF_C62X_AE2FUNC_MAP_GRP_A_NUM_REGS 80 diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c index 1f5de442e1e6..bf251dfe74b3 100644 --- 
a/drivers/crypto/qat/qat_c62x/adf_drv.c +++ b/drivers/crypto/qat/qat_c62x/adf_drv.c @@ -159,17 +159,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* set dma identifier */ - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) { - dev_err(&pdev->dev, "No usable DMA configuration\n"); - ret = -EFAULT; - goto out_err_disable; - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - } - - } else { - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); + if (ret) { + dev_err(&pdev->dev, "No usable DMA configuration\n"); + goto out_err_disable; } if (pci_request_regions(pdev, ADF_C62X_DEVICE_NAME)) { @@ -208,12 +201,12 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (pci_save_state(pdev)) { dev_err(&pdev->dev, "Failed to save pci state\n"); ret = -ENOMEM; - goto out_err_free_reg; + goto out_err_disable_aer; } ret = qat_crypto_dev_config(accel_dev); if (ret) - goto out_err_free_reg; + goto out_err_disable_aer; ret = adf_dev_init(accel_dev); if (ret) @@ -229,6 +222,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adf_dev_stop(accel_dev); out_err_dev_shutdown: adf_dev_shutdown(accel_dev); +out_err_disable_aer: + adf_disable_aer(accel_dev); out_err_free_reg: pci_release_regions(accel_pci_dev->pci_dev); out_err_disable: diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c index d231583428c9..3bee3e467363 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c @@ -52,11 +52,6 @@ static u32 get_pf2vf_offset(u32 i) return ADF_C62XIOV_PF2VF_OFFSET; } -static u32 get_vintmsk_offset(u32 i) -{ - return ADF_C62XIOV_VINTMSK_OFFSET; -} - static int adf_vf_int_noop(struct adf_accel_dev *accel_dev) { return 0; @@ -81,10 +76,10 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data) hw_data->enable_error_correction = adf_vf_void_noop; hw_data->init_admin_comms = adf_vf_int_noop; hw_data->exit_admin_comms = adf_vf_void_noop; - hw_data->send_admin_init = adf_vf2pf_init; + hw_data->send_admin_init = adf_vf2pf_notify_init; hw_data->init_arb = adf_vf_int_noop; hw_data->exit_arb = adf_vf_void_noop; - hw_data->disable_iov = adf_vf2pf_shutdown; + hw_data->disable_iov = adf_vf2pf_notify_shutdown; hw_data->get_accel_mask = get_accel_mask; hw_data->get_ae_mask = get_ae_mask; hw_data->get_num_accels = get_num_accels; @@ -92,11 +87,10 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data) hw_data->get_etr_bar_id = get_etr_bar_id; hw_data->get_misc_bar_id = get_misc_bar_id; hw_data->get_pf2vf_offset = get_pf2vf_offset; - hw_data->get_vintmsk_offset = get_vintmsk_offset; hw_data->get_sku = get_sku; hw_data->enable_ints = adf_vf_void_noop; - hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms; - hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; + hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms; + hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; hw_data->dev_class->instances++; adf_devmgr_update_class_index(hw_data); adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h index a6c04cf7a43c..794778c48678 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h +++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h 
diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c
index d231583428c9..3bee3e467363 100644
--- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c
+++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c
@@ -52,11 +52,6 @@ static u32 get_pf2vf_offset(u32 i)
 	return ADF_C62XIOV_PF2VF_OFFSET;
 }
 
-static u32 get_vintmsk_offset(u32 i)
-{
-	return ADF_C62XIOV_VINTMSK_OFFSET;
-}
-
 static int adf_vf_int_noop(struct adf_accel_dev *accel_dev)
 {
 	return 0;
@@ -81,10 +76,10 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data)
 	hw_data->enable_error_correction = adf_vf_void_noop;
 	hw_data->init_admin_comms = adf_vf_int_noop;
 	hw_data->exit_admin_comms = adf_vf_void_noop;
-	hw_data->send_admin_init = adf_vf2pf_init;
+	hw_data->send_admin_init = adf_vf2pf_notify_init;
 	hw_data->init_arb = adf_vf_int_noop;
 	hw_data->exit_arb = adf_vf_void_noop;
-	hw_data->disable_iov = adf_vf2pf_shutdown;
+	hw_data->disable_iov = adf_vf2pf_notify_shutdown;
 	hw_data->get_accel_mask = get_accel_mask;
 	hw_data->get_ae_mask = get_ae_mask;
 	hw_data->get_num_accels = get_num_accels;
@@ -92,11 +87,10 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data)
 	hw_data->get_etr_bar_id = get_etr_bar_id;
 	hw_data->get_misc_bar_id = get_misc_bar_id;
 	hw_data->get_pf2vf_offset = get_pf2vf_offset;
-	hw_data->get_vintmsk_offset = get_vintmsk_offset;
 	hw_data->get_sku = get_sku;
 	hw_data->enable_ints = adf_vf_void_noop;
-	hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms;
-	hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
+	hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms;
+	hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
 	hw_data->dev_class->instances++;
 	adf_devmgr_update_class_index(hw_data);
 	adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h
index a6c04cf7a43c..794778c48678 100644
--- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h
+++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h
@@ -13,7 +13,6 @@
 #define ADF_C62XIOV_ETR_BAR 0
 #define ADF_C62XIOV_ETR_MAX_BANKS 1
 #define ADF_C62XIOV_PF2VF_OFFSET 0x200
-#define ADF_C62XIOV_VINTMSK_OFFSET 0x208
 
 void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data);
 void adf_clean_hw_data_c62xiov(struct adf_hw_device_data *hw_data);
diff --git a/drivers/crypto/qat/qat_c62xvf/adf_drv.c b/drivers/crypto/qat/qat_c62xvf/adf_drv.c
index 51ea88c0b17d..8103bd81d617 100644
--- a/drivers/crypto/qat/qat_c62xvf/adf_drv.c
+++ b/drivers/crypto/qat/qat_c62xvf/adf_drv.c
@@ -141,17 +141,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 
 	/* set dma identifier */
-	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
-		if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
-			dev_err(&pdev->dev, "No usable DMA configuration\n");
-			ret = -EFAULT;
-			goto out_err_disable;
-		} else {
-			pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-		}
-
-	} else {
-		pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
+	if (ret) {
+		dev_err(&pdev->dev, "No usable DMA configuration\n");
+		goto out_err_disable;
 	}
 
 	if (pci_request_regions(pdev, ADF_C62XVF_DEVICE_NAME)) {
@@ -218,6 +211,7 @@ static void adf_remove(struct pci_dev *pdev)
 		pr_err("QAT: Driver removal failed\n");
 		return;
 	}
+	adf_flush_vf_wq(accel_dev);
 	adf_dev_stop(accel_dev);
 	adf_dev_shutdown(accel_dev);
 	adf_cleanup_accel(accel_dev);
diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h
index ac435b44f1d2..38c0af6d4e43 100644
--- a/drivers/crypto/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h
@@ -18,8 +18,6 @@
 #define ADF_4XXX_DEVICE_NAME "4xxx"
 #define ADF_4XXX_PCI_DEVICE_ID 0x4940
 #define ADF_4XXXIOV_PCI_DEVICE_ID 0x4941
-#define ADF_ERRSOU3 (0x3A000 + 0x0C)
-#define ADF_ERRSOU5 (0x3A000 + 0xD8)
 #define ADF_DEVICE_FUSECTL_OFFSET 0x40
 #define ADF_DEVICE_LEGFUSE_OFFSET 0x4C
 #define ADF_DEVICE_FUSECTL_MASK 0x80000000
@@ -156,7 +154,6 @@ struct adf_hw_device_data {
 	u32 (*get_num_aes)(struct adf_hw_device_data *self);
 	u32 (*get_num_accels)(struct adf_hw_device_data *self);
 	u32 (*get_pf2vf_offset)(u32 i);
-	u32 (*get_vintmsk_offset)(u32 i);
 	void (*get_arb_info)(struct arb_info *arb_csrs_info);
 	void (*get_admin_info)(struct admin_info *admin_csrs_info);
 	enum dev_sku_info (*get_sku)(struct adf_hw_device_data *self);
@@ -174,7 +171,7 @@ struct adf_hw_device_data {
 				      bool enable);
 	void (*enable_ints)(struct adf_accel_dev *accel_dev);
 	void (*set_ssm_wdtimer)(struct adf_accel_dev *accel_dev);
-	int (*enable_vf2pf_comms)(struct adf_accel_dev *accel_dev);
+	int (*enable_pfvf_comms)(struct adf_accel_dev *accel_dev);
 	void (*reset_device)(struct adf_accel_dev *accel_dev);
 	void (*set_msix_rttable)(struct adf_accel_dev *accel_dev);
 	char *(*uof_get_name)(u32 obj_num);
@@ -227,7 +224,6 @@ struct adf_fw_loader_data {
 
 struct adf_accel_vf_info {
 	struct adf_accel_dev *accel_dev;
-	struct tasklet_struct vf2pf_bh_tasklet;
 	struct mutex pf2vf_lock; /* protect CSR access for PF2VF messages */
 	struct ratelimit_state vf2pf_ratelimit;
 	u32 vf_nr;
@@ -249,6 +245,8 @@ struct adf_accel_dev {
 	struct adf_accel_pci accel_pci_dev;
 	union {
 		struct {
+			/* protects VF2PF interrupts access */
+			spinlock_t vf2pf_ints_lock;
 			/* vf_info is non-zero when SR-IOV is init'ed */
 			struct adf_accel_vf_info *vf_info;
 		} pf;
diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c
index d2ae293d0df6..ed3e40bc56eb 100644
--- a/drivers/crypto/qat/qat_common/adf_aer.c
+++ b/drivers/crypto/qat/qat_common/adf_aer.c
@@ -194,7 +194,7 @@ int adf_enable_aer(struct adf_accel_dev *accel_dev)
 EXPORT_SYMBOL_GPL(adf_enable_aer);
 
 /**
- * adf_disable_aer() - Enable Advance Error Reporting for acceleration device
+ * adf_disable_aer() - Disable Advanced Error Reporting for acceleration device
  * @accel_dev:  Pointer to acceleration device.
  *
  * Function disables PCI Advance Error Reporting for the
diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h
index c61476553728..4261749fae8d 100644
--- a/drivers/crypto/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/qat/qat_common/adf_common_drv.h
@@ -193,22 +193,23 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs);
 void adf_disable_sriov(struct adf_accel_dev *accel_dev);
 void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
 				  u32 vf_mask);
+void adf_disable_vf2pf_interrupts_irq(struct adf_accel_dev *accel_dev,
+				      u32 vf_mask);
 void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
 				 u32 vf_mask);
 void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev);
 void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev);
+void adf_schedule_vf2pf_handler(struct adf_accel_vf_info *vf_info);
 
-int adf_vf2pf_init(struct adf_accel_dev *accel_dev);
-void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev);
+int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev);
+void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev);
 int adf_init_pf_wq(void);
 void adf_exit_pf_wq(void);
 int adf_init_vf_wq(void);
 void adf_exit_vf_wq(void);
+void adf_flush_vf_wq(struct adf_accel_dev *accel_dev);
 #else
-static inline int adf_sriov_configure(struct pci_dev *pdev, int numvfs)
-{
-	return 0;
-}
+#define adf_sriov_configure NULL
 
 static inline void adf_disable_sriov(struct adf_accel_dev *accel_dev)
 {
@@ -222,12 +223,12 @@ static inline void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
 {
 }
 
-static inline int adf_vf2pf_init(struct adf_accel_dev *accel_dev)
+static inline int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev)
 {
 	return 0;
 }
 
-static inline void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev)
+static inline void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev)
 {
 }
 
@@ -249,5 +250,9 @@ static inline void adf_exit_vf_wq(void)
 {
 }
 
+static inline void adf_flush_vf_wq(struct adf_accel_dev *accel_dev)
+{
+}
+
 #endif
 #endif
diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c
index 744c40351428..60bc7b991d35 100644
--- a/drivers/crypto/qat/qat_common/adf_init.c
+++ b/drivers/crypto/qat/qat_common/adf_init.c
@@ -61,6 +61,7 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
 	struct service_hndl *service;
 	struct list_head *list_itr;
 	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+	int ret;
 
 	if (!hw_data) {
 		dev_err(&GET_DEV(accel_dev),
@@ -88,8 +89,6 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
 		return -EFAULT;
 	}
 
-	hw_data->enable_ints(accel_dev);
-
 	if (adf_ae_init(accel_dev)) {
 		dev_err(&GET_DEV(accel_dev),
 			"Failed to initialise Acceleration Engine\n");
@@ -110,6 +109,13 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
 	}
 	set_bit(ADF_STATUS_IRQ_ALLOCATED, &accel_dev->status);
 
+	hw_data->enable_ints(accel_dev);
+	hw_data->enable_error_correction(accel_dev);
+
+	ret = hw_data->enable_pfvf_comms(accel_dev);
+	if (ret)
+		return ret;
+
 	/*
 	 * Subservice initialisation is divided into two stages: init and start.
 	 * This is to facilitate any ordering dependencies between services
@@ -126,9 +132,6 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
 		set_bit(accel_dev->accel_id, service->init_status);
 	}
 
-	hw_data->enable_error_correction(accel_dev);
-	hw_data->enable_vf2pf_comms(accel_dev);
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(adf_dev_init);
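The adf_init.c hunk is the behavioural core of this group: interrupt unmasking, error-correction setup and PF/VF comms all move to after IRQ allocation, and the enable_pfvf_comms return code is finally checked. A hypothetical condensation of the resulting order; the hw_data callbacks are the driver's own, the surrounding glue is invented:

/*
 * Nothing that can raise an interrupt is enabled until the handlers
 * registered by adf_isr_resource_alloc() are in place, and a PFVF
 * communication failure now propagates to the caller.
 */
static int example_dev_init_order(struct adf_accel_dev *accel_dev)
{
	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
	int ret;

	ret = adf_isr_resource_alloc(accel_dev);	/* handlers first */
	if (ret)
		return ret;

	hw_data->enable_ints(accel_dev);		/* only then unmask */
	hw_data->enable_error_correction(accel_dev);

	return hw_data->enable_pfvf_comms(accel_dev);	/* checked now */
}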
 * This is to facilitate any ordering dependencies between services
diff --git a/drivers/crypto/qat/qat_common/adf_isr.c b/drivers/crypto/qat/qat_common/adf_isr.c
index e3ad5587be49..c678d5c531aa 100644
--- a/drivers/crypto/qat/qat_common/adf_isr.c
+++ b/drivers/crypto/qat/qat_common/adf_isr.c
@@ -15,6 +15,14 @@
 #include "adf_transport_access_macros.h"
 #include "adf_transport_internal.h"
 
+#define ADF_MAX_NUM_VFS	32
+#define ADF_ERRSOU3	(0x3A000 + 0x0C)
+#define ADF_ERRSOU5	(0x3A000 + 0xD8)
+#define ADF_ERRMSK3	(0x3A000 + 0x1C)
+#define ADF_ERRMSK5	(0x3A000 + 0xDC)
+#define ADF_ERR_REG_VF2PF_L(vf_src)	(((vf_src) & 0x01FFFE00) >> 9)
+#define ADF_ERR_REG_VF2PF_U(vf_src)	(((vf_src) & 0x0000FFFF) << 16)
+
 static int adf_enable_msix(struct adf_accel_dev *accel_dev)
 {
 	struct adf_accel_pci *pci_dev_info = &accel_dev->accel_pci_dev;
@@ -71,14 +79,23 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
 	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
 	struct adf_bar *pmisc =
 		&GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)];
-	void __iomem *pmisc_bar_addr = pmisc->virt_addr;
-	u32 vf_mask;
+	void __iomem *pmisc_addr = pmisc->virt_addr;
+	u32 errsou3, errsou5, errmsk3, errmsk5;
+	unsigned long vf_mask;
 
 	/* Get the interrupt sources triggered by VFs */
-	vf_mask = ((ADF_CSR_RD(pmisc_bar_addr, ADF_ERRSOU5) &
-		    0x0000FFFF) << 16) |
-		  ((ADF_CSR_RD(pmisc_bar_addr, ADF_ERRSOU3) &
-		    0x01FFFE00) >> 9);
+	errsou3 = ADF_CSR_RD(pmisc_addr, ADF_ERRSOU3);
+	errsou5 = ADF_CSR_RD(pmisc_addr, ADF_ERRSOU5);
+	vf_mask = ADF_ERR_REG_VF2PF_L(errsou3);
+	vf_mask |= ADF_ERR_REG_VF2PF_U(errsou5);
+
+	/* To avoid adding duplicate entries to work queue, clear
+	 * vf_mask bits that are already masked in the ERRMSK registers.
+	 */
+	errmsk3 = ADF_CSR_RD(pmisc_addr, ADF_ERRMSK3);
+	errmsk5 = ADF_CSR_RD(pmisc_addr, ADF_ERRMSK5);
+	vf_mask &= ~ADF_ERR_REG_VF2PF_L(errmsk3);
+	vf_mask &= ~ADF_ERR_REG_VF2PF_U(errmsk5);
 
 	if (vf_mask) {
 		struct adf_accel_vf_info *vf_info;
@@ -86,15 +103,13 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
 		int i;
 
 		/* Disable VF2PF interrupts for VFs with pending ints */
-		adf_disable_vf2pf_interrupts(accel_dev, vf_mask);
+		adf_disable_vf2pf_interrupts_irq(accel_dev, vf_mask);
 
 		/*
-		 * Schedule tasklets to handle VF2PF interrupt BHs
-		 * unless the VF is malicious and is attempting to
-		 * flood the host OS with VF2PF interrupts.
+		 * Handle VF2PF interrupt unless the VF is malicious and
+		 * is attempting to flood the host OS with VF2PF interrupts.
 		 */
-		for_each_set_bit(i, (const unsigned long *)&vf_mask,
-				 (sizeof(vf_mask) * BITS_PER_BYTE)) {
+		for_each_set_bit(i, &vf_mask, ADF_MAX_NUM_VFS) {
 			vf_info = accel_dev->pf.vf_info + i;
 
 			if (!__ratelimit(&vf_info->vf2pf_ratelimit)) {
@@ -104,8 +119,7 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
 				continue;
 			}
 
-			/* Tasklet will re-enable ints from this VF */
-			tasklet_hi_schedule(&vf_info->vf2pf_bh_tasklet);
+			adf_schedule_vf2pf_handler(vf_info);
 			irq_handled = true;
 		}
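Worth spelling out what the new ISR logic above computes: pending VF2PF sources are read from ERRSOU3/5, then any source already masked in ERRMSK3/5 is cleared from the set, so a VF whose interrupt was masked by a previous pass cannot be queued a second time while its handler is still outstanding. Reduced to its essence, with the CSR reads stubbed out:

#include <linux/bitops.h>
#include <linux/types.h>

/* only sources that are pending and not already masked survive */
static void example_handle_vf_sources(u32 errsou, u32 errmsk,
				      unsigned int max_vfs)
{
	unsigned long vf_mask = errsou & ~errmsk;
	unsigned int i;

	for_each_set_bit(i, &vf_mask, max_vfs) {
		/* mask VF i and defer its handling to process context */
	}
}

Note also that vf_mask becomes unsigned long so that for_each_set_bit() can walk it directly, bounded by ADF_MAX_NUM_VFS instead of the full word width.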
diff --git a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c
index a1b77bd7a894..976b9ab7617c 100644
--- a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c
+++ b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c
@@ -11,28 +11,8 @@
 #define ADF_DH895XCC_ERRMSK5 (ADF_DH895XCC_EP_OFFSET + 0xDC)
 #define ADF_DH895XCC_ERRMSK5_VF2PF_U_MASK(vf_mask) (vf_mask >> 16)
 
-void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
-{
-	struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
-	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
-	void __iomem *pmisc_bar_addr =
-		pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr;
-
-	ADF_CSR_WR(pmisc_bar_addr, hw_data->get_vintmsk_offset(0), 0x0);
-}
-
-void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
-{
-	struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
-	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
-	void __iomem *pmisc_bar_addr =
-		pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr;
-
-	ADF_CSR_WR(pmisc_bar_addr, hw_data->get_vintmsk_offset(0), 0x2);
-}
-
-void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
-				 u32 vf_mask)
+static void __adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
+					  u32 vf_mask)
 {
 	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
 	struct adf_bar *pmisc =
@@ -55,7 +35,17 @@ void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
 	}
 }
 
-void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
+void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&accel_dev->pf.vf2pf_ints_lock, flags);
+	__adf_enable_vf2pf_interrupts(accel_dev, vf_mask);
+	spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags);
+}
+
+static void __adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
+					   u32 vf_mask)
 {
 	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
 	struct adf_bar *pmisc =
@@ -78,6 +68,22 @@ void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
 	}
 }
 
+void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&accel_dev->pf.vf2pf_ints_lock, flags);
+	__adf_disable_vf2pf_interrupts(accel_dev, vf_mask);
+	spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags);
+}
+
+void adf_disable_vf2pf_interrupts_irq(struct adf_accel_dev *accel_dev, u32 vf_mask)
+{
+	spin_lock(&accel_dev->pf.vf2pf_ints_lock);
+	__adf_disable_vf2pf_interrupts(accel_dev, vf_mask);
+	spin_unlock(&accel_dev->pf.vf2pf_ints_lock);
+}
+
 static int __adf_iov_putmsg(struct adf_accel_dev *accel_dev, u32 msg, u8 vf_nr)
 {
 	struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
@@ -186,7 +192,6 @@ int adf_iov_putmsg(struct adf_accel_dev *accel_dev, u32 msg, u8 vf_nr)
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(adf_iov_putmsg);
 
 void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
 {
@@ -216,7 +221,7 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
 		resp = (ADF_PF2VF_MSGORIGIN_SYSTEM |
 			 (ADF_PF2VF_MSGTYPE_VERSION_RESP <<
 			  ADF_PF2VF_MSGTYPE_SHIFT) |
-			 (ADF_PFVF_COMPATIBILITY_VERSION <<
+			 (ADF_PFVF_COMPAT_THIS_VERSION <<
 			  ADF_PF2VF_VERSION_RESP_VERS_SHIFT));
 
 		dev_dbg(&GET_DEV(accel_dev),
@@ -226,19 +231,19 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
 		if (vf_compat_ver < hw_data->min_iov_compat_ver) {
 			dev_err(&GET_DEV(accel_dev),
 				"VF (vers %d) incompatible with PF (vers %d)\n",
-				vf_compat_ver, ADF_PFVF_COMPATIBILITY_VERSION);
+				vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION);
 			resp |= ADF_PF2VF_VF_INCOMPATIBLE <<
 				ADF_PF2VF_VERSION_RESP_RESULT_SHIFT;
-		} else if (vf_compat_ver > ADF_PFVF_COMPATIBILITY_VERSION) {
+		} else if (vf_compat_ver > ADF_PFVF_COMPAT_THIS_VERSION) {
 			dev_err(&GET_DEV(accel_dev),
 				"VF (vers %d) compat with PF (vers %d) unkn.\n",
-				vf_compat_ver, ADF_PFVF_COMPATIBILITY_VERSION);
+				vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION);
 			resp |= ADF_PF2VF_VF_COMPAT_UNKNOWN <<
 				ADF_PF2VF_VERSION_RESP_RESULT_SHIFT;
 		} else {
 			dev_dbg(&GET_DEV(accel_dev),
 				"VF (vers %d) compatible with PF (vers %d)\n",
-				vf_compat_ver, ADF_PFVF_COMPATIBILITY_VERSION);
+				vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION);
 			resp |= ADF_PF2VF_VF_COMPATIBLE <<
 				ADF_PF2VF_VERSION_RESP_RESULT_SHIFT;
 		}
@@ -251,7 +256,7 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
 		resp = (ADF_PF2VF_MSGORIGIN_SYSTEM |
 			 (ADF_PF2VF_MSGTYPE_VERSION_RESP <<
 			  ADF_PF2VF_MSGTYPE_SHIFT) |
-			 (ADF_PFVF_COMPATIBILITY_VERSION <<
+			 (ADF_PFVF_COMPAT_THIS_VERSION <<
 			  ADF_PF2VF_VERSION_RESP_VERS_SHIFT));
 		resp |= ADF_PF2VF_VF_COMPATIBLE <<
 			ADF_PF2VF_VERSION_RESP_RESULT_SHIFT;
@@ -284,6 +289,7 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
 
 	/* re-enable interrupt on PF from this VF */
 	adf_enable_vf2pf_interrupts(accel_dev, (1 << vf_nr));
+
 	return;
 err:
 	dev_dbg(&GET_DEV(accel_dev), "Unknown message from VF%d (0x%x);\n",
@@ -313,8 +319,10 @@ static int adf_vf2pf_request_version(struct adf_accel_dev *accel_dev)
 
 	msg = ADF_VF2PF_MSGORIGIN_SYSTEM;
 	msg |= ADF_VF2PF_MSGTYPE_COMPAT_VER_REQ << ADF_VF2PF_MSGTYPE_SHIFT;
-	msg |= ADF_PFVF_COMPATIBILITY_VERSION << ADF_VF2PF_COMPAT_VER_REQ_SHIFT;
-	BUILD_BUG_ON(ADF_PFVF_COMPATIBILITY_VERSION > 255);
+	msg |= ADF_PFVF_COMPAT_THIS_VERSION << ADF_VF2PF_COMPAT_VER_REQ_SHIFT;
+	BUILD_BUG_ON(ADF_PFVF_COMPAT_THIS_VERSION > 255);
+
+	reinit_completion(&accel_dev->vf.iov_msg_completion);
 
 	/* Send request from VF to PF */
 	ret = adf_iov_putmsg(accel_dev, msg, 0);
@@ -338,14 +346,16 @@ static int adf_vf2pf_request_version(struct adf_accel_dev *accel_dev)
 		break;
 	case ADF_PF2VF_VF_COMPAT_UNKNOWN:
 		/* VF is newer than PF and decides whether it is compatible */
-		if (accel_dev->vf.pf_version >= hw_data->min_iov_compat_ver)
+		if (accel_dev->vf.pf_version >= hw_data->min_iov_compat_ver) {
+			accel_dev->vf.compatible = ADF_PF2VF_VF_COMPATIBLE;
 			break;
+		}
 		fallthrough;
 	case ADF_PF2VF_VF_INCOMPATIBLE:
 		dev_err(&GET_DEV(accel_dev),
 			"PF (vers %d) and VF (vers %d) are not compatible\n",
 			accel_dev->vf.pf_version,
-			ADF_PFVF_COMPATIBILITY_VERSION);
+			ADF_PFVF_COMPAT_THIS_VERSION);
 		return -EINVAL;
 	default:
 		dev_err(&GET_DEV(accel_dev),
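The refactoring above splits each ERRMSK read-modify-write into a lock-free __helper plus locked wrappers; the _irq variant exists because the ISR already runs with interrupts off, so saving and restoring flags there would be wasted work. A self-contained sketch of the pattern; the lock would be initialised with spin_lock_init() at setup time, as adf_enable_pf2vf_comms() now does, and the shadow field stands in for the CSR:

#include <linux/spinlock.h>
#include <linux/types.h>

struct example_dev {
	spinlock_t ints_lock;
	u32 masked;
};

static void __example_disable_ints(struct example_dev *dev, u32 vf_mask)
{
	dev->masked |= vf_mask;	/* the read-modify-write to protect */
}

static void example_disable_ints(struct example_dev *dev, u32 vf_mask)
{
	unsigned long flags;

	spin_lock_irqsave(&dev->ints_lock, flags);	/* process context */
	__example_disable_ints(dev, vf_mask);
	spin_unlock_irqrestore(&dev->ints_lock, flags);
}

static void example_disable_ints_irq(struct example_dev *dev, u32 vf_mask)
{
	spin_lock(&dev->ints_lock);	/* IRQs are already off in the ISR */
	__example_disable_ints(dev, vf_mask);
	spin_unlock(&dev->ints_lock);
}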
diff --git a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.h b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.h
index 0690c031bfce..ffd43aa50b57 100644
--- a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.h
+++ b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.h
@@ -52,7 +52,7 @@
  * IN_USE_BY pattern as part of a collision control scheme (see adf_iov_putmsg).
  */
 
-#define ADF_PFVF_COMPATIBILITY_VERSION	0x1	/* PF<->VF compat */
+#define ADF_PFVF_COMPAT_THIS_VERSION	0x1	/* PF<->VF compat */
 
 /* PF->VF messages */
 #define ADF_PF2VF_INT			BIT(0)
diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c
index 8c822c2861c2..90ec057f9183 100644
--- a/drivers/crypto/qat/qat_common/adf_sriov.c
+++ b/drivers/crypto/qat/qat_common/adf_sriov.c
@@ -24,9 +24,8 @@ static void adf_iov_send_resp(struct work_struct *work)
 	kfree(pf2vf_resp);
 }
 
-static void adf_vf2pf_bh_handler(void *data)
+void adf_schedule_vf2pf_handler(struct adf_accel_vf_info *vf_info)
 {
-	struct adf_accel_vf_info *vf_info = (struct adf_accel_vf_info *)data;
 	struct adf_pf2vf_resp *pf2vf_resp;
 
 	pf2vf_resp = kzalloc(sizeof(*pf2vf_resp), GFP_ATOMIC);
@@ -52,9 +51,6 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev)
 		vf_info->accel_dev = accel_dev;
 		vf_info->vf_nr = i;
 
-		tasklet_init(&vf_info->vf2pf_bh_tasklet,
-			     (void *)adf_vf2pf_bh_handler,
-			     (unsigned long)vf_info);
 		mutex_init(&vf_info->pf2vf_lock);
 		ratelimit_state_init(&vf_info->vf2pf_ratelimit,
 				     DEFAULT_RATELIMIT_INTERVAL,
@@ -110,8 +106,6 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev)
 	hw_data->configure_iov_threads(accel_dev, false);
 
 	for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++) {
-		tasklet_disable(&vf->vf2pf_bh_tasklet);
-		tasklet_kill(&vf->vf2pf_bh_tasklet);
 		mutex_destroy(&vf->pf2vf_lock);
 	}
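adf_schedule_vf2pf_handler() above completes the tasklet-to-workqueue move on the PF side: the ISR allocates a small work item with GFP_ATOMIC and defers to the PF workqueue, where sleeping (for example to take the pf2vf mutex) is allowed. An illustrative reduction, with invented names and payload:

#include <linux/slab.h>
#include <linux/workqueue.h>

struct example_resp {
	struct work_struct work;
	int vf_nr;
};

static void example_handle_resp(struct work_struct *work)
{
	struct example_resp *resp =
		container_of(work, struct example_resp, work);

	/* process-context handling of VF resp->vf_nr goes here */
	kfree(resp);	/* the work item frees itself */
}

/* callable from hard-IRQ context */
static bool example_schedule_resp(struct workqueue_struct *wq, int vf_nr)
{
	struct example_resp *resp;

	resp = kzalloc(sizeof(*resp), GFP_ATOMIC);
	if (!resp)
		return false;

	resp->vf_nr = vf_nr;
	INIT_WORK(&resp->work, example_handle_resp);
	queue_work(wq, &resp->work);
	return true;
}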
diff --git a/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c b/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c
index e85bd62d134a..3e25fac051b2 100644
--- a/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c
+++ b/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c
@@ -5,14 +5,14 @@
 #include "adf_pf2vf_msg.h"
 
 /**
- * adf_vf2pf_init() - send init msg to PF
+ * adf_vf2pf_notify_init() - send init msg to PF
  * @accel_dev:  Pointer to acceleration VF device.
  *
  * Function sends an init message from the VF to a PF
  *
  * Return: 0 on success, error code otherwise.
  */
-int adf_vf2pf_init(struct adf_accel_dev *accel_dev)
+int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev)
 {
 	u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM |
 		(ADF_VF2PF_MSGTYPE_INIT << ADF_VF2PF_MSGTYPE_SHIFT));
@@ -25,17 +25,17 @@ int adf_vf2pf_init(struct adf_accel_dev *accel_dev)
 	set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(adf_vf2pf_init);
+EXPORT_SYMBOL_GPL(adf_vf2pf_notify_init);
 
 /**
- * adf_vf2pf_shutdown() - send shutdown msg to PF
+ * adf_vf2pf_notify_shutdown() - send shutdown msg to PF
  * @accel_dev:  Pointer to acceleration VF device.
  *
  * Function sends a shutdown message from the VF to a PF
  *
 * Return: void
 */
-void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev)
+void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev)
 {
 	u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM |
 	    (ADF_VF2PF_MSGTYPE_SHUTDOWN << ADF_VF2PF_MSGTYPE_SHIFT));
@@ -45,4 +45,4 @@ void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev)
 		dev_err(&GET_DEV(accel_dev),
 			"Failed to send Shutdown event to PF\n");
 }
-EXPORT_SYMBOL_GPL(adf_vf2pf_shutdown);
+EXPORT_SYMBOL_GPL(adf_vf2pf_notify_shutdown);
diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c
index 888388acb6bd..7828a6573f3e 100644
--- a/drivers/crypto/qat/qat_common/adf_vf_isr.c
+++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c
@@ -18,6 +18,7 @@
 #include "adf_pf2vf_msg.h"
 
 #define ADF_VINTSOU_OFFSET	0x204
+#define ADF_VINTMSK_OFFSET	0x208
 #define ADF_VINTSOU_BUN		BIT(0)
 #define ADF_VINTSOU_PF2VF	BIT(1)
 
@@ -28,6 +29,27 @@ struct adf_vf_stop_data {
 	struct work_struct work;
 };
 
+void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
+{
+	struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
+	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+	void __iomem *pmisc_bar_addr =
+		pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr;
+
+	ADF_CSR_WR(pmisc_bar_addr, ADF_VINTMSK_OFFSET, 0x0);
+}
+
+void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
+{
+	struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
+	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+	void __iomem *pmisc_bar_addr =
+		pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr;
+
+	ADF_CSR_WR(pmisc_bar_addr, ADF_VINTMSK_OFFSET, 0x2);
+}
+EXPORT_SYMBOL_GPL(adf_disable_pf2vf_interrupts);
+
 static int adf_enable_msi(struct adf_accel_dev *accel_dev)
 {
 	struct adf_accel_pci *pci_dev_info = &accel_dev->accel_pci_dev;
@@ -160,11 +182,21 @@ static irqreturn_t adf_isr(int irq, void *privdata)
 	struct adf_bar *pmisc =
 		&GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)];
 	void __iomem *pmisc_bar_addr = pmisc->virt_addr;
-	u32 v_int;
+	bool handled = false;
+	u32 v_int, v_mask;
 
 	/* Read VF INT source CSR to determine the source of VF interrupt */
 	v_int = ADF_CSR_RD(pmisc_bar_addr, ADF_VINTSOU_OFFSET);
 
+	/* Read VF INT mask CSR to determine which sources are masked */
+	v_mask = ADF_CSR_RD(pmisc_bar_addr, ADF_VINTMSK_OFFSET);
+
+	/*
+	 * Recompute v_int ignoring sources that are masked. This is to
+	 * avoid rescheduling the tasklet for interrupts already handled
+	 */
+	v_int &= ~v_mask;
+
 	/* Check for PF2VF interrupt */
 	if (v_int & ADF_VINTSOU_PF2VF) {
 		/* Disable PF to VF interrupt */
@@ -172,7 +204,7 @@ static irqreturn_t adf_isr(int irq, void *privdata)
 
 		/* Schedule tasklet to handle interrupt BH */
 		tasklet_hi_schedule(&accel_dev->vf.pf2vf_bh_tasklet);
-		return IRQ_HANDLED;
+		handled = true;
 	}
 
 	/* Check bundle interrupt */
@@ -184,10 +216,10 @@ static irqreturn_t adf_isr(int irq, void *privdata)
 		csr_ops->write_csr_int_flag_and_col(bank->csr_addr,
 						    bank->bank_number, 0);
 		tasklet_hi_schedule(&bank->resp_handler);
-		return IRQ_HANDLED;
+		handled = true;
 	}
 
-	return IRQ_NONE;
+	return handled ? IRQ_HANDLED : IRQ_NONE;
 }
 
 static int adf_request_msi_irq(struct adf_accel_dev *accel_dev)
@@ -285,6 +317,30 @@ int adf_vf_isr_resource_alloc(struct adf_accel_dev *accel_dev)
 }
 EXPORT_SYMBOL_GPL(adf_vf_isr_resource_alloc);
 
+/**
+ * adf_flush_vf_wq() - Flush workqueue for VF
+ * @accel_dev:  Pointer to acceleration device.
+ *
+ * Function disables the PF/VF interrupts on the VF so that no new messages
+ * are received and flushes the workqueue 'adf_vf_stop_wq'.
+ *
+ * Return: void.
+ */
+void adf_flush_vf_wq(struct adf_accel_dev *accel_dev)
+{
+	adf_disable_pf2vf_interrupts(accel_dev);
+
+	flush_workqueue(adf_vf_stop_wq);
+}
+EXPORT_SYMBOL_GPL(adf_flush_vf_wq);
+
+/**
+ * adf_init_vf_wq() - Init workqueue for VF
+ *
+ * Function initialises the workqueue 'adf_vf_stop_wq' for VF.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
 int __init adf_init_vf_wq(void)
 {
 	adf_vf_stop_wq = alloc_workqueue("adf_vf_stop_wq", WQ_MEM_RECLAIM, 0);
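The VF ISR above now aggregates a handled flag across the PF2VF and bundle sources instead of returning at the first match; with one MSI shared by both sources, an early return could leave a simultaneously raised second source unserviced until the next interrupt. Schematically, with the source-check helpers as placeholders:

#include <linux/interrupt.h>

static bool example_check_source_a(void *dev) { return false; }
static bool example_check_source_b(void *dev) { return false; }

/* every source is polled; the return value reflects all of them */
static irqreturn_t example_isr(int irq, void *dev)
{
	bool handled = false;

	if (example_check_source_a(dev))
		handled = true;

	if (example_check_source_b(dev))
		handled = true;

	return handled ? IRQ_HANDLED : IRQ_NONE;
}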
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
index 7dd7cd6c3ef8..0a9ce365a544 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
@@ -131,11 +131,6 @@ static u32 get_pf2vf_offset(u32 i)
 	return ADF_DH895XCC_PF2VF_OFFSET(i);
 }
 
-static u32 get_vintmsk_offset(u32 i)
-{
-	return ADF_DH895XCC_VINTMSK_OFFSET(i);
-}
-
 static void adf_enable_error_correction(struct adf_accel_dev *accel_dev)
 {
 	struct adf_hw_device_data *hw_device = accel_dev->hw_device;
@@ -180,8 +175,10 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev)
 		   ADF_DH895XCC_SMIA1_MASK);
 }
 
-static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev)
+static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev)
 {
+	spin_lock_init(&accel_dev->pf.vf2pf_ints_lock);
+
 	return 0;
 }
 
@@ -213,8 +210,6 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data)
 	hw_data->get_num_aes = get_num_aes;
 	hw_data->get_etr_bar_id = get_etr_bar_id;
 	hw_data->get_misc_bar_id = get_misc_bar_id;
-	hw_data->get_pf2vf_offset = get_pf2vf_offset;
-	hw_data->get_vintmsk_offset = get_vintmsk_offset;
 	hw_data->get_admin_info = adf_gen2_get_admin_info;
 	hw_data->get_arb_info = adf_gen2_get_arb_info;
 	hw_data->get_sram_bar_id = get_sram_bar_id;
@@ -224,15 +219,17 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data)
 	hw_data->init_admin_comms = adf_init_admin_comms;
 	hw_data->exit_admin_comms = adf_exit_admin_comms;
 	hw_data->configure_iov_threads = configure_iov_threads;
-	hw_data->disable_iov = adf_disable_sriov;
 	hw_data->send_admin_init = adf_send_admin_init;
 	hw_data->init_arb = adf_init_arb;
 	hw_data->exit_arb = adf_exit_arb;
 	hw_data->get_arb_mapping = adf_get_arbiter_mapping;
 	hw_data->enable_ints = adf_enable_ints;
-	hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms;
 	hw_data->reset_device = adf_reset_sbr;
-	hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
+	hw_data->get_pf2vf_offset = get_pf2vf_offset;
+	hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms;
+	hw_data->disable_iov = adf_disable_sriov;
+	hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
+
 	adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
 }
 
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
index 4d613923d155..f99319cd4543 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
@@ -35,7 +35,6 @@
 #define ADF_DH895XCC_ERRSSMSH_EN BIT(3)
 
 #define ADF_DH895XCC_PF2VF_OFFSET(i)	(0x3A000 + 0x280 + ((i) * 0x04))
-#define ADF_DH895XCC_VINTMSK_OFFSET(i)	(0x3A000 + 0x200 + ((i) * 0x04))
 
 /* AE to function mapping */
 #define ADF_DH895XCC_AE2FUNC_MAP_GRP_A_NUM_REGS 96
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
index a9ec4357144c..3976a81bd99b 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
@@ -159,17 +159,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 
 	/* set dma identifier */
-	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
-		if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
-			dev_err(&pdev->dev, "No usable DMA configuration\n");
-			ret = -EFAULT;
-			goto out_err_disable;
-		} else {
-			pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-		}
-
-	} else {
-		pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
+	if (ret) {
+		dev_err(&pdev->dev, "No usable DMA configuration\n");
+		goto out_err_disable;
 	}
 
 	if (pci_request_regions(pdev, ADF_DH895XCC_DEVICE_NAME)) {
@@ -208,12 +201,12 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (pci_save_state(pdev)) {
 		dev_err(&pdev->dev, "Failed to save pci state\n");
 		ret = -ENOMEM;
-		goto out_err_free_reg;
+		goto out_err_disable_aer;
 	}
 
 	ret = qat_crypto_dev_config(accel_dev);
 	if (ret)
-		goto out_err_free_reg;
+		goto out_err_disable_aer;
 
 	ret = adf_dev_init(accel_dev);
 	if (ret)
@@ -229,6 +222,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	adf_dev_stop(accel_dev);
 out_err_dev_shutdown:
 	adf_dev_shutdown(accel_dev);
+out_err_disable_aer:
+	adf_disable_aer(accel_dev);
 out_err_free_reg:
 	pci_release_regions(accel_pci_dev->pci_dev);
 out_err_disable:
diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
index f14fb82ed6df..7c6ed6bc8abf 100644
--- a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
+++ b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
@@ -52,11 +52,6 @@ static u32 get_pf2vf_offset(u32 i)
 	return ADF_DH895XCCIOV_PF2VF_OFFSET;
 }
 
-static u32 get_vintmsk_offset(u32 i)
-{
-	return ADF_DH895XCCIOV_VINTMSK_OFFSET;
-}
-
 static int adf_vf_int_noop(struct adf_accel_dev *accel_dev)
 {
 	return 0;
@@ -81,10 +76,10 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data)
 	hw_data->enable_error_correction = adf_vf_void_noop;
 	hw_data->init_admin_comms = adf_vf_int_noop;
 	hw_data->exit_admin_comms = adf_vf_void_noop;
-	hw_data->send_admin_init = adf_vf2pf_init;
+	hw_data->send_admin_init = adf_vf2pf_notify_init;
 	hw_data->init_arb = adf_vf_int_noop;
 	hw_data->exit_arb = adf_vf_void_noop;
-	hw_data->disable_iov = adf_vf2pf_shutdown;
+	hw_data->disable_iov = adf_vf2pf_notify_shutdown;
 	hw_data->get_accel_mask = get_accel_mask;
 	hw_data->get_ae_mask = get_ae_mask;
 	hw_data->get_num_accels = get_num_accels;
@@ -92,11 +87,10 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data)
 	hw_data->get_etr_bar_id = get_etr_bar_id;
 	hw_data->get_misc_bar_id = get_misc_bar_id;
 	hw_data->get_pf2vf_offset = get_pf2vf_offset;
-	hw_data->get_vintmsk_offset = get_vintmsk_offset;
 	hw_data->get_sku = get_sku;
 	hw_data->enable_ints = adf_vf_void_noop;
-	hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms;
-	hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
+	hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms;
+	hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
 	hw_data->dev_class->instances++;
 	adf_devmgr_update_class_index(hw_data);
 	adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.h b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.h
index 2bfcc67f8f39..306ebb71a408 100644
--- a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.h
+++ b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.h
@@ -13,7 +13,6 @@
 #define ADF_DH895XCCIOV_ETR_BAR 0
 #define ADF_DH895XCCIOV_ETR_MAX_BANKS 1
 #define ADF_DH895XCCIOV_PF2VF_OFFSET 0x200
-#define ADF_DH895XCCIOV_VINTMSK_OFFSET 0x208
 
 void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data);
 void adf_clean_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data);
diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c
index 29999da716cc..99d90f3ea2b7 100644
--- a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c
+++ b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c
@@ -141,17 +141,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 
 	/* set dma identifier */
-	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
-		if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
-			dev_err(&pdev->dev, "No usable DMA configuration\n");
-			ret = -EFAULT;
-			goto out_err_disable;
-		} else {
-			pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-		}
-
-	} else {
-		pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
+	if (ret) {
+		dev_err(&pdev->dev, "No usable DMA configuration\n");
+		goto out_err_disable;
 	}
 
 	if (pci_request_regions(pdev, ADF_DH895XCCVF_DEVICE_NAME)) {
@@ -218,6 +211,7 @@ static void adf_remove(struct pci_dev *pdev)
 		pr_err("QAT: Driver removal failed\n");
 		return;
 	}
+	adf_flush_vf_wq(accel_dev);
 	adf_dev_stop(accel_dev);
 	adf_dev_shutdown(accel_dev);
 	adf_cleanup_accel(accel_dev);
diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c
index 080955a1dd9c..e2375d992308 100644
--- a/drivers/crypto/virtio/virtio_crypto_core.c
+++ b/drivers/crypto/virtio/virtio_crypto_core.c
@@ -187,9 +187,9 @@ static int virtcrypto_init_vqs(struct virtio_crypto *vi)
 	if (ret)
 		goto err_free;
 
-	get_online_cpus();
+	cpus_read_lock();
 	virtcrypto_set_affinity(vi);
-	put_online_cpus();
+	cpus_read_unlock();
 
 	return 0;
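get_online_cpus()/put_online_cpus() here (and again throughout kernel/padata.c below) become cpus_read_lock()/cpus_read_unlock(). Only the names change; both take the CPU-hotplug read lock so the set of online CPUs cannot change while, for example, a CPU affinity mask is being computed. For instance:

#include <linux/cpu.h>
#include <linux/cpumask.h>

static unsigned int example_count_online_cpus(void)
{
	unsigned int n;

	cpus_read_lock();		/* formerly get_online_cpus() */
	n = num_online_cpus();		/* stable while the lock is held */
	cpus_read_unlock();		/* formerly put_online_cpus() */

	return n;
}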
diff --git a/drivers/firmware/smccc/smccc.c b/drivers/firmware/smccc/smccc.c
index 9f937b125ab0..60ccf3e90d7d 100644
--- a/drivers/firmware/smccc/smccc.c
+++ b/drivers/firmware/smccc/smccc.c
@@ -9,6 +9,7 @@
 #include <linux/cache.h>
 #include <linux/init.h>
 #include <linux/arm-smccc.h>
+#include <linux/platform_device.h>
 #include <asm/archrandom.h>
 
 static u32 smccc_version = ARM_SMCCC_VERSION_1_0;
@@ -42,3 +43,19 @@ u32 arm_smccc_get_version(void)
 	return smccc_version;
 }
 EXPORT_SYMBOL_GPL(arm_smccc_get_version);
+
+static int __init smccc_devices_init(void)
+{
+	struct platform_device *pdev;
+
+	if (smccc_trng_available) {
+		pdev = platform_device_register_simple("smccc_trng", -1,
+						       NULL, 0);
+		if (IS_ERR(pdev))
+			pr_err("smccc_trng: could not register device: %ld\n",
+			       PTR_ERR(pdev));
+	}
+
+	return 0;
+}
+device_initcall(smccc_devices_init);
diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h
index 7afd730d16ff..709f286e7b25 100644
--- a/include/crypto/sm4.h
+++ b/include/crypto/sm4.h
@@ -3,6 +3,7 @@
 /*
  * Common values for the SM4 algorithm
  * Copyright (C) 2018 ARM Limited or its affiliates.
+ * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 */
 
 #ifndef _CRYPTO_SM4_H
@@ -15,17 +16,29 @@
 #define SM4_BLOCK_SIZE	16
 #define SM4_RKEY_WORDS	32
 
-struct crypto_sm4_ctx {
+struct sm4_ctx {
 	u32 rkey_enc[SM4_RKEY_WORDS];
 	u32 rkey_dec[SM4_RKEY_WORDS];
 };
 
-int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-		       unsigned int key_len);
-int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key,
+/**
+ * sm4_expandkey - Expands the SM4 key as described in GB/T 32907-2016
+ * @ctx:	The location where the computed key will be stored.
+ * @in_key:	The supplied key.
+ * @key_len:	The length of the supplied key.
+ *
+ * Returns 0 on success. The function fails only if an invalid key size (or
+ * pointer) is supplied.
+ */
+int sm4_expandkey(struct sm4_ctx *ctx, const u8 *in_key,
 			  unsigned int key_len);
-void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in);
-void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in);
+
+/**
+ * sm4_crypt_block - Encrypt or decrypt a single SM4 block
+ * @rk:		The rkey_enc for encrypt or rkey_dec for decrypt
+ * @out:	Buffer to store output data
+ * @in:		Buffer containing the input data
+ */
+void sm4_crypt_block(const u32 *rk, u8 *out, const u8 *in);
 
 #endif
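The header above is the entire surface of the new SM4 library: a plain context struct plus sm4_expandkey() and sm4_crypt_block(), with no crypto_tfm in sight, which is what lets the arm64 CE glue fall back to it when NEON is unusable. A minimal usage sketch, with the wrapper name invented:

#include <crypto/sm4.h>
#include <linux/string.h>

static int example_sm4_encrypt_one(const u8 key[SM4_KEY_SIZE],
				   const u8 in[SM4_BLOCK_SIZE],
				   u8 out[SM4_BLOCK_SIZE])
{
	struct sm4_ctx ctx;
	int ret;

	ret = sm4_expandkey(&ctx, key, SM4_KEY_SIZE);
	if (ret)
		return ret;

	sm4_crypt_block(ctx.rkey_enc, out, in);	/* rkey_dec would decrypt */
	memzero_explicit(&ctx, sizeof(ctx));	/* drop the round keys */
	return 0;
}

Expand once, then call sm4_crypt_block() per 16-byte block; the direction is chosen purely by which round-key array is passed.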
diff --git a/include/linux/padata.h b/include/linux/padata.h
index a433f13fc4bf..495b16b6b4d7 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -12,6 +12,7 @@
 #ifndef PADATA_H
 #define PADATA_H
 
+#include <linux/refcount.h>
 #include <linux/compiler.h>
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
@@ -96,7 +97,7 @@ struct parallel_data {
 	struct padata_shell	*ps;
 	struct padata_list	__percpu *reorder_list;
 	struct padata_serial_queue	__percpu *squeue;
-	atomic_t		refcnt;
+	refcount_t		refcnt;
 	unsigned int		seq_nr;
 	unsigned int		processed;
 	int			cpu;
diff --git a/kernel/padata.c b/kernel/padata.c
index d4d3ba6e1728..18d3a5c699d8 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -9,19 +9,6 @@
  *
  * Copyright (c) 2020 Oracle and/or its affiliates.
  * Author: Daniel Jordan <daniel.m.jordan@oracle.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */
 
 #include <linux/completion.h>
@@ -211,7 +198,7 @@ int padata_do_parallel(struct padata_shell *ps,
 	if ((pinst->flags & PADATA_RESET))
 		goto out;
 
-	atomic_inc(&pd->refcnt);
+	refcount_inc(&pd->refcnt);
 	padata->pd = pd;
 	padata->cb_cpu = *cb_cpu;
 
@@ -383,7 +370,7 @@ static void padata_serial_worker(struct work_struct *serial_work)
 	}
 	local_bh_enable();
 
-	if (atomic_sub_and_test(cnt, &pd->refcnt))
+	if (refcount_sub_and_test(cnt, &pd->refcnt))
 		padata_free_pd(pd);
 }
 
@@ -593,7 +580,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_shell *ps)
 	padata_init_reorder_list(pd);
 	padata_init_squeues(pd);
 	pd->seq_nr = -1;
-	atomic_set(&pd->refcnt, 1);
+	refcount_set(&pd->refcnt, 1);
 	spin_lock_init(&pd->lock);
 	pd->cpu = cpumask_first(pd->cpumask.pcpu);
 	INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
@@ -667,7 +654,7 @@ static int padata_replace(struct padata_instance *pinst)
 	synchronize_rcu();
 
 	list_for_each_entry_continue_reverse(ps, &pinst->pslist, list)
-		if (atomic_dec_and_test(&ps->opd->refcnt))
+		if (refcount_dec_and_test(&ps->opd->refcnt))
 			padata_free_pd(ps->opd);
 
 	pinst->flags &= ~PADATA_RESET;
@@ -733,7 +720,7 @@ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
 	struct cpumask *serial_mask, *parallel_mask;
 	int err = -EINVAL;
 
-	get_online_cpus();
+	cpus_read_lock();
 	mutex_lock(&pinst->lock);
 
 	switch (cpumask_type) {
@@ -753,7 +740,7 @@ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
 
 out:
 	mutex_unlock(&pinst->lock);
-	put_online_cpus();
+	cpus_read_unlock();
 
 	return err;
 }
@@ -992,7 +979,7 @@ struct padata_instance *padata_alloc(const char *name)
 	if (!pinst->parallel_wq)
 		goto err_free_inst;
 
-	get_online_cpus();
+	cpus_read_lock();
 
 	pinst->serial_wq = alloc_workqueue("%s_serial", WQ_MEM_RECLAIM |
 					   WQ_CPU_INTENSIVE, 1, name);
@@ -1026,7 +1013,7 @@ struct padata_instance *padata_alloc(const char *name)
 				      &pinst->cpu_dead_node);
 #endif
 
-	put_online_cpus();
+	cpus_read_unlock();
 
 	return pinst;
 
@@ -1036,7 +1023,7 @@ struct padata_instance *padata_alloc(const char *name)
 err_free_serial_wq:
 	destroy_workqueue(pinst->serial_wq);
 err_put_cpus:
-	put_online_cpus();
+	cpus_read_unlock();
 	destroy_workqueue(pinst->parallel_wq);
 err_free_inst:
 	kfree(pinst);
@@ -1074,9 +1061,9 @@ struct padata_shell *padata_alloc_shell(struct padata_instance *pinst)
 
 	ps->pinst = pinst;
 
-	get_online_cpus();
+	cpus_read_lock();
 	pd = padata_alloc_pd(ps);
-	put_online_cpus();
+	cpus_read_unlock();
 
 	if (!pd)
 		goto out_free_ps;
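parallel_data->refcnt above moves from atomic_t to refcount_t: the inc/dec API shape is the same, but the counter saturates instead of wrapping and warns on underflow or increment-from-zero, turning reference-count bugs into loud diagnostics rather than silent use-after-free. A toy illustration of the API:

#include <linux/refcount.h>
#include <linux/slab.h>

struct example_obj {
	refcount_t refcnt;	/* set with refcount_set(&obj->refcnt, 1) */
};

static struct example_obj *example_get(struct example_obj *obj)
{
	refcount_inc(&obj->refcnt);	/* WARNs if refcnt was already 0 */
	return obj;
}

static void example_put(struct example_obj *obj)
{
	if (refcount_dec_and_test(&obj->refcnt))
		kfree(obj);
}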
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 14c032de276e..545ccbddf6a1 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -128,3 +128,6 @@ config CRYPTO_LIB_CHACHA20POLY1305
 
 config CRYPTO_LIB_SHA256
 	tristate
+
+config CRYPTO_LIB_SM4
+	tristate
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 3a435629d9ce..73205ed269ba 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -38,6 +38,9 @@ libpoly1305-y					+= poly1305.o
 obj-$(CONFIG_CRYPTO_LIB_SHA256)			+= libsha256.o
 libsha256-y					:= sha256.o
 
+obj-$(CONFIG_CRYPTO_LIB_SM4)			+= libsm4.o
+libsm4-y					:= sm4.o
+
 ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y)
 libblake2s-y					+= blake2s-selftest.o
 libchacha20poly1305-y				+= chacha20poly1305-selftest.o
diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c
index c64ac8bfb6a9..4055aa593ec4 100644
--- a/lib/crypto/blake2s.c
+++ b/lib/crypto/blake2s.c
@@ -73,7 +73,7 @@ void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen,
 }
 EXPORT_SYMBOL(blake2s256_hmac);
 
-static int __init mod_init(void)
+static int __init blake2s_mod_init(void)
 {
 	if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
 	    WARN_ON(!blake2s_selftest()))
@@ -81,12 +81,12 @@ static int __init mod_init(void)
 	return 0;
 }
 
-static void __exit mod_exit(void)
+static void __exit blake2s_mod_exit(void)
 {
 }
 
-module_init(mod_init);
-module_exit(mod_exit);
+module_init(blake2s_mod_init);
+module_exit(blake2s_mod_exit);
 
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("BLAKE2s hash function");
 MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c
index c2fcdb98cc02..fa6a9440fc95 100644
--- a/lib/crypto/chacha20poly1305.c
+++ b/lib/crypto/chacha20poly1305.c
@@ -354,7 +354,7 @@ bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, size_t src_len
 }
 EXPORT_SYMBOL(chacha20poly1305_decrypt_sg_inplace);
 
-static int __init mod_init(void)
+static int __init chacha20poly1305_init(void)
 {
 	if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
 	    WARN_ON(!chacha20poly1305_selftest()))
@@ -362,12 +362,12 @@ static int __init mod_init(void)
 	return 0;
 }
 
-static void __exit mod_exit(void)
+static void __exit chacha20poly1305_exit(void)
 {
 }
 
-module_init(mod_init);
-module_exit(mod_exit);
+module_init(chacha20poly1305_init);
+module_exit(chacha20poly1305_exit);
 
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("ChaCha20Poly1305 AEAD construction");
 MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c
index fb29739e8c29..064b352c6907 100644
--- a/lib/crypto/curve25519.c
+++ b/lib/crypto/curve25519.c
@@ -13,7 +13,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 
-static int __init mod_init(void)
+static int __init curve25519_init(void)
 {
 	if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
 	    WARN_ON(!curve25519_selftest()))
@@ -21,12 +21,12 @@ static int __init mod_init(void)
 	return 0;
 }
 
-static void __exit mod_exit(void)
+static void __exit curve25519_exit(void)
 {
 }
 
-module_init(mod_init);
-module_exit(mod_exit);
+module_init(curve25519_init);
+module_exit(curve25519_exit);
 
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("Curve25519 scalar multiplication");
diff --git a/lib/crypto/sm4.c b/lib/crypto/sm4.c
new file mode 100644
index 000000000000..633b59fed9db
--- /dev/null
+++ b/lib/crypto/sm4.c
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4, as specified in
+ * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
+ *
+ * Copyright (C) 2018 ARM Limited or its affiliates.
+ * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <linux/module.h>
+#include <asm/unaligned.h>
+#include <crypto/sm4.h>
+
+static const u32 fk[4] = {
+	0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
+};
+
+static const u32 __cacheline_aligned ck[32] = {
+	0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
+	0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
+	0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
+	0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
+	0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
+	0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
+	0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
+	0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
+};
+
+static const u8 __cacheline_aligned sbox[256] = {
+	0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
+	0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
+	0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
+	0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
+	0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
+	0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
+	0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
+	0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
+	0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
+	0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
+	0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
+	0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
+	0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
+	0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
+	0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
+	0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
+	0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
+	0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
+	0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
+	0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
+	0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
+	0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
+	0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
+	0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
+	0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
+	0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
+	0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
+	0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
+	0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
+	0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
+	0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
+	0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
+};
+
+static inline u32 sm4_t_non_lin_sub(u32 x)
+{
+	u32 out;
+
+	out  = (u32)sbox[x & 0xff];
+	out |= (u32)sbox[(x >> 8) & 0xff] << 8;
+	out |= (u32)sbox[(x >> 16) & 0xff] << 16;
+	out |= (u32)sbox[(x >> 24) & 0xff] << 24;
+
+	return out;
+}
+
+static inline u32 sm4_key_lin_sub(u32 x)
+{
+	return x ^ rol32(x, 13) ^ rol32(x, 23);
+}
+
+static inline u32 sm4_enc_lin_sub(u32 x)
+{
+	return x ^ rol32(x, 2) ^ rol32(x, 10) ^ rol32(x, 18) ^ rol32(x, 24);
+}
+
+static inline u32 sm4_key_sub(u32 x)
+{
+	return sm4_key_lin_sub(sm4_t_non_lin_sub(x));
+}
+
+static inline u32 sm4_enc_sub(u32 x)
+{
+	return sm4_enc_lin_sub(sm4_t_non_lin_sub(x));
+}
+
+static inline u32 sm4_round(u32 x0, u32 x1, u32 x2, u32 x3, u32 rk)
+{
+	return x0 ^ sm4_enc_sub(x1 ^ x2 ^ x3 ^ rk);
+}
+
+
+/**
+ * sm4_expandkey - Expands the SM4 key as described in GB/T 32907-2016
+ * @ctx:	The location where the computed key will be stored.
+ * @in_key:	The supplied key.
+ * @key_len:	The length of the supplied key.
+ *
+ * Returns 0 on success. The function fails only if an invalid key size (or
+ * pointer) is supplied.
+ */
+int sm4_expandkey(struct sm4_ctx *ctx, const u8 *in_key,
+		  unsigned int key_len)
+{
+	u32 rk[4];
+	const u32 *key = (u32 *)in_key;
+	int i;
+
+	if (key_len != SM4_KEY_SIZE)
+		return -EINVAL;
+
+	rk[0] = get_unaligned_be32(&key[0]) ^ fk[0];
+	rk[1] = get_unaligned_be32(&key[1]) ^ fk[1];
+	rk[2] = get_unaligned_be32(&key[2]) ^ fk[2];
+	rk[3] = get_unaligned_be32(&key[3]) ^ fk[3];
+
+	for (i = 0; i < 32; i += 4) {
+		rk[0] ^= sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i + 0]);
+		rk[1] ^= sm4_key_sub(rk[2] ^ rk[3] ^ rk[0] ^ ck[i + 1]);
+		rk[2] ^= sm4_key_sub(rk[3] ^ rk[0] ^ rk[1] ^ ck[i + 2]);
+		rk[3] ^= sm4_key_sub(rk[0] ^ rk[1] ^ rk[2] ^ ck[i + 3]);
+
+		ctx->rkey_enc[i + 0] = rk[0];
+		ctx->rkey_enc[i + 1] = rk[1];
+		ctx->rkey_enc[i + 2] = rk[2];
+		ctx->rkey_enc[i + 3] = rk[3];
+		ctx->rkey_dec[31 - 0 - i] = rk[0];
+		ctx->rkey_dec[31 - 1 - i] = rk[1];
+		ctx->rkey_dec[31 - 2 - i] = rk[2];
+		ctx->rkey_dec[31 - 3 - i] = rk[3];
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(sm4_expandkey);
+
+/**
+ * sm4_crypt_block - Encrypt or decrypt a single SM4 block
+ * @rk:		The rkey_enc for encrypt or rkey_dec for decrypt
+ * @out:	Buffer to store output data
+ * @in:		Buffer containing the input data
+ */
+void sm4_crypt_block(const u32 *rk, u8 *out, const u8 *in)
+{
+	u32 x[4], i;
+
+	x[0] = get_unaligned_be32(in + 0 * 4);
+	x[1] = get_unaligned_be32(in + 1 * 4);
+	x[2] = get_unaligned_be32(in + 2 * 4);
+	x[3] = get_unaligned_be32(in + 3 * 4);
+
+	for (i = 0; i < 32; i += 4) {
+		x[0] = sm4_round(x[0], x[1], x[2], x[3], rk[i + 0]);
+		x[1] = sm4_round(x[1], x[2], x[3], x[0], rk[i + 1]);
+		x[2] = sm4_round(x[2], x[3], x[0], x[1], rk[i + 2]);
+		x[3] = sm4_round(x[3], x[0], x[1], x[2], rk[i + 3]);
+	}
+
+	put_unaligned_be32(x[3 - 0], out + 0 * 4);
+	put_unaligned_be32(x[3 - 1], out + 1 * 4);
+	put_unaligned_be32(x[3 - 2], out + 2 * 4);
+	put_unaligned_be32(x[3 - 3], out + 3 * 4);
+}
+EXPORT_SYMBOL_GPL(sm4_crypt_block);
+
+MODULE_DESCRIPTION("Generic SM4 library");
+MODULE_LICENSE("GPL v2");
diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c
index 9a75ca3f7edf..bc81419f400c 100644
--- a/lib/mpi/mpiutil.c
+++ b/lib/mpi/mpiutil.c
@@ -148,7 +148,7 @@ int mpi_resize(MPI a, unsigned nlimbs)
 		return 0;	/* no need to do it */
 
 	if (a->d) {
-		p = kmalloc_array(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL);
+		p = kcalloc(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL);
 		if (!p)
 			return -ENOMEM;
 		memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t));