mirror of https://gitee.com/openkylin/linux.git
crypto: caam - simplify RNG implementation
Rework CAAM RNG implementation as follows: - Make use of the fact that HWRNG supports partial reads and will handle such cases gracefully by removing recursion in caam_read() - Convert blocking caam_read() codepath to do a single blocking job read directly into requested buffer, bypassing any intermediary buffers - Convert async caam_read() codepath into a simple single reader/single writer FIFO use-case, thus simplifying concurrency handling and delegating buffer read/write position management to KFIFO subsystem. - Leverage the same low level RNG data extraction code for both async and blocking caam_read() scenarios, get rid of the shared job descriptor and make non-shared one as a simple as possible (just HEADER + ALGORITHM OPERATION + FIFO STORE) - Split private context from DMA related memory, so that the former could be allocated without GFP_DMA. NOTE: On its face value this commit decreased throughput numbers reported by dd if=/dev/hwrng of=/dev/null bs=1 count=100K [iflag=nonblock] by about 15%, however commits that enable prediction resistance and limit JR total size impact the performance so much and move the bottleneck such as to make this regression irrelevant. NOTE: On the bright side, this commit reduces RNG in kernel DMA buffer memory usage from 2 x RN_BUF_SIZE (~256K) to 32K. Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com> Reviewed-by: Horia Geantă <horia.geanta@nxp.com> Cc: Chris Healy <cphealy@gmail.com> Cc: Lucas Stach <l.stach@pengutronix.de> Cc: Horia Geantă <horia.geanta@nxp.com> Cc: Herbert Xu <herbert@gondor.apana.org.au> Cc: Iuliana Prodan <iuliana.prodan@nxp.com> Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-imx@nxp.com Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
parent
1517f63cd8
commit
2c5e88dc90
|
@ -7,35 +7,12 @@
|
|||
*
|
||||
* Based on caamalg.c crypto API driver.
|
||||
*
|
||||
* relationship between job descriptors to shared descriptors:
|
||||
*
|
||||
* --------------- --------------
|
||||
* | JobDesc #0 |-------------------->| ShareDesc |
|
||||
* | *(buffer 0) | |------------->| (generate) |
|
||||
* --------------- | | (move) |
|
||||
* | | (store) |
|
||||
* --------------- | --------------
|
||||
* | JobDesc #1 |------|
|
||||
* | *(buffer 1) |
|
||||
* ---------------
|
||||
*
|
||||
* A job desc looks like this:
|
||||
*
|
||||
* ---------------------
|
||||
* | Header |
|
||||
* | ShareDesc Pointer |
|
||||
* | SEQ_OUT_PTR |
|
||||
* | (output buffer) |
|
||||
* ---------------------
|
||||
*
|
||||
* The SharedDesc never changes, and each job descriptor points to one of two
|
||||
* buffers for each device, from which the data will be copied into the
|
||||
* requested destination
|
||||
*/
|
||||
|
||||
#include <linux/hw_random.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/kfifo.h>
|
||||
|
||||
#include "compat.h"
|
||||
|
||||
|
@ -45,38 +22,26 @@
|
|||
#include "jr.h"
|
||||
#include "error.h"
|
||||
|
||||
#define CAAM_RNG_MAX_FIFO_STORE_SIZE U16_MAX
|
||||
|
||||
#define CAAM_RNG_FIFO_LEN SZ_32K /* Must be a multiple of 2 */
|
||||
|
||||
/*
|
||||
* Maximum buffer size: maximum number of random, cache-aligned bytes that
|
||||
* will be generated and moved to seq out ptr (extlen not allowed)
|
||||
* Length of used descriptors, see caam_init_desc()
|
||||
*/
|
||||
#define RN_BUF_SIZE (0xffff / L1_CACHE_BYTES * \
|
||||
L1_CACHE_BYTES)
|
||||
|
||||
/* length of descriptors */
|
||||
#define DESC_JOB_O_LEN (CAAM_CMD_SZ * 2 + CAAM_PTR_SZ_MAX * 2)
|
||||
#define DESC_RNG_LEN (3 * CAAM_CMD_SZ)
|
||||
|
||||
/* Buffer, its dma address and lock */
|
||||
struct buf_data {
|
||||
u8 buf[RN_BUF_SIZE] ____cacheline_aligned;
|
||||
dma_addr_t addr;
|
||||
struct completion filled;
|
||||
u32 hw_desc[DESC_JOB_O_LEN];
|
||||
#define BUF_NOT_EMPTY 0
|
||||
#define BUF_EMPTY 1
|
||||
#define BUF_PENDING 2 /* Empty, but with job pending --don't submit another */
|
||||
atomic_t empty;
|
||||
};
|
||||
#define CAAM_RNG_DESC_LEN (CAAM_CMD_SZ + \
|
||||
CAAM_CMD_SZ + \
|
||||
CAAM_CMD_SZ + CAAM_PTR_SZ_MAX)
|
||||
|
||||
/* rng per-device context */
|
||||
struct caam_rng_ctx {
|
||||
struct hwrng rng;
|
||||
struct device *jrdev;
|
||||
dma_addr_t sh_desc_dma;
|
||||
u32 sh_desc[DESC_RNG_LEN];
|
||||
unsigned int cur_buf_idx;
|
||||
int current_buf;
|
||||
struct buf_data bufs[2];
|
||||
struct device *ctrldev;
|
||||
void *desc_async;
|
||||
void *desc_sync;
|
||||
struct work_struct worker;
|
||||
struct kfifo fifo;
|
||||
};
|
||||
|
||||
static struct caam_rng_ctx *to_caam_rng_ctx(struct hwrng *r)
|
||||
|
@ -84,194 +49,116 @@ static struct caam_rng_ctx *to_caam_rng_ctx(struct hwrng *r)
|
|||
return (struct caam_rng_ctx *)r->priv;
|
||||
}
|
||||
|
||||
static inline void rng_unmap_buf(struct device *jrdev, struct buf_data *bd)
|
||||
static void caam_rng_done(struct device *jrdev, u32 *desc, u32 err,
|
||||
void *context)
|
||||
{
|
||||
if (bd->addr)
|
||||
dma_unmap_single(jrdev, bd->addr, RN_BUF_SIZE,
|
||||
DMA_FROM_DEVICE);
|
||||
}
|
||||
|
||||
static inline void rng_unmap_ctx(struct caam_rng_ctx *ctx)
|
||||
{
|
||||
struct device *jrdev = ctx->jrdev;
|
||||
|
||||
if (ctx->sh_desc_dma)
|
||||
dma_unmap_single(jrdev, ctx->sh_desc_dma,
|
||||
desc_bytes(ctx->sh_desc), DMA_TO_DEVICE);
|
||||
rng_unmap_buf(jrdev, &ctx->bufs[0]);
|
||||
rng_unmap_buf(jrdev, &ctx->bufs[1]);
|
||||
}
|
||||
|
||||
static void rng_done(struct device *jrdev, u32 *desc, u32 err, void *context)
|
||||
{
|
||||
struct buf_data *bd;
|
||||
|
||||
bd = container_of(desc, struct buf_data, hw_desc[0]);
|
||||
struct completion *done = context;
|
||||
|
||||
if (err)
|
||||
caam_jr_strstatus(jrdev, err);
|
||||
|
||||
atomic_set(&bd->empty, BUF_NOT_EMPTY);
|
||||
complete(&bd->filled);
|
||||
|
||||
/* Buffer refilled, invalidate cache */
|
||||
dma_sync_single_for_cpu(jrdev, bd->addr, RN_BUF_SIZE, DMA_FROM_DEVICE);
|
||||
|
||||
print_hex_dump_debug("rng refreshed buf@: ", DUMP_PREFIX_ADDRESS, 16, 4,
|
||||
bd->buf, RN_BUF_SIZE, 1);
|
||||
complete(done);
|
||||
}
|
||||
|
||||
static inline int submit_job(struct caam_rng_ctx *ctx, int to_current)
|
||||
static u32 *caam_init_desc(u32 *desc, dma_addr_t dst_dma, int len)
|
||||
{
|
||||
struct buf_data *bd = &ctx->bufs[!(to_current ^ ctx->current_buf)];
|
||||
struct device *jrdev = ctx->jrdev;
|
||||
u32 *desc = bd->hw_desc;
|
||||
init_job_desc(desc, 0); /* + 1 cmd_sz */
|
||||
/* Generate random bytes: + 1 cmd_sz */
|
||||
append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG);
|
||||
/* Store bytes: + 1 cmd_sz + caam_ptr_sz */
|
||||
append_fifo_store(desc, dst_dma, len, FIFOST_TYPE_RNGSTORE);
|
||||
|
||||
print_hex_dump_debug("rng job desc@: ", DUMP_PREFIX_ADDRESS,
|
||||
16, 4, desc, desc_bytes(desc), 1);
|
||||
|
||||
return desc;
|
||||
}
|
||||
|
||||
static int caam_rng_read_one(struct device *jrdev,
|
||||
void *dst, int len,
|
||||
void *desc,
|
||||
struct completion *done)
|
||||
{
|
||||
dma_addr_t dst_dma;
|
||||
int err;
|
||||
|
||||
dev_dbg(jrdev, "submitting job %d\n", !(to_current ^ ctx->current_buf));
|
||||
init_completion(&bd->filled);
|
||||
err = caam_jr_enqueue(jrdev, desc, rng_done, ctx);
|
||||
if (err != -EINPROGRESS)
|
||||
complete(&bd->filled); /* don't wait on failed job*/
|
||||
else
|
||||
atomic_inc(&bd->empty); /* note if pending */
|
||||
len = min_t(int, len, CAAM_RNG_MAX_FIFO_STORE_SIZE);
|
||||
|
||||
return err;
|
||||
dst_dma = dma_map_single(jrdev, dst, len, DMA_FROM_DEVICE);
|
||||
if (dma_mapping_error(jrdev, dst_dma)) {
|
||||
dev_err(jrdev, "unable to map destination memory\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
init_completion(done);
|
||||
err = caam_jr_enqueue(jrdev,
|
||||
caam_init_desc(desc, dst_dma, len),
|
||||
caam_rng_done, done);
|
||||
if (err == -EINPROGRESS) {
|
||||
wait_for_completion(done);
|
||||
err = 0;
|
||||
}
|
||||
|
||||
dma_unmap_single(jrdev, dst_dma, len, DMA_FROM_DEVICE);
|
||||
|
||||
return err ?: len;
|
||||
}
|
||||
|
||||
static int caam_read(struct hwrng *rng, void *data, size_t max, bool wait)
|
||||
static void caam_rng_fill_async(struct caam_rng_ctx *ctx)
|
||||
{
|
||||
struct scatterlist sg[1];
|
||||
struct completion done;
|
||||
int len, nents;
|
||||
|
||||
sg_init_table(sg, ARRAY_SIZE(sg));
|
||||
nents = kfifo_dma_in_prepare(&ctx->fifo, sg, ARRAY_SIZE(sg),
|
||||
CAAM_RNG_FIFO_LEN);
|
||||
if (!nents)
|
||||
return;
|
||||
|
||||
len = caam_rng_read_one(ctx->jrdev, sg_virt(&sg[0]),
|
||||
sg[0].length,
|
||||
ctx->desc_async,
|
||||
&done);
|
||||
if (len < 0)
|
||||
return;
|
||||
|
||||
kfifo_dma_in_finish(&ctx->fifo, len);
|
||||
}
|
||||
|
||||
static void caam_rng_worker(struct work_struct *work)
|
||||
{
|
||||
struct caam_rng_ctx *ctx = container_of(work, struct caam_rng_ctx,
|
||||
worker);
|
||||
caam_rng_fill_async(ctx);
|
||||
}
|
||||
|
||||
static int caam_read(struct hwrng *rng, void *dst, size_t max, bool wait)
|
||||
{
|
||||
struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng);
|
||||
struct buf_data *bd = &ctx->bufs[ctx->current_buf];
|
||||
int next_buf_idx, copied_idx;
|
||||
int err;
|
||||
int out;
|
||||
|
||||
if (atomic_read(&bd->empty)) {
|
||||
/* try to submit job if there wasn't one */
|
||||
if (atomic_read(&bd->empty) == BUF_EMPTY) {
|
||||
err = submit_job(ctx, 1);
|
||||
/* if can't submit job, can't even wait */
|
||||
if (err != -EINPROGRESS)
|
||||
return 0;
|
||||
}
|
||||
/* no immediate data, so exit if not waiting */
|
||||
if (!wait)
|
||||
return 0;
|
||||
if (wait) {
|
||||
struct completion done;
|
||||
|
||||
/* waiting for pending job */
|
||||
if (atomic_read(&bd->empty))
|
||||
wait_for_completion(&bd->filled);
|
||||
return caam_rng_read_one(ctx->jrdev, dst, max,
|
||||
ctx->desc_sync, &done);
|
||||
}
|
||||
|
||||
next_buf_idx = ctx->cur_buf_idx + max;
|
||||
dev_dbg(ctx->jrdev, "%s: start reading at buffer %d, idx %d\n",
|
||||
__func__, ctx->current_buf, ctx->cur_buf_idx);
|
||||
out = kfifo_out(&ctx->fifo, dst, max);
|
||||
if (kfifo_len(&ctx->fifo) <= CAAM_RNG_FIFO_LEN / 2)
|
||||
schedule_work(&ctx->worker);
|
||||
|
||||
/* if enough data in current buffer */
|
||||
if (next_buf_idx < RN_BUF_SIZE) {
|
||||
memcpy(data, bd->buf + ctx->cur_buf_idx, max);
|
||||
ctx->cur_buf_idx = next_buf_idx;
|
||||
return max;
|
||||
}
|
||||
|
||||
/* else, copy what's left... */
|
||||
copied_idx = RN_BUF_SIZE - ctx->cur_buf_idx;
|
||||
memcpy(data, bd->buf + ctx->cur_buf_idx, copied_idx);
|
||||
ctx->cur_buf_idx = 0;
|
||||
atomic_set(&bd->empty, BUF_EMPTY);
|
||||
|
||||
/* ...refill... */
|
||||
submit_job(ctx, 1);
|
||||
|
||||
/* and use next buffer */
|
||||
ctx->current_buf = !ctx->current_buf;
|
||||
dev_dbg(ctx->jrdev, "switched to buffer %d\n", ctx->current_buf);
|
||||
|
||||
/* since there already is some data read, don't wait */
|
||||
return copied_idx + caam_read(rng, data + copied_idx,
|
||||
max - copied_idx, false);
|
||||
}
|
||||
|
||||
static inline int rng_create_sh_desc(struct caam_rng_ctx *ctx)
|
||||
{
|
||||
struct device *jrdev = ctx->jrdev;
|
||||
u32 *desc = ctx->sh_desc;
|
||||
|
||||
init_sh_desc(desc, HDR_SHARE_SERIAL);
|
||||
|
||||
/* Generate random bytes */
|
||||
append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG);
|
||||
|
||||
/* Store bytes */
|
||||
append_seq_fifo_store(desc, RN_BUF_SIZE, FIFOST_TYPE_RNGSTORE);
|
||||
|
||||
ctx->sh_desc_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
|
||||
DMA_TO_DEVICE);
|
||||
if (dma_mapping_error(jrdev, ctx->sh_desc_dma)) {
|
||||
dev_err(jrdev, "unable to map shared descriptor\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
print_hex_dump_debug("rng shdesc@: ", DUMP_PREFIX_ADDRESS, 16, 4,
|
||||
desc, desc_bytes(desc), 1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int rng_create_job_desc(struct caam_rng_ctx *ctx, int buf_id)
|
||||
{
|
||||
struct device *jrdev = ctx->jrdev;
|
||||
struct buf_data *bd = &ctx->bufs[buf_id];
|
||||
u32 *desc = bd->hw_desc;
|
||||
int sh_len = desc_len(ctx->sh_desc);
|
||||
|
||||
init_job_desc_shared(desc, ctx->sh_desc_dma, sh_len, HDR_SHARE_DEFER |
|
||||
HDR_REVERSE);
|
||||
|
||||
bd->addr = dma_map_single(jrdev, bd->buf, RN_BUF_SIZE, DMA_FROM_DEVICE);
|
||||
if (dma_mapping_error(jrdev, bd->addr)) {
|
||||
dev_err(jrdev, "unable to map dst\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
append_seq_out_ptr_intlen(desc, bd->addr, RN_BUF_SIZE, 0);
|
||||
|
||||
print_hex_dump_debug("rng job desc@: ", DUMP_PREFIX_ADDRESS, 16, 4,
|
||||
desc, desc_bytes(desc), 1);
|
||||
|
||||
return 0;
|
||||
return out;
|
||||
}
|
||||
|
||||
static void caam_cleanup(struct hwrng *rng)
|
||||
{
|
||||
struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng);
|
||||
int i;
|
||||
struct buf_data *bd;
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
bd = &ctx->bufs[i];
|
||||
if (atomic_read(&bd->empty) == BUF_PENDING)
|
||||
wait_for_completion(&bd->filled);
|
||||
}
|
||||
|
||||
rng_unmap_ctx(ctx);
|
||||
flush_work(&ctx->worker);
|
||||
caam_jr_free(ctx->jrdev);
|
||||
}
|
||||
|
||||
static int caam_init_buf(struct caam_rng_ctx *ctx, int buf_id)
|
||||
{
|
||||
struct buf_data *bd = &ctx->bufs[buf_id];
|
||||
int err;
|
||||
|
||||
err = rng_create_job_desc(ctx, buf_id);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
atomic_set(&bd->empty, BUF_EMPTY);
|
||||
submit_job(ctx, buf_id == ctx->current_buf);
|
||||
wait_for_completion(&bd->filled);
|
||||
|
||||
return 0;
|
||||
kfifo_free(&ctx->fifo);
|
||||
}
|
||||
|
||||
static int caam_init(struct hwrng *rng)
|
||||
|
@ -279,33 +166,36 @@ static int caam_init(struct hwrng *rng)
|
|||
struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng);
|
||||
int err;
|
||||
|
||||
ctx->desc_sync = devm_kzalloc(ctx->ctrldev, CAAM_RNG_DESC_LEN,
|
||||
GFP_DMA | GFP_KERNEL);
|
||||
if (!ctx->desc_sync)
|
||||
return -ENOMEM;
|
||||
|
||||
ctx->desc_async = devm_kzalloc(ctx->ctrldev, CAAM_RNG_DESC_LEN,
|
||||
GFP_DMA | GFP_KERNEL);
|
||||
if (!ctx->desc_async)
|
||||
return -ENOMEM;
|
||||
|
||||
if (kfifo_alloc(&ctx->fifo, CAAM_RNG_FIFO_LEN, GFP_DMA | GFP_KERNEL))
|
||||
return -ENOMEM;
|
||||
|
||||
INIT_WORK(&ctx->worker, caam_rng_worker);
|
||||
|
||||
ctx->jrdev = caam_jr_alloc();
|
||||
err = PTR_ERR_OR_ZERO(ctx->jrdev);
|
||||
if (err) {
|
||||
kfifo_free(&ctx->fifo);
|
||||
pr_err("Job Ring Device allocation for transform failed\n");
|
||||
return err;
|
||||
}
|
||||
|
||||
err = rng_create_sh_desc(ctx);
|
||||
if (err)
|
||||
goto free_jrdev;
|
||||
|
||||
ctx->current_buf = 0;
|
||||
ctx->cur_buf_idx = 0;
|
||||
|
||||
err = caam_init_buf(ctx, 0);
|
||||
if (err)
|
||||
goto free_jrdev;
|
||||
|
||||
err = caam_init_buf(ctx, 1);
|
||||
if (err)
|
||||
goto free_jrdev;
|
||||
/*
|
||||
* Fill async buffer to have early randomness data for
|
||||
* hw_random
|
||||
*/
|
||||
caam_rng_fill_async(ctx);
|
||||
|
||||
return 0;
|
||||
|
||||
free_jrdev:
|
||||
caam_jr_free(ctx->jrdev);
|
||||
return err;
|
||||
}
|
||||
|
||||
int caam_rng_init(struct device *ctrldev);
|
||||
|
@ -335,10 +225,12 @@ int caam_rng_init(struct device *ctrldev)
|
|||
if (!devres_open_group(ctrldev, caam_rng_init, GFP_KERNEL))
|
||||
return -ENOMEM;
|
||||
|
||||
ctx = devm_kzalloc(ctrldev, sizeof(*ctx), GFP_DMA | GFP_KERNEL);
|
||||
ctx = devm_kzalloc(ctrldev, sizeof(*ctx), GFP_KERNEL);
|
||||
if (!ctx)
|
||||
return -ENOMEM;
|
||||
|
||||
ctx->ctrldev = ctrldev;
|
||||
|
||||
ctx->rng.name = "rng-caam";
|
||||
ctx->rng.init = caam_init;
|
||||
ctx->rng.cleanup = caam_cleanup;
|
||||
|
|
Loading…
Reference in New Issue