RDMA/hns: Optimize eqe buffer allocation flow

The eqe has a private multi-hop addressing implementation, but there is
already a set of interfaces in the hns driver that can achieve this.

So, simplify the eqe buffer allocation process by using the mtr interface
and remove large amount of repeated logic.

Link: https://lore.kernel.org/r/20200126145835.11368-1-liweihang@huawei.com
Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
This commit is contained in:
Xi Wang 2020-01-26 22:58:35 +08:00 committed by Jason Gunthorpe
parent b14c95bee8
commit d7e2d3432a
2 changed files with 108 additions and 383 deletions

View File

@ -762,14 +762,8 @@ struct hns_roce_eq {
int eqe_ba_pg_sz;
int eqe_buf_pg_sz;
int hop_num;
u64 *bt_l0; /* Base address table for L0 */
u64 **bt_l1; /* Base address table for L1 */
u64 **buf;
dma_addr_t l0_dma;
dma_addr_t *l1_dma;
dma_addr_t *buf_dma;
u32 l0_last_num; /* L0 last chunk num */
u32 l1_last_num; /* L1 last chunk num */
struct hns_roce_mtr mtr;
struct hns_roce_buf buf;
int eq_max_cnt;
int eq_period;
int shift;

View File

@ -5326,44 +5326,24 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
hns_roce_write64(hr_dev, doorbell, eq->doorbell);
}
static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
static inline void *get_eqe_buf(struct hns_roce_eq *eq, unsigned long offset)
{
u32 buf_chk_sz;
unsigned long off;
buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
return (struct hns_roce_aeqe *)((char *)(eq->buf_list->buf) +
off % buf_chk_sz);
}
static struct hns_roce_aeqe *mhop_get_aeqe(struct hns_roce_eq *eq, u32 entry)
{
u32 buf_chk_sz;
unsigned long off;
buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
return (struct hns_roce_aeqe *)((u8 *)(eq->bt_l0) +
off % buf_chk_sz);
if (eq->buf.nbufs == 1)
return eq->buf.direct.buf + offset % buf_chk_sz;
else
return (struct hns_roce_aeqe *)((u8 *)
(eq->buf[off / buf_chk_sz]) + off % buf_chk_sz);
return eq->buf.page_list[offset / buf_chk_sz].buf +
offset % buf_chk_sz;
}
static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
{
struct hns_roce_aeqe *aeqe;
if (!eq->hop_num)
aeqe = get_aeqe_v2(eq, eq->cons_index);
else
aeqe = mhop_get_aeqe(eq, eq->cons_index);
aeqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
HNS_ROCE_AEQ_ENTRY_SIZE);
return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^
!!(eq->cons_index & eq->entries)) ? aeqe : NULL;
}
@ -5456,44 +5436,12 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
return aeqe_found;
}
static struct hns_roce_ceqe *get_ceqe_v2(struct hns_roce_eq *eq, u32 entry)
{
u32 buf_chk_sz;
unsigned long off;
buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
return (struct hns_roce_ceqe *)((char *)(eq->buf_list->buf) +
off % buf_chk_sz);
}
static struct hns_roce_ceqe *mhop_get_ceqe(struct hns_roce_eq *eq, u32 entry)
{
u32 buf_chk_sz;
unsigned long off;
buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
return (struct hns_roce_ceqe *)((u8 *)(eq->bt_l0) +
off % buf_chk_sz);
else
return (struct hns_roce_ceqe *)((u8 *)(eq->buf[off /
buf_chk_sz]) + off % buf_chk_sz);
}
static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
{
struct hns_roce_ceqe *ceqe;
if (!eq->hop_num)
ceqe = get_ceqe_v2(eq, eq->cons_index);
else
ceqe = mhop_get_ceqe(eq, eq->cons_index);
ceqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
HNS_ROCE_CEQ_ENTRY_SIZE);
return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^
(!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
}
@ -5653,90 +5601,11 @@ static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, int eqn)
dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn);
}
static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev,
struct hns_roce_eq *eq)
static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
{
struct device *dev = hr_dev->dev;
u64 idx;
u64 size;
u32 buf_chk_sz;
u32 bt_chk_sz;
u32 mhop_num;
int eqe_alloc;
int i = 0;
int j = 0;
mhop_num = hr_dev->caps.eqe_hop_num;
buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
if (mhop_num == HNS_ROCE_HOP_NUM_0) {
dma_free_coherent(dev, (unsigned int)(eq->entries *
eq->eqe_size), eq->bt_l0, eq->l0_dma);
return;
}
dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
if (mhop_num == 1) {
for (i = 0; i < eq->l0_last_num; i++) {
if (i == eq->l0_last_num - 1) {
eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
size = (eq->entries - eqe_alloc) * eq->eqe_size;
dma_free_coherent(dev, size, eq->buf[i],
eq->buf_dma[i]);
break;
}
dma_free_coherent(dev, buf_chk_sz, eq->buf[i],
eq->buf_dma[i]);
}
} else if (mhop_num == 2) {
for (i = 0; i < eq->l0_last_num; i++) {
dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
eq->l1_dma[i]);
for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
idx = i * (bt_chk_sz / BA_BYTE_LEN) + j;
if ((i == eq->l0_last_num - 1)
&& j == eq->l1_last_num - 1) {
eqe_alloc = (buf_chk_sz / eq->eqe_size)
* idx;
size = (eq->entries - eqe_alloc)
* eq->eqe_size;
dma_free_coherent(dev, size,
eq->buf[idx],
eq->buf_dma[idx]);
break;
}
dma_free_coherent(dev, buf_chk_sz, eq->buf[idx],
eq->buf_dma[idx]);
}
}
}
kfree(eq->buf_dma);
kfree(eq->buf);
kfree(eq->l1_dma);
kfree(eq->bt_l1);
eq->buf_dma = NULL;
eq->buf = NULL;
eq->l1_dma = NULL;
eq->bt_l1 = NULL;
}
static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev,
struct hns_roce_eq *eq)
{
u32 buf_chk_sz;
buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
if (hr_dev->caps.eqe_hop_num) {
hns_roce_mhop_free_eq(hr_dev, eq);
return;
}
dma_free_coherent(hr_dev->dev, buf_chk_sz, eq->buf_list->buf,
eq->buf_list->map);
kfree(eq->buf_list);
if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0)
hns_roce_mtr_cleanup(hr_dev, &eq->mtr);
hns_roce_buf_free(hr_dev, eq->buf.size, &eq->buf);
}
static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
@ -5744,6 +5613,8 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
void *mb_buf)
{
struct hns_roce_eq_context *eqc;
u64 ba[MTT_MIN_COUNT] = { 0 };
int count;
eqc = mb_buf;
memset(eqc, 0, sizeof(struct hns_roce_eq_context));
@ -5759,10 +5630,23 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
eq->eqe_buf_pg_sz = hr_dev->caps.eqe_buf_pg_sz;
eq->shift = ilog2((unsigned int)eq->entries);
if (!eq->hop_num)
eq->eqe_ba = eq->buf_list->map;
else
eq->eqe_ba = eq->l0_dma;
/* if not muti-hop, eqe buffer only use one trunk */
if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) {
eq->eqe_ba = eq->buf.direct.map;
eq->cur_eqe_ba = eq->eqe_ba;
if (eq->buf.npages > 1)
eq->nxt_eqe_ba = eq->eqe_ba + (1 << eq->eqe_buf_pg_sz);
else
eq->nxt_eqe_ba = eq->eqe_ba;
} else {
count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, ba,
MTT_MIN_COUNT, &eq->eqe_ba);
eq->cur_eqe_ba = ba[0];
if (count > 1)
eq->nxt_eqe_ba = ba[1];
else
eq->nxt_eqe_ba = ba[0];
}
/* set eqc state */
roce_set_field(eqc->byte_4, HNS_ROCE_EQC_EQ_ST_M, HNS_ROCE_EQC_EQ_ST_S,
@ -5860,220 +5744,97 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
HNS_ROCE_EQC_NXT_EQE_BA_H_S, eq->nxt_eqe_ba >> 44);
}
static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
struct hns_roce_eq *eq)
static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
u32 page_shift)
{
struct device *dev = hr_dev->dev;
int eq_alloc_done = 0;
int eq_buf_cnt = 0;
int eqe_alloc;
u32 buf_chk_sz;
u32 bt_chk_sz;
u32 mhop_num;
u64 size;
u64 idx;
struct hns_roce_buf_region region = {};
dma_addr_t *buf_list = NULL;
int ba_num;
int bt_num;
int record_i;
int record_j;
int i = 0;
int j = 0;
mhop_num = hr_dev->caps.eqe_hop_num;
buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
int ret;
ba_num = DIV_ROUND_UP(PAGE_ALIGN(eq->entries * eq->eqe_size),
buf_chk_sz);
bt_num = DIV_ROUND_UP(ba_num, bt_chk_sz / BA_BYTE_LEN);
1 << page_shift);
hns_roce_init_buf_region(&region, hr_dev->caps.eqe_hop_num, 0, ba_num);
if (mhop_num == HNS_ROCE_HOP_NUM_0) {
if (eq->entries > buf_chk_sz / eq->eqe_size) {
dev_err(dev, "eq entries %d is larger than buf_pg_sz!",
eq->entries);
return -EINVAL;
}
eq->bt_l0 = dma_alloc_coherent(dev, eq->entries * eq->eqe_size,
&(eq->l0_dma), GFP_KERNEL);
if (!eq->bt_l0)
return -ENOMEM;
eq->cur_eqe_ba = eq->l0_dma;
eq->nxt_eqe_ba = 0;
return 0;
/* alloc a tmp list for storing eq buf address */
ret = hns_roce_alloc_buf_list(&region, &buf_list, 1);
if (ret) {
dev_err(hr_dev->dev, "alloc eq buf_list error\n");
return ret;
}
eq->buf_dma = kcalloc(ba_num, sizeof(*eq->buf_dma), GFP_KERNEL);
if (!eq->buf_dma)
return -ENOMEM;
eq->buf = kcalloc(ba_num, sizeof(*eq->buf), GFP_KERNEL);
if (!eq->buf)
goto err_kcalloc_buf;
if (mhop_num == 2) {
eq->l1_dma = kcalloc(bt_num, sizeof(*eq->l1_dma), GFP_KERNEL);
if (!eq->l1_dma)
goto err_kcalloc_l1_dma;
eq->bt_l1 = kcalloc(bt_num, sizeof(*eq->bt_l1), GFP_KERNEL);
if (!eq->bt_l1)
goto err_kcalloc_bt_l1;
ba_num = hns_roce_get_kmem_bufs(hr_dev, buf_list, region.count,
region.offset, &eq->buf);
if (ba_num != region.count) {
dev_err(hr_dev->dev, "get eqe buf err,expect %d,ret %d.\n",
region.count, ba_num);
ret = -ENOBUFS;
goto done;
}
/* alloc L0 BT */
eq->bt_l0 = dma_alloc_coherent(dev, bt_chk_sz, &eq->l0_dma, GFP_KERNEL);
if (!eq->bt_l0)
goto err_dma_alloc_l0;
hns_roce_mtr_init(&eq->mtr, PAGE_SHIFT + hr_dev->caps.eqe_ba_pg_sz,
page_shift);
ret = hns_roce_mtr_attach(hr_dev, &eq->mtr, &buf_list, &region, 1);
if (ret)
dev_err(hr_dev->dev, "mtr attatch error for eqe\n");
if (mhop_num == 1) {
if (ba_num > (bt_chk_sz / BA_BYTE_LEN))
dev_err(dev, "ba_num %d is too large for 1 hop\n",
ba_num);
goto done;
/* alloc buf */
for (i = 0; i < bt_chk_sz / BA_BYTE_LEN; i++) {
if (eq_buf_cnt + 1 < ba_num) {
size = buf_chk_sz;
} else {
eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
size = (eq->entries - eqe_alloc) * eq->eqe_size;
}
eq->buf[i] = dma_alloc_coherent(dev, size,
&(eq->buf_dma[i]),
GFP_KERNEL);
if (!eq->buf[i])
goto err_dma_alloc_buf;
hns_roce_mtr_cleanup(hr_dev, &eq->mtr);
done:
hns_roce_free_buf_list(&buf_list, 1);
*(eq->bt_l0 + i) = eq->buf_dma[i];
return ret;
}
eq_buf_cnt++;
if (eq_buf_cnt >= ba_num)
break;
}
eq->cur_eqe_ba = eq->buf_dma[0];
if (ba_num > 1)
eq->nxt_eqe_ba = eq->buf_dma[1];
static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
{
struct hns_roce_buf *buf = &eq->buf;
bool is_mhop = false;
u32 page_shift;
u32 mhop_num;
u32 max_size;
int ret;
} else if (mhop_num == 2) {
/* alloc L1 BT and buf */
for (i = 0; i < bt_chk_sz / BA_BYTE_LEN; i++) {
eq->bt_l1[i] = dma_alloc_coherent(dev, bt_chk_sz,
&(eq->l1_dma[i]),
GFP_KERNEL);
if (!eq->bt_l1[i])
goto err_dma_alloc_l1;
*(eq->bt_l0 + i) = eq->l1_dma[i];
for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
idx = i * bt_chk_sz / BA_BYTE_LEN + j;
if (eq_buf_cnt + 1 < ba_num) {
size = buf_chk_sz;
} else {
eqe_alloc = (buf_chk_sz / eq->eqe_size)
* idx;
size = (eq->entries - eqe_alloc)
* eq->eqe_size;
}
eq->buf[idx] = dma_alloc_coherent(dev, size,
&(eq->buf_dma[idx]),
GFP_KERNEL);
if (!eq->buf[idx])
goto err_dma_alloc_buf;
*(eq->bt_l1[i] + j) = eq->buf_dma[idx];
eq_buf_cnt++;
if (eq_buf_cnt >= ba_num) {
eq_alloc_done = 1;
break;
}
}
if (eq_alloc_done)
break;
}
eq->cur_eqe_ba = eq->buf_dma[0];
if (ba_num > 1)
eq->nxt_eqe_ba = eq->buf_dma[1];
page_shift = PAGE_SHIFT + hr_dev->caps.eqe_buf_pg_sz;
mhop_num = hr_dev->caps.eqe_hop_num;
if (!mhop_num) {
max_size = 1 << page_shift;
buf->size = max_size;
} else if (mhop_num == HNS_ROCE_HOP_NUM_0) {
max_size = eq->entries * eq->eqe_size;
buf->size = max_size;
} else {
max_size = 1 << page_shift;
buf->size = PAGE_ALIGN(eq->entries * eq->eqe_size);
is_mhop = true;
}
eq->l0_last_num = i + 1;
if (mhop_num == 2)
eq->l1_last_num = j + 1;
ret = hns_roce_buf_alloc(hr_dev, buf->size, max_size, buf, page_shift);
if (ret) {
dev_err(hr_dev->dev, "alloc eq buf error\n");
return ret;
}
if (is_mhop) {
ret = map_eq_buf(hr_dev, eq, page_shift);
if (ret) {
dev_err(hr_dev->dev, "map roce buf error\n");
goto err_alloc;
}
}
return 0;
err_dma_alloc_l1:
dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
eq->bt_l0 = NULL;
eq->l0_dma = 0;
for (i -= 1; i >= 0; i--) {
dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
eq->l1_dma[i]);
for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
idx = i * bt_chk_sz / BA_BYTE_LEN + j;
dma_free_coherent(dev, buf_chk_sz, eq->buf[idx],
eq->buf_dma[idx]);
}
}
goto err_dma_alloc_l0;
err_dma_alloc_buf:
dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
eq->bt_l0 = NULL;
eq->l0_dma = 0;
if (mhop_num == 1)
for (i -= 1; i >= 0; i--)
dma_free_coherent(dev, buf_chk_sz, eq->buf[i],
eq->buf_dma[i]);
else if (mhop_num == 2) {
record_i = i;
record_j = j;
for (; i >= 0; i--) {
dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
eq->l1_dma[i]);
for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
if (i == record_i && j >= record_j)
break;
idx = i * bt_chk_sz / BA_BYTE_LEN + j;
dma_free_coherent(dev, buf_chk_sz,
eq->buf[idx],
eq->buf_dma[idx]);
}
}
}
err_dma_alloc_l0:
kfree(eq->bt_l1);
eq->bt_l1 = NULL;
err_kcalloc_bt_l1:
kfree(eq->l1_dma);
eq->l1_dma = NULL;
err_kcalloc_l1_dma:
kfree(eq->buf);
eq->buf = NULL;
err_kcalloc_buf:
kfree(eq->buf_dma);
eq->buf_dma = NULL;
return -ENOMEM;
err_alloc:
hns_roce_buf_free(hr_dev, buf->size, buf);
return ret;
}
static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
struct hns_roce_eq *eq,
unsigned int eq_cmd)
{
struct device *dev = hr_dev->dev;
struct hns_roce_cmd_mailbox *mailbox;
u32 buf_chk_sz = 0;
int ret;
/* Allocate mailbox memory */
@ -6081,38 +5842,17 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
if (!hr_dev->caps.eqe_hop_num) {
buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
eq->buf_list = kzalloc(sizeof(struct hns_roce_buf_list),
GFP_KERNEL);
if (!eq->buf_list) {
ret = -ENOMEM;
goto free_cmd_mbox;
}
eq->buf_list->buf = dma_alloc_coherent(dev, buf_chk_sz,
&(eq->buf_list->map),
GFP_KERNEL);
if (!eq->buf_list->buf) {
ret = -ENOMEM;
goto err_alloc_buf;
}
} else {
ret = hns_roce_mhop_alloc_eq(hr_dev, eq);
if (ret) {
ret = -ENOMEM;
goto free_cmd_mbox;
}
ret = alloc_eq_buf(hr_dev, eq);
if (ret) {
ret = -ENOMEM;
goto free_cmd_mbox;
}
hns_roce_config_eqc(hr_dev, eq, mailbox->buf);
ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, eq->eqn, 0,
eq_cmd, HNS_ROCE_CMD_TIMEOUT_MSECS);
if (ret) {
dev_err(dev, "[mailbox cmd] create eqc failed.\n");
dev_err(hr_dev->dev, "[mailbox cmd] create eqc failed.\n");
goto err_cmd_mbox;
}
@ -6121,16 +5861,7 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
return 0;
err_cmd_mbox:
if (!hr_dev->caps.eqe_hop_num)
dma_free_coherent(dev, buf_chk_sz, eq->buf_list->buf,
eq->buf_list->map);
else {
hns_roce_mhop_free_eq(hr_dev, eq);
goto free_cmd_mbox;
}
err_alloc_buf:
kfree(eq->buf_list);
free_eq_buf(hr_dev, eq);
free_cmd_mbox:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
@ -6310,7 +6041,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
err_create_eq_fail:
for (i -= 1; i >= 0; i--)
hns_roce_v2_free_eq(hr_dev, &eq_table->eq[i]);
free_eq_buf(hr_dev, &eq_table->eq[i]);
kfree(eq_table->eq);
return ret;
@ -6332,7 +6063,7 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
for (i = 0; i < eq_num; i++) {
hns_roce_v2_destroy_eqc(hr_dev, i);
hns_roce_v2_free_eq(hr_dev, &eq_table->eq[i]);
free_eq_buf(hr_dev, &eq_table->eq[i]);
}
kfree(eq_table->eq);