scsi: hisi_sas: optimise the usage of hisi_hba.lock

Currently hisi_hba.lock is taken to deliver and receive a command
to/from any hw queue. This causes heavy contention at high data rates.

To boost performance, lock on a per-queue basis for sending and
receiving commands to/from hw.

Certain critical regions still need to be locked in the delivery and
completion stages with hisi_hba.lock.

A new element, hisi_sas_device.dq, is added to store the delivery queue
for a device, so that it does not need to be re-calculated for every
task.

Signed-off-by: Xiang Chen <chenxiang66@hisilicon.com>
Signed-off-by: John Garry <john.garry@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
This commit is contained in:
Xiang Chen 2017-06-14 23:33:13 +08:00 committed by Martin K. Petersen
parent ad6048325c
commit b1a49412f0
4 changed files with 77 additions and 58 deletions

View File

@ -102,6 +102,8 @@ struct hisi_sas_cq {
struct hisi_sas_dq { struct hisi_sas_dq {
struct hisi_hba *hisi_hba; struct hisi_hba *hisi_hba;
struct hisi_sas_slot *slot_prep;
spinlock_t lock;
int wr_point; int wr_point;
int id; int id;
}; };
@ -109,6 +111,7 @@ struct hisi_sas_dq {
struct hisi_sas_device { struct hisi_sas_device {
struct hisi_hba *hisi_hba; struct hisi_hba *hisi_hba;
struct domain_device *sas_device; struct domain_device *sas_device;
struct hisi_sas_dq *dq;
struct list_head list; struct list_head list;
u64 attached_phy; u64 attached_phy;
atomic64_t running_req; atomic64_t running_req;
@ -154,9 +157,8 @@ struct hisi_sas_hw {
struct domain_device *device); struct domain_device *device);
struct hisi_sas_device *(*alloc_dev)(struct domain_device *device); struct hisi_sas_device *(*alloc_dev)(struct domain_device *device);
void (*sl_notify)(struct hisi_hba *hisi_hba, int phy_no); void (*sl_notify)(struct hisi_hba *hisi_hba, int phy_no);
int (*get_free_slot)(struct hisi_hba *hisi_hba, u32 dev_id, int (*get_free_slot)(struct hisi_hba *hisi_hba, struct hisi_sas_dq *dq);
int *q, int *s); void (*start_delivery)(struct hisi_sas_dq *dq);
void (*start_delivery)(struct hisi_hba *hisi_hba);
int (*prep_ssp)(struct hisi_hba *hisi_hba, int (*prep_ssp)(struct hisi_hba *hisi_hba,
struct hisi_sas_slot *slot, int is_tmf, struct hisi_sas_slot *slot, int is_tmf,
struct hisi_sas_tmf_task *tmf); struct hisi_sas_tmf_task *tmf);
@ -217,7 +219,6 @@ struct hisi_hba {
struct hisi_sas_port port[HISI_SAS_MAX_PHYS]; struct hisi_sas_port port[HISI_SAS_MAX_PHYS];
int queue_count; int queue_count;
struct hisi_sas_slot *slot_prep;
struct dma_pool *sge_page_pool; struct dma_pool *sge_page_pool;
struct hisi_sas_device devices[HISI_SAS_MAX_DEVICES]; struct hisi_sas_device devices[HISI_SAS_MAX_DEVICES];

View File

@ -179,10 +179,11 @@ static void hisi_sas_slot_abort(struct work_struct *work)
task->task_done(task); task->task_done(task);
} }
static int hisi_sas_task_prep(struct sas_task *task, struct hisi_hba *hisi_hba, static int hisi_sas_task_prep(struct sas_task *task, struct hisi_sas_dq
int is_tmf, struct hisi_sas_tmf_task *tmf, *dq, int is_tmf, struct hisi_sas_tmf_task *tmf,
int *pass) int *pass)
{ {
struct hisi_hba *hisi_hba = dq->hisi_hba;
struct domain_device *device = task->dev; struct domain_device *device = task->dev;
struct hisi_sas_device *sas_dev = device->lldd_dev; struct hisi_sas_device *sas_dev = device->lldd_dev;
struct hisi_sas_port *port; struct hisi_sas_port *port;
@ -240,18 +241,24 @@ static int hisi_sas_task_prep(struct sas_task *task, struct hisi_hba *hisi_hba,
} else } else
n_elem = task->num_scatter; n_elem = task->num_scatter;
spin_lock_irqsave(&hisi_hba->lock, flags);
if (hisi_hba->hw->slot_index_alloc) if (hisi_hba->hw->slot_index_alloc)
rc = hisi_hba->hw->slot_index_alloc(hisi_hba, &slot_idx, rc = hisi_hba->hw->slot_index_alloc(hisi_hba, &slot_idx,
device); device);
else else
rc = hisi_sas_slot_index_alloc(hisi_hba, &slot_idx); rc = hisi_sas_slot_index_alloc(hisi_hba, &slot_idx);
if (rc) if (rc) {
spin_unlock_irqrestore(&hisi_hba->lock, flags);
goto err_out; goto err_out;
rc = hisi_hba->hw->get_free_slot(hisi_hba, sas_dev->device_id, }
&dlvry_queue, &dlvry_queue_slot); spin_unlock_irqrestore(&hisi_hba->lock, flags);
rc = hisi_hba->hw->get_free_slot(hisi_hba, dq);
if (rc) if (rc)
goto err_out_tag; goto err_out_tag;
dlvry_queue = dq->id;
dlvry_queue_slot = dq->wr_point;
slot = &hisi_hba->slot_info[slot_idx]; slot = &hisi_hba->slot_info[slot_idx];
memset(slot, 0, sizeof(struct hisi_sas_slot)); memset(slot, 0, sizeof(struct hisi_sas_slot));
@ -316,7 +323,7 @@ static int hisi_sas_task_prep(struct sas_task *task, struct hisi_hba *hisi_hba,
task->task_state_flags |= SAS_TASK_AT_INITIATOR; task->task_state_flags |= SAS_TASK_AT_INITIATOR;
spin_unlock_irqrestore(&task->task_state_lock, flags); spin_unlock_irqrestore(&task->task_state_lock, flags);
hisi_hba->slot_prep = slot; dq->slot_prep = slot;
atomic64_inc(&sas_dev->running_req); atomic64_inc(&sas_dev->running_req);
++(*pass); ++(*pass);
@ -335,7 +342,9 @@ static int hisi_sas_task_prep(struct sas_task *task, struct hisi_hba *hisi_hba,
err_out_slot_buf: err_out_slot_buf:
/* Nothing to be done */ /* Nothing to be done */
err_out_tag: err_out_tag:
spin_lock_irqsave(&hisi_hba->lock, flags);
hisi_sas_slot_index_free(hisi_hba, slot_idx); hisi_sas_slot_index_free(hisi_hba, slot_idx);
spin_unlock_irqrestore(&hisi_hba->lock, flags);
err_out: err_out:
dev_err(dev, "task prep: failed[%d]!\n", rc); dev_err(dev, "task prep: failed[%d]!\n", rc);
if (!sas_protocol_ata(task->task_proto)) if (!sas_protocol_ata(task->task_proto))
@ -354,19 +363,22 @@ static int hisi_sas_task_exec(struct sas_task *task, gfp_t gfp_flags,
unsigned long flags; unsigned long flags;
struct hisi_hba *hisi_hba = dev_to_hisi_hba(task->dev); struct hisi_hba *hisi_hba = dev_to_hisi_hba(task->dev);
struct device *dev = &hisi_hba->pdev->dev; struct device *dev = &hisi_hba->pdev->dev;
struct domain_device *device = task->dev;
struct hisi_sas_device *sas_dev = device->lldd_dev;
struct hisi_sas_dq *dq = sas_dev->dq;
if (unlikely(test_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags))) if (unlikely(test_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags)))
return -EINVAL; return -EINVAL;
/* protect task_prep and start_delivery sequence */ /* protect task_prep and start_delivery sequence */
spin_lock_irqsave(&hisi_hba->lock, flags); spin_lock_irqsave(&dq->lock, flags);
rc = hisi_sas_task_prep(task, hisi_hba, is_tmf, tmf, &pass); rc = hisi_sas_task_prep(task, dq, is_tmf, tmf, &pass);
if (rc) if (rc)
dev_err(dev, "task exec: failed[%d]!\n", rc); dev_err(dev, "task exec: failed[%d]!\n", rc);
if (likely(pass)) if (likely(pass))
hisi_hba->hw->start_delivery(hisi_hba); hisi_hba->hw->start_delivery(dq);
spin_unlock_irqrestore(&hisi_hba->lock, flags); spin_unlock_irqrestore(&dq->lock, flags);
return rc; return rc;
} }
@ -421,12 +433,16 @@ static struct hisi_sas_device *hisi_sas_alloc_dev(struct domain_device *device)
spin_lock(&hisi_hba->lock); spin_lock(&hisi_hba->lock);
for (i = 0; i < HISI_SAS_MAX_DEVICES; i++) { for (i = 0; i < HISI_SAS_MAX_DEVICES; i++) {
if (hisi_hba->devices[i].dev_type == SAS_PHY_UNUSED) { if (hisi_hba->devices[i].dev_type == SAS_PHY_UNUSED) {
int queue = i % hisi_hba->queue_count;
struct hisi_sas_dq *dq = &hisi_hba->dq[queue];
hisi_hba->devices[i].device_id = i; hisi_hba->devices[i].device_id = i;
sas_dev = &hisi_hba->devices[i]; sas_dev = &hisi_hba->devices[i];
sas_dev->dev_status = HISI_SAS_DEV_NORMAL; sas_dev->dev_status = HISI_SAS_DEV_NORMAL;
sas_dev->dev_type = device->dev_type; sas_dev->dev_type = device->dev_type;
sas_dev->hisi_hba = hisi_hba; sas_dev->hisi_hba = hisi_hba;
sas_dev->sas_device = device; sas_dev->sas_device = device;
sas_dev->dq = dq;
INIT_LIST_HEAD(&hisi_hba->devices[i].list); INIT_LIST_HEAD(&hisi_hba->devices[i].list);
break; break;
} }
@ -1140,8 +1156,9 @@ hisi_sas_internal_abort_task_exec(struct hisi_hba *hisi_hba, int device_id,
struct hisi_sas_slot *slot; struct hisi_sas_slot *slot;
struct asd_sas_port *sas_port = device->port; struct asd_sas_port *sas_port = device->port;
struct hisi_sas_cmd_hdr *cmd_hdr_base; struct hisi_sas_cmd_hdr *cmd_hdr_base;
struct hisi_sas_dq *dq = sas_dev->dq;
int dlvry_queue_slot, dlvry_queue, n_elem = 0, rc, slot_idx; int dlvry_queue_slot, dlvry_queue, n_elem = 0, rc, slot_idx;
unsigned long flags; unsigned long flags, flags_dq;
if (unlikely(test_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags))) if (unlikely(test_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags)))
return -EINVAL; return -EINVAL;
@ -1152,14 +1169,22 @@ hisi_sas_internal_abort_task_exec(struct hisi_hba *hisi_hba, int device_id,
port = to_hisi_sas_port(sas_port); port = to_hisi_sas_port(sas_port);
/* simply get a slot and send abort command */ /* simply get a slot and send abort command */
spin_lock_irqsave(&hisi_hba->lock, flags);
rc = hisi_sas_slot_index_alloc(hisi_hba, &slot_idx); rc = hisi_sas_slot_index_alloc(hisi_hba, &slot_idx);
if (rc) if (rc) {
spin_unlock_irqrestore(&hisi_hba->lock, flags);
goto err_out; goto err_out;
rc = hisi_hba->hw->get_free_slot(hisi_hba, sas_dev->device_id, }
&dlvry_queue, &dlvry_queue_slot); spin_unlock_irqrestore(&hisi_hba->lock, flags);
spin_lock_irqsave(&dq->lock, flags_dq);
rc = hisi_hba->hw->get_free_slot(hisi_hba, dq);
if (rc) if (rc)
goto err_out_tag; goto err_out_tag;
dlvry_queue = dq->id;
dlvry_queue_slot = dq->wr_point;
slot = &hisi_hba->slot_info[slot_idx]; slot = &hisi_hba->slot_info[slot_idx];
memset(slot, 0, sizeof(struct hisi_sas_slot)); memset(slot, 0, sizeof(struct hisi_sas_slot));
@ -1186,17 +1211,21 @@ hisi_sas_internal_abort_task_exec(struct hisi_hba *hisi_hba, int device_id,
task->task_state_flags |= SAS_TASK_AT_INITIATOR; task->task_state_flags |= SAS_TASK_AT_INITIATOR;
spin_unlock_irqrestore(&task->task_state_lock, flags); spin_unlock_irqrestore(&task->task_state_lock, flags);
hisi_hba->slot_prep = slot; dq->slot_prep = slot;
atomic64_inc(&sas_dev->running_req); atomic64_inc(&sas_dev->running_req);
/* send abort command to our chip */ /* send abort command to the chip */
hisi_hba->hw->start_delivery(hisi_hba); hisi_hba->hw->start_delivery(dq);
spin_unlock_irqrestore(&dq->lock, flags_dq);
return 0; return 0;
err_out_tag: err_out_tag:
spin_lock_irqsave(&hisi_hba->lock, flags);
hisi_sas_slot_index_free(hisi_hba, slot_idx); hisi_sas_slot_index_free(hisi_hba, slot_idx);
spin_unlock_irqrestore(&hisi_hba->lock, flags);
spin_unlock_irqrestore(&dq->lock, flags_dq);
err_out: err_out:
dev_err(dev, "internal abort task prep: failed[%d]!\n", rc); dev_err(dev, "internal abort task prep: failed[%d]!\n", rc);
@ -1221,7 +1250,6 @@ hisi_sas_internal_task_abort(struct hisi_hba *hisi_hba,
struct hisi_sas_device *sas_dev = device->lldd_dev; struct hisi_sas_device *sas_dev = device->lldd_dev;
struct device *dev = &hisi_hba->pdev->dev; struct device *dev = &hisi_hba->pdev->dev;
int res; int res;
unsigned long flags;
if (!hisi_hba->hw->prep_abort) if (!hisi_hba->hw->prep_abort)
return -EOPNOTSUPP; return -EOPNOTSUPP;
@ -1238,11 +1266,8 @@ hisi_sas_internal_task_abort(struct hisi_hba *hisi_hba,
task->slow_task->timer.expires = jiffies + msecs_to_jiffies(110); task->slow_task->timer.expires = jiffies + msecs_to_jiffies(110);
add_timer(&task->slow_task->timer); add_timer(&task->slow_task->timer);
/* Lock as we are alloc'ing a slot, which cannot be interrupted */
spin_lock_irqsave(&hisi_hba->lock, flags);
res = hisi_sas_internal_abort_task_exec(hisi_hba, sas_dev->device_id, res = hisi_sas_internal_abort_task_exec(hisi_hba, sas_dev->device_id,
task, abort_flag, tag); task, abort_flag, tag);
spin_unlock_irqrestore(&hisi_hba->lock, flags);
if (res) { if (res) {
del_timer(&task->slow_task->timer); del_timer(&task->slow_task->timer);
dev_err(dev, "internal task abort: executing internal task failed: %d\n", dev_err(dev, "internal task abort: executing internal task failed: %d\n",

View File

@ -900,22 +900,17 @@ static int get_wideport_bitmap_v1_hw(struct hisi_hba *hisi_hba, int port_id)
return bitmap; return bitmap;
} }
/** /*
* This function allocates across all queues to load balance.
* Slots are allocated from queues in a round-robin fashion.
*
* The callpath to this function and upto writing the write * The callpath to this function and upto writing the write
* queue pointer should be safe from interruption. * queue pointer should be safe from interruption.
*/ */
static int get_free_slot_v1_hw(struct hisi_hba *hisi_hba, u32 dev_id, static int
int *q, int *s) get_free_slot_v1_hw(struct hisi_hba *hisi_hba, struct hisi_sas_dq *dq)
{ {
struct device *dev = &hisi_hba->pdev->dev; struct device *dev = &hisi_hba->pdev->dev;
struct hisi_sas_dq *dq; int queue = dq->id;
u32 r, w; u32 r, w;
int queue = dev_id % hisi_hba->queue_count;
dq = &hisi_hba->dq[queue];
w = dq->wr_point; w = dq->wr_point;
r = hisi_sas_read32_relaxed(hisi_hba, r = hisi_sas_read32_relaxed(hisi_hba,
DLVRY_Q_0_RD_PTR + (queue * 0x14)); DLVRY_Q_0_RD_PTR + (queue * 0x14));
@ -924,16 +919,14 @@ static int get_free_slot_v1_hw(struct hisi_hba *hisi_hba, u32 dev_id,
return -EAGAIN; return -EAGAIN;
} }
*q = queue;
*s = w;
return 0; return 0;
} }
static void start_delivery_v1_hw(struct hisi_hba *hisi_hba) static void start_delivery_v1_hw(struct hisi_sas_dq *dq)
{ {
int dlvry_queue = hisi_hba->slot_prep->dlvry_queue; struct hisi_hba *hisi_hba = dq->hisi_hba;
int dlvry_queue_slot = hisi_hba->slot_prep->dlvry_queue_slot; int dlvry_queue = dq->slot_prep->dlvry_queue;
struct hisi_sas_dq *dq = &hisi_hba->dq[dlvry_queue]; int dlvry_queue_slot = dq->slot_prep->dlvry_queue_slot;
dq->wr_point = ++dlvry_queue_slot % HISI_SAS_QUEUE_SLOTS; dq->wr_point = ++dlvry_queue_slot % HISI_SAS_QUEUE_SLOTS;
hisi_sas_write32(hisi_hba, DLVRY_Q_0_WR_PTR + (dlvry_queue * 0x14), hisi_sas_write32(hisi_hba, DLVRY_Q_0_WR_PTR + (dlvry_queue * 0x14),

View File

@ -695,6 +695,9 @@ hisi_sas_device *alloc_dev_quirk_v2_hw(struct domain_device *device)
if (sata_dev && (i & 1)) if (sata_dev && (i & 1))
continue; continue;
if (hisi_hba->devices[i].dev_type == SAS_PHY_UNUSED) { if (hisi_hba->devices[i].dev_type == SAS_PHY_UNUSED) {
int queue = i % hisi_hba->queue_count;
struct hisi_sas_dq *dq = &hisi_hba->dq[queue];
hisi_hba->devices[i].device_id = i; hisi_hba->devices[i].device_id = i;
sas_dev = &hisi_hba->devices[i]; sas_dev = &hisi_hba->devices[i];
sas_dev->dev_status = HISI_SAS_DEV_NORMAL; sas_dev->dev_status = HISI_SAS_DEV_NORMAL;
@ -702,6 +705,7 @@ hisi_sas_device *alloc_dev_quirk_v2_hw(struct domain_device *device)
sas_dev->hisi_hba = hisi_hba; sas_dev->hisi_hba = hisi_hba;
sas_dev->sas_device = device; sas_dev->sas_device = device;
sas_dev->sata_idx = sata_idx; sas_dev->sata_idx = sata_idx;
sas_dev->dq = dq;
INIT_LIST_HEAD(&hisi_hba->devices[i].list); INIT_LIST_HEAD(&hisi_hba->devices[i].list);
break; break;
} }
@ -1454,22 +1458,17 @@ static int get_wideport_bitmap_v2_hw(struct hisi_hba *hisi_hba, int port_id)
return bitmap; return bitmap;
} }
/** /*
* This function allocates across all queues to load balance.
* Slots are allocated from queues in a round-robin fashion.
*
* The callpath to this function and upto writing the write * The callpath to this function and upto writing the write
* queue pointer should be safe from interruption. * queue pointer should be safe from interruption.
*/ */
static int get_free_slot_v2_hw(struct hisi_hba *hisi_hba, u32 dev_id, static int
int *q, int *s) get_free_slot_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_dq *dq)
{ {
struct device *dev = &hisi_hba->pdev->dev; struct device *dev = &hisi_hba->pdev->dev;
struct hisi_sas_dq *dq; int queue = dq->id;
u32 r, w; u32 r, w;
int queue = dev_id % hisi_hba->queue_count;
dq = &hisi_hba->dq[queue];
w = dq->wr_point; w = dq->wr_point;
r = hisi_sas_read32_relaxed(hisi_hba, r = hisi_sas_read32_relaxed(hisi_hba,
DLVRY_Q_0_RD_PTR + (queue * 0x14)); DLVRY_Q_0_RD_PTR + (queue * 0x14));
@ -1479,16 +1478,14 @@ static int get_free_slot_v2_hw(struct hisi_hba *hisi_hba, u32 dev_id,
return -EAGAIN; return -EAGAIN;
} }
*q = queue;
*s = w;
return 0; return 0;
} }
static void start_delivery_v2_hw(struct hisi_hba *hisi_hba) static void start_delivery_v2_hw(struct hisi_sas_dq *dq)
{ {
int dlvry_queue = hisi_hba->slot_prep->dlvry_queue; struct hisi_hba *hisi_hba = dq->hisi_hba;
int dlvry_queue_slot = hisi_hba->slot_prep->dlvry_queue_slot; int dlvry_queue = dq->slot_prep->dlvry_queue;
struct hisi_sas_dq *dq = &hisi_hba->dq[dlvry_queue]; int dlvry_queue_slot = dq->slot_prep->dlvry_queue_slot;
dq->wr_point = ++dlvry_queue_slot % HISI_SAS_QUEUE_SLOTS; dq->wr_point = ++dlvry_queue_slot % HISI_SAS_QUEUE_SLOTS;
hisi_sas_write32(hisi_hba, DLVRY_Q_0_WR_PTR + (dlvry_queue * 0x14), hisi_sas_write32(hisi_hba, DLVRY_Q_0_WR_PTR + (dlvry_queue * 0x14),
@ -2344,7 +2341,9 @@ slot_complete_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot)
spin_lock_irqsave(&task->task_state_lock, flags); spin_lock_irqsave(&task->task_state_lock, flags);
task->task_state_flags |= SAS_TASK_STATE_DONE; task->task_state_flags |= SAS_TASK_STATE_DONE;
spin_unlock_irqrestore(&task->task_state_lock, flags); spin_unlock_irqrestore(&task->task_state_lock, flags);
spin_lock_irqsave(&hisi_hba->lock, flags);
hisi_sas_slot_task_free(hisi_hba, task, slot); hisi_sas_slot_task_free(hisi_hba, task, slot);
spin_unlock_irqrestore(&hisi_hba->lock, flags);
sts = ts->stat; sts = ts->stat;
if (task->task_done) if (task->task_done)
@ -3162,13 +3161,14 @@ static void cq_tasklet_v2_hw(unsigned long val)
struct hisi_sas_complete_v2_hdr *complete_queue; struct hisi_sas_complete_v2_hdr *complete_queue;
u32 rd_point = cq->rd_point, wr_point, dev_id; u32 rd_point = cq->rd_point, wr_point, dev_id;
int queue = cq->id; int queue = cq->id;
struct hisi_sas_dq *dq = &hisi_hba->dq[queue];
if (unlikely(hisi_hba->reject_stp_links_msk)) if (unlikely(hisi_hba->reject_stp_links_msk))
phys_try_accept_stp_links_v2_hw(hisi_hba); phys_try_accept_stp_links_v2_hw(hisi_hba);
complete_queue = hisi_hba->complete_hdr[queue]; complete_queue = hisi_hba->complete_hdr[queue];
spin_lock(&hisi_hba->lock); spin_lock(&dq->lock);
wr_point = hisi_sas_read32(hisi_hba, COMPL_Q_0_WR_PTR + wr_point = hisi_sas_read32(hisi_hba, COMPL_Q_0_WR_PTR +
(0x14 * queue)); (0x14 * queue));
@ -3218,7 +3218,7 @@ static void cq_tasklet_v2_hw(unsigned long val)
/* update rd_point */ /* update rd_point */
cq->rd_point = rd_point; cq->rd_point = rd_point;
hisi_sas_write32(hisi_hba, COMPL_Q_0_RD_PTR + (0x14 * queue), rd_point); hisi_sas_write32(hisi_hba, COMPL_Q_0_RD_PTR + (0x14 * queue), rd_point);
spin_unlock(&hisi_hba->lock); spin_unlock(&dq->lock);
} }
static irqreturn_t cq_interrupt_v2_hw(int irq_no, void *p) static irqreturn_t cq_interrupt_v2_hw(int irq_no, void *p)