mirror of https://gitee.com/openkylin/linux.git
habanalabs: use queue pi/ci in order to determine queue occupancy
Instead of using the number of free slots on the compute CQ to determine whether we can submit work to the queues, use the queues' pi/ci. This is needed for future ASICs, which do not have a CQ per queue.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
This commit is contained in:
parent
3abc99bb7d
commit
79b1894c41
|
@ -1144,14 +1144,17 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
|
|||
* because there the addresses of the completion queues are being
|
||||
* passed as arguments to request_irq
|
||||
*/
|
||||
hdev->completion_queue = kcalloc(cq_cnt,
|
||||
sizeof(*hdev->completion_queue),
|
||||
GFP_KERNEL);
|
||||
if (cq_cnt) {
|
||||
hdev->completion_queue = kcalloc(cq_cnt,
|
||||
sizeof(*hdev->completion_queue),
|
||||
GFP_KERNEL);
|
||||
|
||||
if (!hdev->completion_queue) {
|
||||
dev_err(hdev->dev, "failed to allocate completion queues\n");
|
||||
rc = -ENOMEM;
|
||||
goto hw_queues_destroy;
|
||||
if (!hdev->completion_queue) {
|
||||
dev_err(hdev->dev,
|
||||
"failed to allocate completion queues\n");
|
||||
rc = -ENOMEM;
|
||||
goto hw_queues_destroy;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
|
||||
|
|
|
@ -461,7 +461,7 @@ struct hl_hw_queue {
|
|||
u64 kernel_address;
|
||||
dma_addr_t bus_address;
|
||||
u32 pi;
|
||||
u32 ci;
|
||||
atomic_t ci;
|
||||
u32 hw_queue_id;
|
||||
u32 cq_id;
|
||||
u32 msi_vec;
|
||||
|
|
|
@ -23,10 +23,14 @@ inline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val)
|
|||
ptr &= ((HL_QUEUE_LENGTH << 1) - 1);
|
||||
return ptr;
|
||||
}
|
||||
/*
 * queue_ci_get() - Read a queue's CI counter, wrapped to the pointer range.
 * @ci: Pointer to the queue's atomic CI counter.
 * @queue_len: Length of the queue, in entries.
 *
 * Return: the current value of @ci masked with ((queue_len << 1) - 1).
 *
 * NOTE(review): the mask behaves as a modulo of twice the queue length only
 * when @queue_len is a power of two - confirm this holds for every caller.
 */
static inline int queue_ci_get(atomic_t *ci, u32 queue_len)
|
||||
{
|
||||
return atomic_read(ci) & ((queue_len << 1) - 1);
|
||||
}
|
||||
|
||||
static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
|
||||
{
|
||||
int delta = (q->pi - q->ci);
|
||||
int delta = (q->pi - queue_ci_get(&q->ci, queue_len));
|
||||
|
||||
if (delta >= 0)
|
||||
return (queue_len - delta);
|
||||
|
@ -40,21 +44,14 @@ void hl_int_hw_queue_update_ci(struct hl_cs *cs)
|
|||
struct hl_hw_queue *q;
|
||||
int i;
|
||||
|
||||
hdev->asic_funcs->hw_queues_lock(hdev);
|
||||
|
||||
if (hdev->disabled)
|
||||
goto out;
|
||||
return;
|
||||
|
||||
q = &hdev->kernel_queues[0];
|
||||
for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
|
||||
if (q->queue_type == QUEUE_TYPE_INT) {
|
||||
q->ci += cs->jobs_in_queue_cnt[i];
|
||||
q->ci &= ((q->int_queue_len << 1) - 1);
|
||||
}
|
||||
if (q->queue_type == QUEUE_TYPE_INT)
|
||||
atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
|
||||
}
|
||||
|
||||
out:
|
||||
hdev->asic_funcs->hw_queues_unlock(hdev);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -174,38 +171,26 @@ static int int_queue_sanity_checks(struct hl_device *hdev,
|
|||
}
|
||||
|
||||
/*
|
||||
* hw_queue_sanity_checks() - Perform some sanity checks on a H/W queue.
|
||||
* hw_queue_sanity_checks() - Make sure we have enough space in the h/w queue
|
||||
* @hdev: Pointer to hl_device structure.
|
||||
* @q: Pointer to hl_hw_queue structure.
|
||||
* @num_of_entries: How many entries to check for space.
|
||||
*
|
||||
* Perform the following:
|
||||
* - Make sure we have enough space in the completion queue.
|
||||
* This check also ensures that there is enough space in the h/w queue, as
|
||||
* both queues are of the same size.
|
||||
* - Reserve space in the completion queue (needs to be reversed if there
|
||||
* is a failure down the road before the actual submission of work).
|
||||
* Notice: We do not reserve queue entries so this function mustn't be called
|
||||
* more than once per CS for the same queue
|
||||
*
|
||||
* Both operations are done using the "free_slots_cnt" field of the completion
|
||||
* queue. The CI counters of the queue and the completion queue are not
|
||||
* needed/used for the H/W queue type.
|
||||
*/
|
||||
static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
|
||||
int num_of_entries)
|
||||
{
|
||||
atomic_t *free_slots =
|
||||
&hdev->completion_queue[q->cq_id].free_slots_cnt;
|
||||
int free_slots_cnt;
|
||||
|
||||
/*
|
||||
* Check we have enough space in the completion queue.
|
||||
* Add -1 to counter (decrement) unless counter was already 0.
|
||||
* In that case, CQ is full so we can't submit a new CB.
|
||||
* atomic_add_unless will return 0 if counter was already 0.
|
||||
*/
|
||||
if (atomic_add_negative(num_of_entries * -1, free_slots)) {
|
||||
dev_dbg(hdev->dev, "No space for %d entries on CQ %d\n",
|
||||
num_of_entries, q->hw_queue_id);
|
||||
atomic_add(num_of_entries, free_slots);
|
||||
/* Check we have enough space in the queue */
|
||||
free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);
|
||||
|
||||
if (free_slots_cnt < num_of_entries) {
|
||||
dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
|
||||
q->hw_queue_id, num_of_entries);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
|
@ -366,7 +351,6 @@ static void hw_queue_schedule_job(struct hl_cs_job *job)
|
|||
{
|
||||
struct hl_device *hdev = job->cs->ctx->hdev;
|
||||
struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
|
||||
struct hl_cq *cq;
|
||||
u64 ptr;
|
||||
u32 offset, ctl, len;
|
||||
|
||||
|
@ -395,17 +379,6 @@ static void hw_queue_schedule_job(struct hl_cs_job *job)
|
|||
else
|
||||
ptr = (u64) (uintptr_t) job->user_cb;
|
||||
|
||||
/*
|
||||
* No need to protect pi_offset because scheduling to the
|
||||
* H/W queues is done under the scheduler mutex
|
||||
*
|
||||
* No need to check if CQ is full because it was already
|
||||
* checked in hw_queue_sanity_checks
|
||||
*/
|
||||
cq = &hdev->completion_queue[q->cq_id];
|
||||
|
||||
cq->pi = hl_cq_inc_ptr(cq->pi);
|
||||
|
||||
ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
|
||||
}
|
||||
|
||||
|
@ -552,8 +525,7 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
|
|||
goto unroll_cq_resv;
|
||||
}
|
||||
|
||||
if (q->queue_type == QUEUE_TYPE_EXT ||
|
||||
q->queue_type == QUEUE_TYPE_HW)
|
||||
if (q->queue_type == QUEUE_TYPE_EXT)
|
||||
cq_cnt++;
|
||||
}
|
||||
}
|
||||
|
@ -605,9 +577,8 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
|
|||
unroll_cq_resv:
|
||||
q = &hdev->kernel_queues[0];
|
||||
for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
|
||||
if ((q->queue_type == QUEUE_TYPE_EXT ||
|
||||
q->queue_type == QUEUE_TYPE_HW) &&
|
||||
cs->jobs_in_queue_cnt[i]) {
|
||||
if ((q->queue_type == QUEUE_TYPE_EXT) &&
|
||||
(cs->jobs_in_queue_cnt[i])) {
|
||||
atomic_t *free_slots =
|
||||
&hdev->completion_queue[i].free_slots_cnt;
|
||||
atomic_add(cs->jobs_in_queue_cnt[i], free_slots);
|
||||
|
@ -631,7 +602,7 @@ void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
|
|||
{
|
||||
struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
|
||||
|
||||
q->ci = hl_queue_inc_ptr(q->ci);
|
||||
atomic_inc(&q->ci);
|
||||
}
|
||||
|
||||
static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
|
||||
|
@ -666,7 +637,7 @@ static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
|
|||
}
|
||||
|
||||
/* Make sure read/write pointers are initialized to start of queue */
|
||||
q->ci = 0;
|
||||
atomic_set(&q->ci, 0);
|
||||
q->pi = 0;
|
||||
|
||||
return 0;
|
||||
|
@ -700,7 +671,7 @@ static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
|
|||
|
||||
q->kernel_address = (u64) (uintptr_t) p;
|
||||
q->pi = 0;
|
||||
q->ci = 0;
|
||||
atomic_set(&q->ci, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -729,7 +700,7 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
|
|||
q->kernel_address = (u64) (uintptr_t) p;
|
||||
|
||||
/* Make sure read/write pointers are initialized to start of queue */
|
||||
q->ci = 0;
|
||||
atomic_set(&q->ci, 0);
|
||||
q->pi = 0;
|
||||
|
||||
return 0;
|
||||
|
@ -931,7 +902,8 @@ void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
|
|||
if ((!q->valid) ||
|
||||
((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU)))
|
||||
continue;
|
||||
q->pi = q->ci = 0;
|
||||
q->pi = 0;
|
||||
atomic_set(&q->ci, 0);
|
||||
|
||||
if (q->supports_sync_stream)
|
||||
sync_stream_queue_reset(hdev, q->hw_queue_id);
|
||||
|
|
|
@ -122,12 +122,7 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
|
|||
queue_work(hdev->cq_wq, &job->finish_work);
|
||||
}
|
||||
|
||||
/* Update ci of the context's queue. There is no
|
||||
* need to protect it with spinlock because this update is
|
||||
* done only inside IRQ and there is a different IRQ per
|
||||
* queue
|
||||
*/
|
||||
queue->ci = hl_queue_inc_ptr(queue->ci);
|
||||
atomic_inc(&queue->ci);
|
||||
|
||||
/* Clear CQ entry ready bit */
|
||||
cq_entry->data = cpu_to_le32(le32_to_cpu(cq_entry->data) &
|
||||
|
|
Loading…
Reference in New Issue