habanalabs: flush only at the end of the map/unmap
Optimize hl_mmu_map() and hl_mmu_unmap() by not calling flush(ctx) within the per-page loop. Instead, callers pass a new flush_pte argument, and the flush is performed once, after the last page of the range has been mapped or unmapped.

Signed-off-by: Pawel Piskorski <ppiskorski@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent bbde5709ee
commit 7fc40bcaa6
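The caller-side pattern introduced by the patch, as a minimal sketch (map_range() is a hypothetical helper, not part of the driver): each page is mapped with the new flush_pte argument set only on the final iteration, so the PCI flush is issued once per range instead of once per page.

/* Hypothetical helper illustrating the flush_pte convention adopted by the
 * callers in this patch: request the flush only after the last page.
 */
static int map_range(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
			u64 size, u32 page_size)
{
	u64 off;
	int rc;

	for (off = 0 ; off < size ; off += page_size) {
		/* flush_pte is true only for the last page of the range */
		rc = hl_mmu_map(ctx, virt_addr + off, phys_addr + off,
				page_size, (off + page_size) >= size);
		if (rc)
			return rc;
	}

	return 0;
}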
@@ -4776,7 +4776,8 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
 		rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
-				prop->dram_base_address + off, PAGE_SIZE_2MB);
+				prop->dram_base_address + off, PAGE_SIZE_2MB,
+				(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
 		if (rc) {
 			dev_err(hdev->dev, "Map failed for address 0x%llx\n",
 				prop->dram_base_address + off);
@@ -4786,7 +4787,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 
 	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
 		rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
-			hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB);
+			hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB, true);
 
 		if (rc) {
 			dev_err(hdev->dev,
@@ -4799,7 +4800,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 			rc = hl_mmu_map(hdev->kernel_ctx,
 				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
 				hdev->cpu_accessible_dma_address + cpu_off,
-				PAGE_SIZE_4KB);
+				PAGE_SIZE_4KB, true);
 			if (rc) {
 				dev_err(hdev->dev,
 					"Map failed for CPU accessible memory\n");
@@ -4825,14 +4826,15 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 	for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
 		if (hl_mmu_unmap(hdev->kernel_ctx,
 				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
-				PAGE_SIZE_4KB))
+				PAGE_SIZE_4KB, true))
 			dev_warn_ratelimited(hdev->dev,
 				"failed to unmap address 0x%llx\n",
 				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
 unmap:
 	for (; off >= 0 ; off -= PAGE_SIZE_2MB)
 		if (hl_mmu_unmap(hdev->kernel_ctx,
-				prop->dram_base_address + off, PAGE_SIZE_2MB))
+				prop->dram_base_address + off, PAGE_SIZE_2MB,
+				true))
 			dev_warn_ratelimited(hdev->dev,
 				"failed to unmap address 0x%llx\n",
 				prop->dram_base_address + off);
@@ -4857,14 +4859,15 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
 
 	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
 		if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
-				PAGE_SIZE_2MB))
+				PAGE_SIZE_2MB, true))
 			dev_warn(hdev->dev,
 				"Failed to unmap CPU accessible memory\n");
 	} else {
 		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
 			if (hl_mmu_unmap(hdev->kernel_ctx,
 					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
-					PAGE_SIZE_4KB))
+					PAGE_SIZE_4KB,
+					(cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
 				dev_warn_ratelimited(hdev->dev,
 					"failed to unmap address 0x%llx\n",
 					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
@@ -4872,7 +4875,8 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
 
 	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
 		if (hl_mmu_unmap(hdev->kernel_ctx,
-				prop->dram_base_address + off, PAGE_SIZE_2MB))
+				prop->dram_base_address + off, PAGE_SIZE_2MB,
+				(off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
 			dev_warn_ratelimited(hdev->dev,
 				"Failed to unmap address 0x%llx\n",
 				prop->dram_base_address + off);
@@ -1573,8 +1573,10 @@ int hl_mmu_init(struct hl_device *hdev);
 void hl_mmu_fini(struct hl_device *hdev);
 int hl_mmu_ctx_init(struct hl_ctx *ctx);
 void hl_mmu_ctx_fini(struct hl_ctx *ctx);
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size);
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size);
+int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+		u32 page_size, bool flush_pte);
+int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+		bool flush_pte);
 void hl_mmu_swap_out(struct hl_ctx *ctx);
 void hl_mmu_swap_in(struct hl_ctx *ctx);
@@ -747,7 +747,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
 		paddr = phys_pg_pack->pages[i];
 
-		rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size);
+		rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
+				(i + 1) == phys_pg_pack->npages);
 		if (rc) {
 			dev_err(hdev->dev,
 				"map failed for handle %u, npages: %llu, mapped: %llu",
@@ -765,7 +766,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 err:
 	next_vaddr = vaddr;
 	for (i = 0 ; i < mapped_pg_cnt ; i++) {
-		if (hl_mmu_unmap(ctx, next_vaddr, page_size))
+		if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+					(i + 1) == mapped_pg_cnt))
 			dev_warn_ratelimited(hdev->dev,
 				"failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
 					phys_pg_pack->handle, next_vaddr,
@@ -794,7 +796,8 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 	next_vaddr = vaddr;
 
 	for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
-		if (hl_mmu_unmap(ctx, next_vaddr, page_size))
+		if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+				(i + 1) == phys_pg_pack->npages))
 			dev_warn_ratelimited(hdev->dev,
 				"unmap failed for vaddr: 0x%llx\n", next_vaddr);
 
@@ -637,29 +637,27 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
 			clear_hop3 = true;
 
 		if (!clear_hop3)
-			goto flush;
+			goto mapped;
 
 		clear_pte(ctx, hop3_pte_addr);
 
 		if (put_pte(ctx, hop3_addr))
-			goto flush;
+			goto mapped;
 
 		clear_pte(ctx, hop2_pte_addr);
 
 		if (put_pte(ctx, hop2_addr))
-			goto flush;
+			goto mapped;
 
 		clear_pte(ctx, hop1_pte_addr);
 
 		if (put_pte(ctx, hop1_addr))
-			goto flush;
+			goto mapped;
 
 		clear_pte(ctx, hop0_pte_addr);
 	}
 
-flush:
-	flush(ctx);
-
+mapped:
 	return 0;
 
 not_mapped:
@@ -675,6 +673,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
  * @ctx: pointer to the context structure
  * @virt_addr: virt addr to map from
  * @page_size: size of the page to unmap
+ * @flush_pte: whether to do a PCI flush
  *
  * This function does the following:
  * - Check that the virt addr is mapped
@@ -685,15 +684,19 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
  * changes the MMU hash, it must be protected by a lock.
  * However, because it maps only a single page, the lock should be implemented
  * in a higher level in order to protect the entire mapping of the memory area
+ *
+ * For optimization reasons PCI flush may be requested once after unmapping of
+ * large area.
  */
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
+int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+		bool flush_pte)
 {
 	struct hl_device *hdev = ctx->hdev;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct hl_mmu_properties *mmu_prop;
 	u64 real_virt_addr;
 	u32 real_page_size, npages;
-	int i, rc;
+	int i, rc = 0;
 	bool is_dram_addr;
 
 	if (!hdev->mmu_enable)
@@ -729,12 +732,15 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
 	for (i = 0 ; i < npages ; i++) {
 		rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr);
 		if (rc)
-			return rc;
+			break;
 
 		real_virt_addr += real_page_size;
 	}
 
-	return 0;
+	if (flush_pte)
+		flush(ctx);
+
+	return rc;
 }
 
 static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
@@ -885,8 +891,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 		get_pte(ctx, hop3_addr);
 	}
 
-	flush(ctx);
-
 	return 0;
 
 err:
@@ -909,6 +913,7 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
  * @virt_addr: virt addr to map from
  * @phys_addr: phys addr to map to
  * @page_size: physical page size
+ * @flush_pte: whether to do a PCI flush
  *
  * This function does the following:
  * - Check that the virt addr is not mapped
@@ -919,8 +924,12 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
  * changes the MMU hash, it must be protected by a lock.
  * However, because it maps only a single page, the lock should be implemented
  * in a higher level in order to protect the entire mapping of the memory area
+ *
+ * For optimization reasons PCI flush may be requested once after mapping of
+ * large area.
  */
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
+int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
+		bool flush_pte)
 {
 	struct hl_device *hdev = ctx->hdev;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -976,6 +985,9 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
 		mapped_cnt++;
 	}
 
+	if (flush_pte)
+		flush(ctx);
+
 	return 0;
 
 err:
@@ -988,6 +1000,8 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
 		real_virt_addr += real_page_size;
 	}
 
+	flush(ctx);
+
 	return rc;
 }