From 3110c60fdc7a5a7626b7cd401c4918751d7c19db Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 4 Mar 2019 10:22:09 +0200 Subject: [PATCH] habanalabs: Move device CPU code into common file This patch moves the code that is responsible of the communication vs. the F/W to a dedicated file. This will allow us to share the code between different ASICs. Signed-off-by: Tomer Tayar Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/Makefile | 2 +- drivers/misc/habanalabs/firmware_if.c | 325 +++++++++++++++++++ drivers/misc/habanalabs/goya/goya.c | 361 ++++----------------- drivers/misc/habanalabs/goya/goyaP.h | 11 - drivers/misc/habanalabs/habanalabs.h | 17 + drivers/misc/habanalabs/include/armcp_if.h | 8 + 6 files changed, 408 insertions(+), 316 deletions(-) create mode 100644 drivers/misc/habanalabs/firmware_if.c diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile index c6592db59b25..598740d235f8 100644 --- a/drivers/misc/habanalabs/Makefile +++ b/drivers/misc/habanalabs/Makefile @@ -6,7 +6,7 @@ obj-m := habanalabs.o habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \ command_buffer.o hw_queue.o irq.o sysfs.o hwmon.o memory.o \ - command_submission.o mmu.o + command_submission.o mmu.o firmware_if.o habanalabs-$(CONFIG_DEBUG_FS) += debugfs.o diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c new file mode 100644 index 000000000000..1acf82650b20 --- /dev/null +++ b/drivers/misc/habanalabs/firmware_if.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" + +#include +#include +#include + +/** + * hl_fw_push_fw_to_device() - Push FW code to device. + * @hdev: pointer to hl_device structure. + * + * Copy fw code from firmware file to device memory. + * + * Return: 0 on success, non-zero for failure. + */ +int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name, + void __iomem *dst) +{ + const struct firmware *fw; + const u64 *fw_data; + size_t fw_size, i; + int rc; + + rc = request_firmware(&fw, fw_name, hdev->dev); + if (rc) { + dev_err(hdev->dev, "Failed to request %s\n", fw_name); + goto out; + } + + fw_size = fw->size; + if ((fw_size % 4) != 0) { + dev_err(hdev->dev, "illegal %s firmware size %zu\n", + fw_name, fw_size); + rc = -EINVAL; + goto out; + } + + dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size); + + fw_data = (const u64 *) fw->data; + + if ((fw->size % 8) != 0) + fw_size -= 8; + + for (i = 0 ; i < fw_size ; i += 8, fw_data++, dst += 8) { + if (!(i & (0x80000 - 1))) { + dev_dbg(hdev->dev, + "copied so far %zu out of %zu for %s firmware", + i, fw_size, fw_name); + usleep_range(20, 100); + } + + writeq(*fw_data, dst); + } + + if ((fw->size % 8) != 0) + writel(*(const u32 *) fw_data, dst); + +out: + release_firmware(fw); + return rc; +} + +int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode) +{ + struct armcp_packet pkt = {}; + + pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT); + + return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, + sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL); +} + +int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, + u16 len, u32 timeout, long *result) +{ + struct armcp_packet *pkt; + dma_addr_t pkt_dma_addr; + u32 tmp; + int rc = 0; + + if (len > HL_CPU_CB_SIZE) { + dev_err(hdev->dev, "Invalid CPU message size of %d bytes\n", + len); + return -ENOMEM; + } + + pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len, + &pkt_dma_addr); + if (!pkt) { + dev_err(hdev->dev, + "Failed to allocate DMA memory for packet to CPU\n"); + return -ENOMEM; + } + + memcpy(pkt, msg, len); + + mutex_lock(&hdev->send_cpu_message_lock); + + if (hdev->disabled) + goto out; + + if (hdev->device_cpu_disabled) { + rc = -EIO; + goto out; + } + + rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr); + if (rc) { + dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc); + goto out; + } + + rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) &pkt->fence, + timeout, &tmp); + + hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); + + if (rc == -ETIMEDOUT) { + dev_err(hdev->dev, "Timeout while waiting for device CPU\n"); + hdev->device_cpu_disabled = true; + goto out; + } + + if (tmp == ARMCP_PACKET_FENCE_VAL) { + u32 ctl = le32_to_cpu(pkt->ctl); + + rc = (ctl & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT; + if (rc) { + dev_err(hdev->dev, + "F/W ERROR %d for CPU packet %d\n", + rc, (ctl & ARMCP_PKT_CTL_OPCODE_MASK) + >> ARMCP_PKT_CTL_OPCODE_SHIFT); + rc = -EINVAL; + } else if (result) { + *result = (long) le64_to_cpu(pkt->result); + } + } else { + dev_err(hdev->dev, "CPU packet wrong fence value\n"); + rc = -EINVAL; + } + +out: + mutex_unlock(&hdev->send_cpu_message_lock); + + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt); + + return rc; +} + +int hl_fw_test_cpu_queue(struct hl_device *hdev) +{ + struct armcp_packet test_pkt = {}; + long result; + int rc; + + test_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST << + ARMCP_PKT_CTL_OPCODE_SHIFT); + test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt, + sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result); + + if (!rc) { + if (result == ARMCP_PACKET_FENCE_VAL) + dev_info(hdev->dev, + "queue test on CPU queue succeeded\n"); + else + dev_err(hdev->dev, + "CPU queue test failed (0x%08lX)\n", result); + } else { + dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc); + } + + return rc; +} + +void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, + dma_addr_t *dma_handle) +{ + u64 kernel_addr; + + /* roundup to HL_CPU_PKT_SIZE */ + size = (size + (HL_CPU_PKT_SIZE - 1)) & HL_CPU_PKT_MASK; + + kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size); + + *dma_handle = hdev->cpu_accessible_dma_address + + (kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem); + + return (void *) (uintptr_t) kernel_addr; +} + +void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, + void *vaddr) +{ + /* roundup to HL_CPU_PKT_SIZE */ + size = (size + (HL_CPU_PKT_SIZE - 1)) & HL_CPU_PKT_MASK; + + gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr, + size); +} + +int hl_fw_send_heartbeat(struct hl_device *hdev) +{ + struct armcp_packet hb_pkt = {}; + long result; + int rc; + + hb_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST << + ARMCP_PKT_CTL_OPCODE_SHIFT); + hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt, + sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result); + + if ((rc) || (result != ARMCP_PACKET_FENCE_VAL)) + rc = -EIO; + + return rc; +} + +int hl_fw_armcp_info_get(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct armcp_packet pkt = {}; + void *armcp_info_cpu_addr; + dma_addr_t armcp_info_dma_addr; + long result; + int rc; + + armcp_info_cpu_addr = + hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, + sizeof(struct armcp_info), + &armcp_info_dma_addr); + if (!armcp_info_cpu_addr) { + dev_err(hdev->dev, + "Failed to allocate DMA memory for ArmCP info packet\n"); + return -ENOMEM; + } + + memset(armcp_info_cpu_addr, 0, sizeof(struct armcp_info)); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_INFO_GET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.addr = cpu_to_le64(armcp_info_dma_addr + + prop->host_phys_base_address); + pkt.data_max_size = cpu_to_le32(sizeof(struct armcp_info)); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_ARMCP_INFO_TIMEOUT_USEC, &result); + if (rc) { + dev_err(hdev->dev, + "Failed to send armcp info pkt, error %d\n", rc); + goto out; + } + + memcpy(&prop->armcp_info, armcp_info_cpu_addr, + sizeof(prop->armcp_info)); + + rc = hl_build_hwmon_channel_info(hdev, prop->armcp_info.sensors); + if (rc) { + dev_err(hdev->dev, + "Failed to build hwmon channel info, error %d\n", rc); + rc = -EFAULT; + goto out; + } + +out: + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, + sizeof(struct armcp_info), armcp_info_cpu_addr); + + return rc; +} + +int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct armcp_packet pkt = {}; + void *eeprom_info_cpu_addr; + dma_addr_t eeprom_info_dma_addr; + long result; + int rc; + + eeprom_info_cpu_addr = + hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, + max_size, &eeprom_info_dma_addr); + if (!eeprom_info_cpu_addr) { + dev_err(hdev->dev, + "Failed to allocate DMA memory for EEPROM info packet\n"); + return -ENOMEM; + } + + memset(eeprom_info_cpu_addr, 0, max_size); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_EEPROM_DATA_GET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.addr = cpu_to_le64(eeprom_info_dma_addr + + prop->host_phys_base_address); + pkt.data_max_size = cpu_to_le32(max_size); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_ARMCP_EEPROM_TIMEOUT_USEC, &result); + + if (rc) { + dev_err(hdev->dev, + "Failed to send armcp EEPROM pkt, error %d\n", rc); + goto out; + } + + /* result contains the actual size */ + memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size)); + +out: + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size, + eeprom_info_cpu_addr); + + return rc; +} diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 3dfac5393269..30f83521de90 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -12,10 +12,8 @@ #include #include -#include #include #include -#include /* * GOYA security scheme: @@ -373,18 +371,6 @@ static void goya_get_fixed_properties(struct hl_device *hdev) prop->high_pll = PLL_HIGH_DEFAULT; } -int goya_send_pci_access_msg(struct hl_device *hdev, u32 opcode) -{ - struct armcp_packet pkt; - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT); - - return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, - sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL); -} - /* * goya_pci_bars_map - Map PCI BARS of Goya device * @@ -802,7 +788,7 @@ static int goya_late_init(struct hl_device *hdev) */ WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size)); - rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS); + rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS); if (rc) { dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); return rc; @@ -828,7 +814,7 @@ static int goya_late_init(struct hl_device *hdev) return 0; disable_pci_access: - goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); + hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); return rc; } @@ -900,19 +886,16 @@ static int goya_sw_init(struct hl_device *hdev) hdev->cpu_accessible_dma_mem = hdev->asic_funcs->dma_alloc_coherent(hdev, - CPU_ACCESSIBLE_MEM_SIZE, + HL_CPU_ACCESSIBLE_MEM_SIZE, &hdev->cpu_accessible_dma_address, GFP_KERNEL | __GFP_ZERO); if (!hdev->cpu_accessible_dma_mem) { - dev_err(hdev->dev, - "failed to allocate %d of dma memory for CPU accessible memory space\n", - CPU_ACCESSIBLE_MEM_SIZE); rc = -ENOMEM; goto free_dma_pool; } - hdev->cpu_accessible_dma_pool = gen_pool_create(CPU_PKT_SHIFT, -1); + hdev->cpu_accessible_dma_pool = gen_pool_create(HL_CPU_PKT_SHIFT, -1); if (!hdev->cpu_accessible_dma_pool) { dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n"); @@ -922,7 +905,7 @@ static int goya_sw_init(struct hl_device *hdev) rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem, - CPU_ACCESSIBLE_MEM_SIZE, -1); + HL_CPU_ACCESSIBLE_MEM_SIZE, -1); if (rc) { dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n"); @@ -937,7 +920,7 @@ static int goya_sw_init(struct hl_device *hdev) free_cpu_pq_pool: gen_pool_destroy(hdev->cpu_accessible_dma_pool); free_cpu_pq_dma_mem: - hdev->asic_funcs->dma_free_coherent(hdev, CPU_ACCESSIBLE_MEM_SIZE, + hdev->asic_funcs->dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, hdev->cpu_accessible_dma_address); free_dma_pool: @@ -960,7 +943,7 @@ static int goya_sw_fini(struct hl_device *hdev) gen_pool_destroy(hdev->cpu_accessible_dma_pool); - hdev->asic_funcs->dma_free_coherent(hdev, CPU_ACCESSIBLE_MEM_SIZE, + hdev->asic_funcs->dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, hdev->cpu_accessible_dma_address); @@ -1240,7 +1223,7 @@ static int goya_init_cpu_queues(struct hl_device *hdev) WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_5, HL_QUEUE_SIZE_IN_BYTES); WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_4, HL_EQ_SIZE_IN_BYTES); - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_10, CPU_ACCESSIBLE_MEM_SIZE); + WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_10, HL_CPU_ACCESSIBLE_MEM_SIZE); /* Used for EQ CI */ WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, 0); @@ -2328,67 +2311,45 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset) } /* - * goya_push_fw_to_device - Push FW code to device + * goya_push_uboot_to_device() - Push u-boot FW code to device. + * @hdev: Pointer to hl_device structure. * - * @hdev: pointer to hl_device structure - * - * Copy fw code from firmware file to device memory. - * Returns 0 on success + * Copy u-boot fw code from firmware file to SRAM BAR. * + * Return: 0 on success, non-zero for failure. */ -static int goya_push_fw_to_device(struct hl_device *hdev, const char *fw_name, - void __iomem *dst) +static int goya_push_uboot_to_device(struct hl_device *hdev) { - const struct firmware *fw; - const u64 *fw_data; - size_t fw_size, i; - int rc; + char fw_name[200]; + void __iomem *dst; - rc = request_firmware(&fw, fw_name, hdev->dev); + snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-u-boot.bin"); + dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + UBOOT_FW_OFFSET; - if (rc) { - dev_err(hdev->dev, "Failed to request %s\n", fw_name); - goto out; - } + return hl_fw_push_fw_to_device(hdev, fw_name, dst); +} - fw_size = fw->size; - if ((fw_size % 4) != 0) { - dev_err(hdev->dev, "illegal %s firmware size %zu\n", - fw_name, fw_size); - rc = -EINVAL; - goto out; - } +/* + * goya_push_linux_to_device() - Push LINUX FW code to device. + * @hdev: Pointer to hl_device structure. + * + * Copy LINUX fw code from firmware file to HBM BAR. + * + * Return: 0 on success, non-zero for failure. + */ +static int goya_push_linux_to_device(struct hl_device *hdev) +{ + char fw_name[200]; + void __iomem *dst; - dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size); + snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb"); + dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET; - fw_data = (const u64 *) fw->data; - - if ((fw->size % 8) != 0) - fw_size -= 8; - - for (i = 0 ; i < fw_size ; i += 8, fw_data++, dst += 8) { - if (!(i & (0x80000 - 1))) { - dev_dbg(hdev->dev, - "copied so far %zu out of %zu for %s firmware", - i, fw_size, fw_name); - usleep_range(20, 100); - } - - writeq(*fw_data, dst); - } - - if ((fw->size % 8) != 0) - writel(*(const u32 *) fw_data, dst); - -out: - release_firmware(fw); - return rc; + return hl_fw_push_fw_to_device(hdev, fw_name, dst); } static int goya_pldm_init_cpu(struct hl_device *hdev) { - char fw_name[200]; - void __iomem *dst; u32 val, unit_rst_val; int rc; @@ -2406,15 +2367,11 @@ static int goya_pldm_init_cpu(struct hl_device *hdev) WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, unit_rst_val); val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N); - snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-u-boot.bin"); - dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + UBOOT_FW_OFFSET; - rc = goya_push_fw_to_device(hdev, fw_name, dst); + rc = goya_push_uboot_to_device(hdev); if (rc) return rc; - snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb"); - dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET; - rc = goya_push_fw_to_device(hdev, fw_name, dst); + rc = goya_push_linux_to_device(hdev); if (rc) return rc; @@ -2476,8 +2433,6 @@ static void goya_read_device_fw_version(struct hl_device *hdev, static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout) { struct goya_device *goya = hdev->asic_specific; - char fw_name[200]; - void __iomem *dst; u32 status; int rc; @@ -2574,9 +2529,7 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout) goto out; } - snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb"); - dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET; - rc = goya_push_fw_to_device(hdev, fw_name, dst); + rc = goya_push_linux_to_device(hdev); if (rc) return rc; @@ -2762,7 +2715,7 @@ static int goya_hw_init(struct hl_device *hdev) return 0; disable_pci_access: - goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); + hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); disable_msix: goya_disable_msix(hdev); disable_queues: @@ -2869,7 +2822,7 @@ int goya_suspend(struct hl_device *hdev) { int rc; - rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); + rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); if (rc) dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); @@ -3150,10 +3103,6 @@ int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len, u32 timeout, long *result) { struct goya_device *goya = hdev->asic_specific; - struct armcp_packet *pkt; - dma_addr_t pkt_dma_addr; - u32 tmp; - int rc = 0; if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) { if (result) @@ -3161,74 +3110,8 @@ int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len, return 0; } - if (len > CPU_CB_SIZE) { - dev_err(hdev->dev, "Invalid CPU message size of %d bytes\n", - len); - return -ENOMEM; - } - - pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len, - &pkt_dma_addr); - if (!pkt) { - dev_err(hdev->dev, - "Failed to allocate DMA memory for packet to CPU\n"); - return -ENOMEM; - } - - memcpy(pkt, msg, len); - - mutex_lock(&hdev->send_cpu_message_lock); - - if (hdev->disabled) - goto out; - - if (hdev->device_cpu_disabled) { - rc = -EIO; - goto out; - } - - rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_CPU_PQ, len, - pkt_dma_addr); - if (rc) { - dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc); - goto out; - } - - rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) &pkt->fence, - timeout, &tmp); - - hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_CPU_PQ); - - if (rc == -ETIMEDOUT) { - dev_err(hdev->dev, "Timeout while waiting for device CPU\n"); - hdev->device_cpu_disabled = true; - goto out; - } - - if (tmp == ARMCP_PACKET_FENCE_VAL) { - u32 ctl = le32_to_cpu(pkt->ctl); - - rc = (ctl & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT; - if (rc) { - dev_err(hdev->dev, - "F/W ERROR %d for CPU packet %d\n", - rc, (ctl & ARMCP_PKT_CTL_OPCODE_MASK) - >> ARMCP_PKT_CTL_OPCODE_SHIFT); - rc = -EINVAL; - } else if (result) { - *result = (long) le64_to_cpu(pkt->result); - } - } else { - dev_err(hdev->dev, "CPU packet wrong fence value\n"); - rc = -EINVAL; - } - -out: - mutex_unlock(&hdev->send_cpu_message_lock); - - hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt); - - return rc; + return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len, + timeout, result); } int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id) @@ -3306,33 +3189,16 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id) int goya_test_cpu_queue(struct hl_device *hdev) { - struct armcp_packet test_pkt; - long result; - int rc; + struct goya_device *goya = hdev->asic_specific; - /* cpu_queues_enable flag is always checked in send cpu message */ + /* + * check capability here as send_cpu_message() won't update the result + * value if no capability + */ + if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) + return 0; - memset(&test_pkt, 0, sizeof(test_pkt)); - - test_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST << - ARMCP_PKT_CTL_OPCODE_SHIFT); - test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt, - sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result); - - if (!rc) { - if (result == ARMCP_PACKET_FENCE_VAL) - dev_info(hdev->dev, - "queue test on CPU queue succeeded\n"); - else - dev_err(hdev->dev, - "CPU queue test failed (0x%08lX)\n", result); - } else { - dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc); - } - - return rc; + return hl_fw_test_cpu_queue(hdev); } static int goya_test_queues(struct hl_device *hdev) @@ -3373,27 +3239,13 @@ static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr, static void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle) { - u64 kernel_addr; - - /* roundup to CPU_PKT_SIZE */ - size = (size + (CPU_PKT_SIZE - 1)) & CPU_PKT_MASK; - - kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size); - - *dma_handle = hdev->cpu_accessible_dma_address + - (kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem); - - return (void *) (uintptr_t) kernel_addr; + return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); } static void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr) { - /* roundup to CPU_PKT_SIZE */ - size = (size + (CPU_PKT_SIZE - 1)) & CPU_PKT_MASK; - - gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr, - size); + hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); } static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sg, @@ -5032,72 +4884,26 @@ static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, int goya_send_heartbeat(struct hl_device *hdev) { struct goya_device *goya = hdev->asic_specific; - struct armcp_packet hb_pkt; - long result; - int rc; if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) return 0; - memset(&hb_pkt, 0, sizeof(hb_pkt)); - - hb_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST << - ARMCP_PKT_CTL_OPCODE_SHIFT); - hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt, - sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result); - - if ((rc) || (result != ARMCP_PACKET_FENCE_VAL)) - rc = -EIO; - - return rc; + return hl_fw_send_heartbeat(hdev); } static int goya_armcp_info_get(struct hl_device *hdev) { struct goya_device *goya = hdev->asic_specific; struct asic_fixed_properties *prop = &hdev->asic_prop; - struct armcp_packet pkt; - void *armcp_info_cpu_addr; - dma_addr_t armcp_info_dma_addr; u64 dram_size; - long result; int rc; if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) return 0; - armcp_info_cpu_addr = - hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, - sizeof(struct armcp_info), &armcp_info_dma_addr); - if (!armcp_info_cpu_addr) { - dev_err(hdev->dev, - "Failed to allocate DMA memory for ArmCP info packet\n"); - return -ENOMEM; - } - - memset(armcp_info_cpu_addr, 0, sizeof(struct armcp_info)); - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_INFO_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.addr = cpu_to_le64(armcp_info_dma_addr + - prop->host_phys_base_address); - pkt.data_max_size = cpu_to_le32(sizeof(struct armcp_info)); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - GOYA_ARMCP_INFO_TIMEOUT, &result); - - if (rc) { - dev_err(hdev->dev, - "Failed to send armcp info pkt, error %d\n", rc); - goto out; - } - - memcpy(&prop->armcp_info, armcp_info_cpu_addr, - sizeof(prop->armcp_info)); + rc = hl_fw_armcp_info_get(hdev); + if (rc) + return rc; dram_size = le64_to_cpu(prop->armcp_info.dram_size); if (dram_size) { @@ -5113,19 +4919,7 @@ static int goya_armcp_info_get(struct hl_device *hdev) prop->dram_end_address = prop->dram_base_address + dram_size; } - rc = hl_build_hwmon_channel_info(hdev, prop->armcp_info.sensors); - if (rc) { - dev_err(hdev->dev, - "Failed to build hwmon channel info, error %d\n", rc); - rc = -EFAULT; - goto out; - } - -out: - hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, - sizeof(struct armcp_info), armcp_info_cpu_addr); - - return rc; + return 0; } static void goya_init_clock_gating(struct hl_device *hdev) @@ -5214,52 +5008,11 @@ static int goya_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) { struct goya_device *goya = hdev->asic_specific; - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct armcp_packet pkt; - void *eeprom_info_cpu_addr; - dma_addr_t eeprom_info_dma_addr; - long result; - int rc; if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) return 0; - eeprom_info_cpu_addr = - hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, - max_size, &eeprom_info_dma_addr); - if (!eeprom_info_cpu_addr) { - dev_err(hdev->dev, - "Failed to allocate DMA memory for EEPROM info packet\n"); - return -ENOMEM; - } - - memset(eeprom_info_cpu_addr, 0, max_size); - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_EEPROM_DATA_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.addr = cpu_to_le64(eeprom_info_dma_addr + - prop->host_phys_base_address); - pkt.data_max_size = cpu_to_le32(max_size); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - GOYA_ARMCP_EEPROM_TIMEOUT, &result); - - if (rc) { - dev_err(hdev->dev, - "Failed to send armcp EEPROM pkt, error %d\n", rc); - goto out; - } - - /* result contains the actual size */ - memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size)); - -out: - hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size, - eeprom_info_cpu_addr); - - return rc; + return hl_fw_get_eeprom_data(hdev, data, max_size); } static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev) diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index 830551b6b062..6fdc76b5bde0 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -49,9 +49,6 @@ #define MAX_POWER_DEFAULT 200000 /* 200W */ -#define GOYA_ARMCP_INFO_TIMEOUT 10000000 /* 10s */ -#define GOYA_ARMCP_EEPROM_TIMEOUT 10000000 /* 10s */ - #define DRAM_PHYS_DEFAULT_SIZE 0x100000000ull /* 4GB */ /* DRAM Memory Map */ @@ -142,13 +139,6 @@ #define HW_CAP_GOLDEN 0x00000400 #define HW_CAP_TPC 0x00000800 -#define CPU_PKT_SHIFT 5 -#define CPU_PKT_SIZE (1 << CPU_PKT_SHIFT) -#define CPU_PKT_MASK (~((1 << CPU_PKT_SHIFT) - 1)) -#define CPU_MAX_PKTS_IN_CB 32 -#define CPU_CB_SIZE (CPU_PKT_SIZE * CPU_MAX_PKTS_IN_CB) -#define CPU_ACCESSIBLE_MEM_SIZE (HL_QUEUE_LENGTH * CPU_CB_SIZE) - enum goya_fw_component { FW_COMP_UBOOT, FW_COMP_PREBOOT @@ -192,7 +182,6 @@ void goya_init_security(struct hl_device *hdev); u64 goya_get_max_power(struct hl_device *hdev); void goya_set_max_power(struct hl_device *hdev, u64 value); -int goya_send_pci_access_msg(struct hl_device *hdev, u32 opcode); void goya_late_fini(struct hl_device *hdev); int goya_suspend(struct hl_device *hdev); int goya_resume(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index a8ee52c880cd..1445ba1cafd4 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -33,6 +33,9 @@ #define HL_PLL_LOW_JOB_FREQ_USEC 5000000 /* 5 s */ +#define HL_ARMCP_INFO_TIMEOUT_USEC 10000000 /* 10s */ +#define HL_ARMCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */ + #define HL_MAX_QUEUES 128 #define HL_MAX_JOBS_PER_CS 64 @@ -1351,6 +1354,20 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size); void hl_mmu_swap_out(struct hl_ctx *ctx); void hl_mmu_swap_in(struct hl_ctx *ctx); +int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name, + void __iomem *dst); +int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode); +int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, + u16 len, u32 timeout, long *result); +int hl_fw_test_cpu_queue(struct hl_device *hdev); +void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, + dma_addr_t *dma_handle); +void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, + void *vaddr); +int hl_fw_send_heartbeat(struct hl_device *hdev); +int hl_fw_armcp_info_get(struct hl_device *hdev); +int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size); + long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr); void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq); long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr); diff --git a/drivers/misc/habanalabs/include/armcp_if.h b/drivers/misc/habanalabs/include/armcp_if.h index 9dddb917e72c..ccb82390241e 100644 --- a/drivers/misc/habanalabs/include/armcp_if.h +++ b/drivers/misc/habanalabs/include/armcp_if.h @@ -302,6 +302,14 @@ enum armcp_pwm_attributes { armcp_pwm_enable }; +#define HL_CPU_PKT_SHIFT 5 +#define HL_CPU_PKT_SIZE (1 << HL_CPU_PKT_SHIFT) +#define HL_CPU_PKT_MASK (~((1 << HL_CPU_PKT_SHIFT) - 1)) +#define HL_CPU_MAX_PKTS_IN_CB 32 +#define HL_CPU_CB_SIZE (HL_CPU_PKT_SIZE * \ + HL_CPU_MAX_PKTS_IN_CB) +#define HL_CPU_ACCESSIBLE_MEM_SIZE (HL_QUEUE_LENGTH * HL_CPU_CB_SIZE) + /* Event Queue Packets */ struct eq_generic_event {