Commit 54bb6744 authored by Omer Shpigelman's avatar Omer Shpigelman Committed by Oded Gabbay

habanalabs: split MMU properties to PCI/DRAM

Split the properties used for MMU mappings to DRAM and PCI (host) types.
This is a prerequisite for future ASICs support.
Note that in Goya ASIC, the PMMU and DMMU are the same (except of page
sizes) as only one MMU mechanism is used for both of the mapping types.
Hence this patch should not have any effect on current behavior.
Signed-off-by: default avatarOmer Shpigelman <oshpigelman@habana.ai>
Reviewed-by: default avatarOded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: default avatarOded Gabbay <oded.gabbay@gmail.com>
parent 30919ede
......@@ -307,39 +307,51 @@ static inline u64 get_hop0_addr(struct hl_ctx *ctx)
(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}
static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
u64 virt_addr)
static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
u64 virt_addr, u64 mask, u64 shift)
{
return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
((virt_addr & HOP0_MASK) >> HOP0_SHIFT);
((virt_addr & mask) >> shift);
}
static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
u64 virt_addr)
static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
struct hl_mmu_properties *mmu_specs,
u64 hop_addr, u64 vaddr)
{
return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
((virt_addr & HOP1_MASK) >> HOP1_SHIFT);
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop0_mask,
mmu_specs->hop0_shift);
}
static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
u64 virt_addr)
static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
struct hl_mmu_properties *mmu_specs,
u64 hop_addr, u64 vaddr)
{
return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
((virt_addr & HOP2_MASK) >> HOP2_SHIFT);
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop1_mask,
mmu_specs->hop1_shift);
}
static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
u64 virt_addr)
static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
struct hl_mmu_properties *mmu_specs,
u64 hop_addr, u64 vaddr)
{
return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
((virt_addr & HOP3_MASK) >> HOP3_SHIFT);
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop2_mask,
mmu_specs->hop2_shift);
}
static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
u64 virt_addr)
static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
struct hl_mmu_properties *mmu_specs,
u64 hop_addr, u64 vaddr)
{
return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
((virt_addr & HOP4_MASK) >> HOP4_SHIFT);
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop3_mask,
mmu_specs->hop3_shift);
}
static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
struct hl_mmu_properties *mmu_specs,
u64 hop_addr, u64 vaddr)
{
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop4_mask,
mmu_specs->hop4_shift);
}
static inline u64 get_next_hop_addr(u64 curr_pte)
......@@ -355,7 +367,10 @@ static int mmu_show(struct seq_file *s, void *data)
struct hl_debugfs_entry *entry = s->private;
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
struct hl_device *hdev = dev_entry->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_mmu_properties *mmu_prop;
struct hl_ctx *ctx;
bool is_dram_addr;
u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0,
hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0,
......@@ -377,33 +392,39 @@ static int mmu_show(struct seq_file *s, void *data)
return 0;
}
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->va_space_dram_start_address,
prop->va_space_dram_end_address);
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
mutex_lock(&ctx->mmu_lock);
/* the following lookup is copied from unmap() in mmu.c */
hop0_addr = get_hop0_addr(ctx);
hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr);
hop1_addr = get_next_hop_addr(hop0_pte);
if (hop1_addr == ULLONG_MAX)
goto not_mapped;
hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr);
hop2_addr = get_next_hop_addr(hop1_pte);
if (hop2_addr == ULLONG_MAX)
goto not_mapped;
hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr);
hop3_addr = get_next_hop_addr(hop2_pte);
if (hop3_addr == ULLONG_MAX)
goto not_mapped;
hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
if (!(hop3_pte & LAST_MASK)) {
......@@ -412,7 +433,8 @@ static int mmu_show(struct seq_file *s, void *data)
if (hop4_addr == ULLONG_MAX)
goto not_mapped;
hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
virt_addr);
hop4_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
if (!(hop4_pte & PAGE_PRESENT_MASK))
goto not_mapped;
......@@ -534,41 +556,50 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
u64 *phys_addr)
{
struct hl_ctx *ctx = hdev->compute_ctx;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_mmu_properties *mmu_prop;
u64 hop_addr, hop_pte_addr, hop_pte;
u64 offset_mask = HOP4_MASK | FLAGS_MASK;
int rc = 0;
bool is_dram_addr;
if (!ctx) {
dev_err(hdev->dev, "no ctx available\n");
return -EINVAL;
}
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->va_space_dram_start_address,
prop->va_space_dram_end_address);
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
mutex_lock(&ctx->mmu_lock);
/* hop 0 */
hop_addr = get_hop0_addr(ctx);
hop_pte_addr = get_hop0_pte_addr(ctx, hop_addr, virt_addr);
hop_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
/* hop 1 */
hop_addr = get_next_hop_addr(hop_pte);
if (hop_addr == ULLONG_MAX)
goto not_mapped;
hop_pte_addr = get_hop1_pte_addr(ctx, hop_addr, virt_addr);
hop_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
/* hop 2 */
hop_addr = get_next_hop_addr(hop_pte);
if (hop_addr == ULLONG_MAX)
goto not_mapped;
hop_pte_addr = get_hop2_pte_addr(ctx, hop_addr, virt_addr);
hop_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
/* hop 3 */
hop_addr = get_next_hop_addr(hop_pte);
if (hop_addr == ULLONG_MAX)
goto not_mapped;
hop_pte_addr = get_hop3_pte_addr(ctx, hop_addr, virt_addr);
hop_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
if (!(hop_pte & LAST_MASK)) {
......@@ -576,7 +607,8 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
hop_addr = get_next_hop_addr(hop_pte);
if (hop_addr == ULLONG_MAX)
goto not_mapped;
hop_pte_addr = get_hop4_pte_addr(ctx, hop_addr, virt_addr);
hop_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr,
virt_addr);
hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
offset_mask = FLAGS_MASK;
......
......@@ -380,6 +380,23 @@ void goya_get_fixed_properties(struct hl_device *hdev)
prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
prop->dram_page_size = PAGE_SIZE_2MB;
prop->dmmu.hop0_shift = HOP0_SHIFT;
prop->dmmu.hop1_shift = HOP1_SHIFT;
prop->dmmu.hop2_shift = HOP2_SHIFT;
prop->dmmu.hop3_shift = HOP3_SHIFT;
prop->dmmu.hop4_shift = HOP4_SHIFT;
prop->dmmu.hop0_mask = HOP0_MASK;
prop->dmmu.hop1_mask = HOP1_MASK;
prop->dmmu.hop2_mask = HOP2_MASK;
prop->dmmu.hop3_mask = HOP3_MASK;
prop->dmmu.hop4_mask = HOP4_MASK;
prop->dmmu.huge_page_size = PAGE_SIZE_2MB;
/* No difference between PMMU and DMMU except of page size */
memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
prop->dmmu.page_size = PAGE_SIZE_2MB;
prop->pmmu.page_size = PAGE_SIZE_4KB;
prop->va_space_host_start_address = VA_HOST_SPACE_START;
prop->va_space_host_end_address = VA_HOST_SPACE_END;
prop->va_space_dram_start_address = VA_DDR_SPACE_START;
......
......@@ -130,6 +130,36 @@ enum hl_device_hw_state {
HL_DEVICE_HW_STATE_DIRTY
};
/**
* struct hl_mmu_properties - ASIC specific MMU address translation properties.
* @hop0_shift: shift of hop 0 mask.
* @hop1_shift: shift of hop 1 mask.
* @hop2_shift: shift of hop 2 mask.
* @hop3_shift: shift of hop 3 mask.
* @hop4_shift: shift of hop 4 mask.
* @hop0_mask: mask to get the PTE address in hop 0.
* @hop1_mask: mask to get the PTE address in hop 1.
* @hop2_mask: mask to get the PTE address in hop 2.
* @hop3_mask: mask to get the PTE address in hop 3.
* @hop4_mask: mask to get the PTE address in hop 4.
* @page_size: default page size used to allocate memory.
* @huge_page_size: page size used to allocate memory with huge pages.
*/
struct hl_mmu_properties {
u64 hop0_shift;
u64 hop1_shift;
u64 hop2_shift;
u64 hop3_shift;
u64 hop4_shift;
u64 hop0_mask;
u64 hop1_mask;
u64 hop2_mask;
u64 hop3_mask;
u64 hop4_mask;
u32 page_size;
u32 huge_page_size;
};
/**
* struct asic_fixed_properties - ASIC specific immutable properties.
* @hw_queues_props: H/W queues properties.
......@@ -137,6 +167,8 @@ enum hl_device_hw_state {
* available sensors.
* @uboot_ver: F/W U-boot version.
* @preboot_ver: F/W Preboot version.
* @dmmu: DRAM MMU address translation properties.
* @pmmu: PCI (host) MMU address translation properties.
* @sram_base_address: SRAM physical start address.
* @sram_end_address: SRAM physical end address.
* @sram_user_base_address - SRAM physical start address for user access.
......@@ -173,53 +205,55 @@ enum hl_device_hw_state {
* @psoc_pci_pll_nf: PCI PLL NF value.
* @psoc_pci_pll_od: PCI PLL OD value.
* @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value.
* @completion_queues_count: number of completion queues.
* @high_pll: high PLL frequency used by the device.
* @cb_pool_cb_cnt: number of CBs in the CB pool.
* @cb_pool_cb_size: size of each CB in the CB pool.
* @tpc_enabled_mask: which TPCs are enabled.
* @completion_queues_count: number of completion queues.
*/
struct asic_fixed_properties {
struct hw_queue_properties hw_queues_props[HL_MAX_QUEUES];
struct armcp_info armcp_info;
char uboot_ver[VERSION_MAX_LEN];
char preboot_ver[VERSION_MAX_LEN];
u64 sram_base_address;
u64 sram_end_address;
u64 sram_user_base_address;
u64 dram_base_address;
u64 dram_end_address;
u64 dram_user_base_address;
u64 dram_size;
u64 dram_pci_bar_size;
u64 max_power_default;
u64 va_space_host_start_address;
u64 va_space_host_end_address;
u64 va_space_dram_start_address;
u64 va_space_dram_end_address;
u64 dram_size_for_default_page_mapping;
u64 pcie_dbi_base_address;
u64 pcie_aux_dbi_reg_addr;
u64 mmu_pgt_addr;
u64 mmu_dram_default_page_addr;
u32 mmu_pgt_size;
u32 mmu_pte_size;
u32 mmu_hop_table_size;
u32 mmu_hop0_tables_total_size;
u32 dram_page_size;
u32 cfg_size;
u32 sram_size;
u32 max_asid;
u32 num_of_events;
u32 psoc_pci_pll_nr;
u32 psoc_pci_pll_nf;
u32 psoc_pci_pll_od;
u32 psoc_pci_pll_div_factor;
u32 high_pll;
u32 cb_pool_cb_cnt;
u32 cb_pool_cb_size;
u8 completion_queues_count;
u8 tpc_enabled_mask;
struct armcp_info armcp_info;
char uboot_ver[VERSION_MAX_LEN];
char preboot_ver[VERSION_MAX_LEN];
struct hl_mmu_properties dmmu;
struct hl_mmu_properties pmmu;
u64 sram_base_address;
u64 sram_end_address;
u64 sram_user_base_address;
u64 dram_base_address;
u64 dram_end_address;
u64 dram_user_base_address;
u64 dram_size;
u64 dram_pci_bar_size;
u64 max_power_default;
u64 va_space_host_start_address;
u64 va_space_host_end_address;
u64 va_space_dram_start_address;
u64 va_space_dram_end_address;
u64 dram_size_for_default_page_mapping;
u64 pcie_dbi_base_address;
u64 pcie_aux_dbi_reg_addr;
u64 mmu_pgt_addr;
u64 mmu_dram_default_page_addr;
u32 mmu_pgt_size;
u32 mmu_pte_size;
u32 mmu_hop_table_size;
u32 mmu_hop0_tables_total_size;
u32 dram_page_size;
u32 cfg_size;
u32 sram_size;
u32 max_asid;
u32 num_of_events;
u32 psoc_pci_pll_nr;
u32 psoc_pci_pll_nf;
u32 psoc_pci_pll_od;
u32 psoc_pci_pll_div_factor;
u32 high_pll;
u32 cb_pool_cb_cnt;
u32 cb_pool_cb_size;
u8 tpc_enabled_mask;
u8 completion_queues_count;
};
/**
......
......@@ -12,7 +12,6 @@
#define PAGE_SHIFT_2MB 21
#define PAGE_SIZE_2MB (_AC(1, UL) << PAGE_SHIFT_2MB)
#define PAGE_SIZE_4KB (_AC(1, UL) << PAGE_SHIFT_4KB)
#define PAGE_MASK_2MB (~(PAGE_SIZE_2MB - 1))
#define PAGE_PRESENT_MASK 0x0000000000001ull
#define SWAP_OUT_MASK 0x0000000000004ull
......
......@@ -13,7 +13,6 @@
#include <linux/slab.h>
#include <linux/genalloc.h>
#define PGS_IN_2MB_PAGE (PAGE_SIZE_2MB >> PAGE_SHIFT)
#define HL_MMU_DEBUG 0
/*
......@@ -516,8 +515,8 @@ static inline int add_va_block(struct hl_device *hdev,
* - Return the start address of the virtual block
*/
static u64 get_va_block(struct hl_device *hdev,
struct hl_va_range *va_range, u64 size, u64 hint_addr,
bool is_userptr)
struct hl_va_range *va_range, u64 size, u64 hint_addr,
bool is_userptr)
{
struct hl_vm_va_block *va_block, *new_va_block = NULL;
u64 valid_start, valid_size, prev_start, prev_end, page_mask,
......@@ -525,18 +524,17 @@ static u64 get_va_block(struct hl_device *hdev,
u32 page_size;
bool add_prev = false;
if (is_userptr) {
if (is_userptr)
/*
* We cannot know if the user allocated memory with huge pages
* or not, hence we continue with the biggest possible
* granularity.
*/
page_size = PAGE_SIZE_2MB;
page_mask = PAGE_MASK_2MB;
} else {
page_size = hdev->asic_prop.dram_page_size;
page_mask = ~((u64)page_size - 1);
}
page_size = hdev->asic_prop.pmmu.huge_page_size;
else
page_size = hdev->asic_prop.dmmu.page_size;
page_mask = ~((u64)page_size - 1);
mutex_lock(&va_range->lock);
......@@ -558,7 +556,6 @@ static u64 get_va_block(struct hl_device *hdev,
if (valid_size >= size &&
(!new_va_block || valid_size < res_valid_size)) {
new_va_block = va_block;
res_valid_start = valid_start;
res_valid_size = valid_size;
......@@ -629,7 +626,7 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
/*
* init_phys_pg_pack_from_userptr - initialize physical page pack from host
* memory
* @asid: current context ASID
* @ctx: current context
* @userptr: userptr to initialize from
* @pphys_pg_pack: result pointer
*
......@@ -638,16 +635,20 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
* - Create a physical page pack from the physical pages related to the given
* virtual block
*/
static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr,
static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
struct hl_userptr *userptr,
struct hl_vm_phys_pg_pack **pphys_pg_pack)
{
struct hl_mmu_properties *mmu_prop = &ctx->hdev->asic_prop.pmmu;
struct hl_vm_phys_pg_pack *phys_pg_pack;
struct scatterlist *sg;
dma_addr_t dma_addr;
u64 page_mask, total_npages;
u32 npages, page_size = PAGE_SIZE;
u32 npages, page_size = PAGE_SIZE,
huge_page_size = mmu_prop->huge_page_size;
bool first = true, is_huge_page_opt = true;
int rc, i, j;
u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
if (!phys_pg_pack)
......@@ -655,7 +656,7 @@ static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr,
phys_pg_pack->vm_type = userptr->vm_type;
phys_pg_pack->created_from_userptr = true;
phys_pg_pack->asid = asid;
phys_pg_pack->asid = ctx->asid;
atomic_set(&phys_pg_pack->mapping_cnt, 1);
/* Only if all dma_addrs are aligned to 2MB and their
......@@ -670,14 +671,14 @@ static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr,
total_npages += npages;
if ((npages % PGS_IN_2MB_PAGE) ||
(dma_addr & (PAGE_SIZE_2MB - 1)))
if ((npages % pgs_in_huge_page) ||
(dma_addr & (huge_page_size - 1)))
is_huge_page_opt = false;
}
if (is_huge_page_opt) {
page_size = PAGE_SIZE_2MB;
total_npages /= PGS_IN_2MB_PAGE;
page_size = huge_page_size;
do_div(total_npages, pgs_in_huge_page);
}
page_mask = ~(((u64) page_size) - 1);
......@@ -709,7 +710,7 @@ static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr,
dma_addr += page_size;
if (is_huge_page_opt)
npages -= PGS_IN_2MB_PAGE;
npages -= pgs_in_huge_page;
else
npages--;
}
......@@ -872,7 +873,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
return rc;
}
rc = init_phys_pg_pack_from_userptr(ctx->asid, userptr,
rc = init_phys_pg_pack_from_userptr(ctx, userptr,
&phys_pg_pack);
if (rc) {
dev_err(hdev->dev,
......@@ -1029,7 +1030,7 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr)
if (*vm_type == VM_TYPE_USERPTR) {
is_userptr = true;
userptr = hnode->ptr;
rc = init_phys_pg_pack_from_userptr(ctx->asid, userptr,
rc = init_phys_pg_pack_from_userptr(ctx, userptr,
&phys_pg_pack);
if (rc) {
dev_err(hdev->dev,
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment