Commit 1631ba12 authored by Heiko Stuebner's avatar Heiko Stuebner Committed by Palmer Dabbelt

riscv: Add support for non-coherent devices using zicbom extension

The Zicbom ISA extension was ratified in November 2021
and introduces instructions for dcache invalidate, clean
and flush operations.

Implement cache management operations for non-coherent devices
based on them.

Of course, not all cores will support this, so implement an
alternative-based mechanism that replaces placeholder nop
instructions with a sequence built around Zicbom instructions.

As discussed in previous versions, assume the platform
is coherent by default, so that non-coherent devices need
to be marked accordingly by firmware.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: Guo Ren <guoren@kernel.org>
Link: https://lore.kernel.org/r/20220706231536.2041855-4-heiko@sntech.de
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
parent d1afce67
...@@ -113,6 +113,7 @@ config RISCV ...@@ -113,6 +113,7 @@ config RISCV
select MODULES_USE_ELF_RELA if MODULES select MODULES_USE_ELF_RELA if MODULES
select MODULE_SECTIONS if MODULES select MODULE_SECTIONS if MODULES
select OF select OF
select OF_DMA_DEFAULT_COHERENT
select OF_EARLY_FLATTREE select OF_EARLY_FLATTREE
select OF_IRQ select OF_IRQ
select PCI_DOMAINS_GENERIC if PCI select PCI_DOMAINS_GENERIC if PCI
...@@ -218,6 +219,14 @@ config PGTABLE_LEVELS ...@@ -218,6 +219,14 @@ config PGTABLE_LEVELS
config LOCKDEP_SUPPORT config LOCKDEP_SUPPORT
def_bool y def_bool y
# Promptless symbol: it has no user-visible option and is enabled via
# "select" (RISCV_ISA_ZICBOM selects it). It pulls in the arch DMA
# hooks and the remapping support listed below.
config RISCV_DMA_NONCOHERENT
bool
select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SETUP_DMA_OPS
select DMA_DIRECT_REMAP
source "arch/riscv/Kconfig.socs" source "arch/riscv/Kconfig.socs"
source "arch/riscv/Kconfig.erratas" source "arch/riscv/Kconfig.erratas"
...@@ -376,6 +385,28 @@ config RISCV_ISA_SVPBMT ...@@ -376,6 +385,28 @@ config RISCV_ISA_SVPBMT
If you don't know what to do here, say Y. If you don't know what to do here, say Y.
# Promptless symbol: y when the compiler accepts "_zicbom" in -march for
# the configured word size (rv64 with lp64, or rv32 with ilp32).
config CC_HAS_ZICBOM
bool
default y if 64BIT && $(cc-option,-mabi=lp64 -march=rv64ima_zicbom)
default y if 32BIT && $(cc-option,-mabi=ilp32 -march=rv32ima_zicbom)
config RISCV_ISA_ZICBOM
bool "Zicbom extension support for non-coherent DMA operation"
depends on CC_HAS_ZICBOM
depends on !XIP_KERNEL
select RISCV_DMA_NONCOHERENT
select RISCV_ALTERNATIVE
default y
help
Adds support to dynamically detect the presence of the ZICBOM
extension (Cache Block Management Operations) and enable its
usage.
The Zicbom extension can be used, for example, to provide
non-coherent DMA support on devices that need it.
If you don't know what to do here, say Y.
config FPU config FPU
bool "FPU support" bool "FPU support"
default y default y
......
...@@ -56,6 +56,10 @@ riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c ...@@ -56,6 +56,10 @@ riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c
toolchain-need-zicsr-zifencei := $(call cc-option-yn, -march=$(riscv-march-y)_zicsr_zifencei) toolchain-need-zicsr-zifencei := $(call cc-option-yn, -march=$(riscv-march-y)_zicsr_zifencei)
riscv-march-$(toolchain-need-zicsr-zifencei) := $(riscv-march-y)_zicsr_zifencei riscv-march-$(toolchain-need-zicsr-zifencei) := $(riscv-march-y)_zicsr_zifencei
# Check if the toolchain supports Zicbom extension
toolchain-supports-zicbom := $(call cc-option-yn, -march=$(riscv-march-y)_zicbom)
riscv-march-$(toolchain-supports-zicbom) := $(riscv-march-y)_zicbom
KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y)) KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y))
KBUILD_AFLAGS += -march=$(riscv-march-y) KBUILD_AFLAGS += -march=$(riscv-march-y)
......
...@@ -11,6 +11,10 @@ ...@@ -11,6 +11,10 @@
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
/*
 * Only defined when non-coherent DMA support is built in: the minimum
 * DMA alignment is then set to L1_CACHE_BYTES.
 */
#ifdef CONFIG_RISCV_DMA_NONCOHERENT
#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
#endif
/* /*
* RISC-V requires the stack pointer to be 16-byte aligned, so ensure that * RISC-V requires the stack pointer to be 16-byte aligned, so ensure that
* the flat loader aligns it accordingly. * the flat loader aligns it accordingly.
......
...@@ -42,6 +42,16 @@ void flush_icache_mm(struct mm_struct *mm, bool local); ...@@ -42,6 +42,16 @@ void flush_icache_mm(struct mm_struct *mm, bool local);
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
/*
 * riscv_init_cbom_blocksize() is only built with CONFIG_RISCV_ISA_ZICBOM;
 * otherwise an empty inline stub is provided so callers need no #ifdef.
 */
#ifdef CONFIG_RISCV_ISA_ZICBOM
void riscv_init_cbom_blocksize(void);
#else
static inline void riscv_init_cbom_blocksize(void) { }
#endif
/*
 * riscv_noncoherent_supported() records that cache-management operations
 * for non-coherent DMA are available. It is only built with
 * CONFIG_RISCV_DMA_NONCOHERENT; otherwise an empty inline stub is provided
 * (mirroring riscv_init_cbom_blocksize()) so callers need no #ifdef.
 */
#ifdef CONFIG_RISCV_DMA_NONCOHERENT
void riscv_noncoherent_supported(void);
#else
static inline void riscv_noncoherent_supported(void) { }
#endif
/* /*
* Bits in sys_riscv_flush_icache()'s flags argument. * Bits in sys_riscv_flush_icache()'s flags argument.
*/ */
......
...@@ -20,7 +20,8 @@ ...@@ -20,7 +20,8 @@
#endif #endif
#define CPUFEATURE_SVPBMT 0 #define CPUFEATURE_SVPBMT 0
#define CPUFEATURE_NUMBER 1 #define CPUFEATURE_ZICBOM 1
#define CPUFEATURE_NUMBER 2
#ifdef __ASSEMBLY__ #ifdef __ASSEMBLY__
...@@ -93,6 +94,22 @@ asm volatile(ALTERNATIVE( \ ...@@ -93,6 +94,22 @@ asm volatile(ALTERNATIVE( \
#define ALT_THEAD_PMA(_val) #define ALT_THEAD_PMA(_val)
#endif #endif
/*
 * ALT_CMO_OP(_op, _start, _size, _cachesize)
 *
 * Emits a five-instruction alternative. The default content is five nops;
 * the replacement, keyed on CPUFEATURE_ZICBOM / CONFIG_RISCV_ISA_ZICBOM, is
 * a loop that issues "cbo.<_op>" once per block:
 *
 *	a0 = _start rounded down with ~(_cachesize - 1)
 *	while (a0 < _start + _size) { cbo.<_op> (a0); a0 += _cachesize; }
 *
 * The replacement is exactly five instructions, matching __nops(5).
 * a0 is used as the cursor and is declared as clobbered.
 *
 * _start and _cachesize are each expanded more than once, so arguments
 * with side effects must not be passed.
 *
 * NOTE(review): the rounding mask is only correct when _cachesize is a
 * power of two - nothing here checks that; confirm at the call sites.
 * NOTE(review): no "memory" clobber is listed; confirm whether callers
 * rely on compiler ordering around this statement.
 */
#define ALT_CMO_OP(_op, _start, _size, _cachesize) \
asm volatile(ALTERNATIVE( \
__nops(5), \
"mv a0, %1\n\t" \
"j 2f\n\t" \
"3:\n\t" \
"cbo." __stringify(_op) " (a0)\n\t" \
"add a0, a0, %0\n\t" \
"2:\n\t" \
"bltu a0, %2, 3b\n\t", 0, \
CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM) \
: : "r"(_cachesize), \
"r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \
"r"((unsigned long)(_start) + (_size)) \
: "a0")
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#endif #endif
...@@ -54,6 +54,7 @@ extern unsigned long elf_hwcap; ...@@ -54,6 +54,7 @@ extern unsigned long elf_hwcap;
enum riscv_isa_ext_id { enum riscv_isa_ext_id {
RISCV_ISA_EXT_SSCOFPMF = RISCV_ISA_EXT_BASE, RISCV_ISA_EXT_SSCOFPMF = RISCV_ISA_EXT_BASE,
RISCV_ISA_EXT_SVPBMT, RISCV_ISA_EXT_SVPBMT,
RISCV_ISA_EXT_ZICBOM,
RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX, RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX,
}; };
......
...@@ -89,6 +89,7 @@ int riscv_of_parent_hartid(struct device_node *node) ...@@ -89,6 +89,7 @@ int riscv_of_parent_hartid(struct device_node *node)
static struct riscv_isa_ext_data isa_ext_arr[] = { static struct riscv_isa_ext_data isa_ext_arr[] = {
__RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT), __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
__RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
__RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX), __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
}; };
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/of.h> #include <linux/of.h>
#include <asm/alternative.h> #include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/errata_list.h> #include <asm/errata_list.h>
#include <asm/hwcap.h> #include <asm/hwcap.h>
#include <asm/patch.h> #include <asm/patch.h>
...@@ -198,6 +199,7 @@ void __init riscv_fill_hwcap(void) ...@@ -198,6 +199,7 @@ void __init riscv_fill_hwcap(void)
} else { } else {
SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF); SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT); SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM);
} }
#undef SET_ISA_EXT_MAP #undef SET_ISA_EXT_MAP
} }
...@@ -259,6 +261,25 @@ static bool __init_or_module cpufeature_probe_svpbmt(unsigned int stage) ...@@ -259,6 +261,25 @@ static bool __init_or_module cpufeature_probe_svpbmt(unsigned int stage)
return false; return false;
} }
/*
 * Report whether the Zicbom alternative should be applied at @stage.
 *
 * Always false during RISCV_ALTERNATIVES_EARLY_BOOT and when
 * CONFIG_RISCV_ISA_ZICBOM is disabled. At any other stage, when the
 * ZICBOM ISA extension is reported as available, non-coherent support is
 * flagged via riscv_noncoherent_supported() and true is returned.
 */
static bool __init_or_module cpufeature_probe_zicbom(unsigned int stage)
{
#ifdef CONFIG_RISCV_ISA_ZICBOM
	if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
		return false;

	if (!riscv_isa_extension_available(NULL, ZICBOM))
		return false;

	riscv_noncoherent_supported();
	return true;
#else
	return false;
#endif
}
/* /*
* Probe presence of individual extensions. * Probe presence of individual extensions.
* *
...@@ -273,6 +294,9 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage) ...@@ -273,6 +294,9 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage)
if (cpufeature_probe_svpbmt(stage)) if (cpufeature_probe_svpbmt(stage))
cpu_req_feature |= (1U << CPUFEATURE_SVPBMT); cpu_req_feature |= (1U << CPUFEATURE_SVPBMT);
if (cpufeature_probe_zicbom(stage))
cpu_req_feature |= (1U << CPUFEATURE_ZICBOM);
return cpu_req_feature; return cpu_req_feature;
} }
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <linux/crash_dump.h> #include <linux/crash_dump.h>
#include <asm/alternative.h> #include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/cpu_ops.h> #include <asm/cpu_ops.h>
#include <asm/early_ioremap.h> #include <asm/early_ioremap.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
...@@ -296,6 +297,7 @@ void __init setup_arch(char **cmdline_p) ...@@ -296,6 +297,7 @@ void __init setup_arch(char **cmdline_p)
#endif #endif
riscv_fill_hwcap(); riscv_fill_hwcap();
riscv_init_cbom_blocksize();
apply_boot_alternatives(); apply_boot_alternatives();
} }
......
...@@ -30,3 +30,4 @@ endif ...@@ -30,3 +30,4 @@ endif
endif endif
obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
obj-$(CONFIG_RISCV_DMA_NONCOHERENT) += dma-noncoherent.o
// SPDX-License-Identifier: GPL-2.0-only
/*
* RISC-V specific functions to support DMA for non-coherent devices
*
* Copyright (c) 2021 Western Digital Corporation or its affiliates.
*/
#include <linux/dma-direct.h>
#include <linux/dma-map-ops.h>
#include <linux/mm.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <asm/cacheflush.h>
/* Block size handed to ALT_CMO_OP as its stride; starts out as L1_CACHE_BYTES. */
static unsigned int riscv_cbom_block_size = L1_CACHE_BYTES;
/* Set by riscv_noncoherent_supported(); checked in arch_setup_dma_ops(). */
static bool noncoherent_supported;
/*
 * Cache maintenance before a buffer is handed to a device.
 *
 * @paddr: physical start address of the buffer
 * @size:  length of the buffer in bytes
 * @dir:   DMA direction
 *
 * DMA_TO_DEVICE and DMA_FROM_DEVICE issue a "clean" over the range,
 * DMA_BIDIRECTIONAL issues a "flush"; any other direction does nothing.
 */
void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
			      enum dma_data_direction dir)
{
	void *vaddr = phys_to_virt(paddr);

	if (dir == DMA_TO_DEVICE || dir == DMA_FROM_DEVICE) {
		ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
	} else if (dir == DMA_BIDIRECTIONAL) {
		ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size);
	}
}
/*
 * Cache maintenance before the CPU touches a buffer after DMA.
 *
 * @paddr: physical start address of the buffer
 * @size:  length of the buffer in bytes
 * @dir:   DMA direction
 *
 * Only DMA_FROM_DEVICE and DMA_BIDIRECTIONAL issue a "flush" over the
 * range; DMA_TO_DEVICE and any other direction do nothing.
 */
void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
			   enum dma_data_direction dir)
{
	void *vaddr = phys_to_virt(paddr);

	if (dir != DMA_FROM_DEVICE && dir != DMA_BIDIRECTIONAL)
		return;

	ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size);
}
/*
 * Issue a "flush" over @size bytes starting at the kernel address of @page.
 *
 * The address is kept in a local because ALT_CMO_OP expands its start
 * argument more than once.
 */
void arch_dma_prep_coherent(struct page *page, size_t size)
{
	void *vaddr = page_address(page);

	ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size);
}
/*
 * Record the coherency of @dev and warn about unsupported setups.
 *
 * For a non-coherent device, two conditions each raise a warning and taint
 * the kernel with TAINT_CPU_OUT_OF_SPEC:
 *  - the cbom block size is larger than ARCH_DMA_MINALIGN;
 *  - riscv_noncoherent_supported() was never called.
 *
 * @dma_base, @size and @iommu are not used here.
 */
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
			const struct iommu_ops *iommu, bool coherent)
{
	const bool dev_noncoherent = !coherent;

	WARN_TAINT(dev_noncoherent &&
		   riscv_cbom_block_size > ARCH_DMA_MINALIGN,
		   TAINT_CPU_OUT_OF_SPEC,
		   "%s %s: ARCH_DMA_MINALIGN smaller than riscv,cbom-block-size (%d < %d)",
		   dev_driver_string(dev), dev_name(dev),
		   ARCH_DMA_MINALIGN, riscv_cbom_block_size);

	WARN_TAINT(dev_noncoherent && !noncoherent_supported,
		   TAINT_CPU_OUT_OF_SPEC,
		   "%s %s: device non-coherent but no non-coherent operations supported",
		   dev_driver_string(dev), dev_name(dev));

	dev->dma_coherent = coherent;
}
#ifdef CONFIG_RISCV_ISA_ZICBOM
/*
 * Read "riscv,cbom-block-size" from the devicetree CPU nodes and use it as
 * the stride for the cache-management operations.
 *
 * The first CPU node with a valid hart id and a non-zero property value
 * provides the block size. Every later node is compared against it and a
 * mismatch is reported with both hart ids. If no node provides a value,
 * riscv_cbom_block_size keeps its L1_CACHE_BYTES default.
 *
 * The value is collected in a local that starts at zero. Testing the
 * global directly cannot work, because the global is pre-initialised to a
 * non-zero default and the devicetree value would never be applied.
 */
void riscv_init_cbom_blocksize(void)
{
	struct device_node *node;
	u32 probed_block_size = 0;
	/* Lives outside the loop so it survives until a mismatch is reported. */
	int cbom_hartid = -1;
	u32 val;
	int ret;

	for_each_of_cpu_node(node) {
		int hartid = riscv_of_processor_hartid(node);

		if (hartid < 0)
			continue;

		/* set block-size for cbom extension if available */
		ret = of_property_read_u32(node, "riscv,cbom-block-size", &val);
		if (ret)
			continue;

		if (!probed_block_size) {
			probed_block_size = val;
			cbom_hartid = hartid;
		} else if (probed_block_size != val) {
			pr_warn("cbom-block-size mismatched between harts %d and %d\n",
				cbom_hartid, hartid);
		}
	}

	/* Only override the default when the devicetree supplied a value. */
	if (probed_block_size)
		riscv_cbom_block_size = probed_block_size;
}
#endif
/*
 * Record that cache-management operations for non-coherent devices are
 * available. arch_setup_dma_ops() warns for non-coherent devices when this
 * has not been called.
 */
void riscv_noncoherent_supported(void)
{
noncoherent_supported = true;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment