Commit f6f9be1c authored by Florian Fainelli's avatar Florian Fainelli Committed by Russell King

ARM: 8725/1: Add Broadcom Brahma-B15 readahead cache support

This patch adds support for the Broadcom Brahma-B15 CPU readahead cache
controller. This cache controller sits between the L2 and the memory bus
and its purpose is to provide a friendlier burst size towards the DDR
interface than the native cache line size.

The readahead cache is mostly transparent, except for
flush_kern_cache_all, which is precisely what we are overriding here.

The readahead cache only intercepts reads, and does invalidate on
writes (IOW), as such, some data can remain stale in any of its buffers, such
that we need to flush it, which is an operation that needs to happen in
a particular order:

- disable the readahead cache
- flush it
- call the appropriate cache-v7.S function
- re-enable

This patch tries to minimize the impact to the cache-v7.S file by only
providing a stub in case CONFIG_CACHE_B15_RAC is enabled (default for
ARCH_BRCMSTB since it is the current user).
Signed-off-by: default avatarAlamy Liu <alamyliu@broadcom.com>
Signed-off-by: default avatarFlorian Fainelli <f.fainelli@gmail.com>
Signed-off-by: default avatarRussell King <rmk+kernel@armlinux.org.uk>
parent 32882912
...@@ -117,6 +117,10 @@ ...@@ -117,6 +117,10 @@
# endif # endif
#endif #endif
#if defined(CONFIG_CACHE_B15_RAC)
# define MULTI_CACHE 1
#endif
#if defined(CONFIG_CPU_V7M) #if defined(CONFIG_CPU_V7M)
# define MULTI_CACHE 1 # define MULTI_CACHE 1
#endif #endif
......
/*
 * Interface to the Broadcom Brahma-B15 read-ahead cache (RAC) support code.
 */
#ifndef __ASM_ARM_HARDWARE_CACHE_B15_RAC_H
#define __ASM_ARM_HARDWARE_CACHE_B15_RAC_H
/* C prototypes only: hidden when this header is pulled into assembly. */
#ifndef __ASSEMBLY__
/* RAC-aware flush_kern_cache_all variant for the Brahma-B15. */
void b15_flush_kern_cache_all(void);
#endif
#endif
...@@ -909,6 +909,14 @@ config OUTER_CACHE_SYNC ...@@ -909,6 +909,14 @@ config OUTER_CACHE_SYNC
The outer cache has a outer_cache_fns.sync function pointer The outer cache has a outer_cache_fns.sync function pointer
that can be used to drain the write buffer of the outer cache. that can be used to drain the write buffer of the outer cache.
config CACHE_B15_RAC
bool "Enable the Broadcom Brahma-B15 read-ahead cache controller"
depends on ARCH_BRCMSTB
default y
help
This option enables the Broadcom Brahma-B15 read-ahead cache
controller. If disabled, the read-ahead cache remains off.
config CACHE_FEROCEON_L2 config CACHE_FEROCEON_L2
bool "Enable the Feroceon L2 cache controller" bool "Enable the Feroceon L2 cache controller"
depends on ARCH_MV78XX0 || ARCH_MVEBU depends on ARCH_MV78XX0 || ARCH_MVEBU
......
...@@ -103,6 +103,7 @@ AFLAGS_proc-v6.o :=-Wa,-march=armv6 ...@@ -103,6 +103,7 @@ AFLAGS_proc-v6.o :=-Wa,-march=armv6
AFLAGS_proc-v7.o :=-Wa,-march=armv7-a AFLAGS_proc-v7.o :=-Wa,-march=armv7-a
obj-$(CONFIG_OUTER_CACHE) += l2c-common.o obj-$(CONFIG_OUTER_CACHE) += l2c-common.o
obj-$(CONFIG_CACHE_B15_RAC) += cache-b15-rac.o
obj-$(CONFIG_CACHE_FEROCEON_L2) += cache-feroceon-l2.o obj-$(CONFIG_CACHE_FEROCEON_L2) += cache-feroceon-l2.o
obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o l2c-l2x0-resume.o obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o l2c-l2x0-resume.o
obj-$(CONFIG_CACHE_L2X0_PMU) += cache-l2x0-pmu.o obj-$(CONFIG_CACHE_L2X0_PMU) += cache-l2x0-pmu.o
......
/*
* Broadcom Brahma-B15 CPU read-ahead cache management functions
*
* Copyright (C) 2015-2016 Broadcom
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/io.h>
#include <linux/bitops.h>
#include <linux/of_address.h>
#include <asm/cacheflush.h>
#include <asm/hardware/cache-b15-rac.h>
/* Plain v7 whole-cache flush, implemented outside this file; wrapped by
 * b15_flush_kern_cache_all() below.
 */
extern void v7_flush_kern_cache_all(void);
/* RAC register offsets, relative to the HIF_CPU_BIUCTRL register base */
#define RAC_CONFIG0_REG (0x78)
#define RACENPREF_MASK (0x3)
#define RACPREFINST_SHIFT (0)
#define RACENINST_SHIFT (2)
#define RACPREFDATA_SHIFT (4)
#define RACENDATA_SHIFT (6)
/* Each CPU's configuration field in RAC_CONFIG0 is RAC_CPU_SHIFT bits wide;
 * the per-CPU masks below are shifted by cpu * RAC_CPU_SHIFT.
 */
#define RAC_CPU_SHIFT (8)
#define RACCFG_MASK (0xff)
#define RAC_CONFIG1_REG (0x7c)
#define RAC_FLUSH_REG (0x80)
/* Written to RAC_FLUSH_REG to start a flush; polled until it reads as zero */
#define FLUSH_RAC (1 << 0)
/* Bitmask to enable instruction and data prefetching with a 256-bytes stride */
#define RAC_DATA_INST_EN_MASK (1 << RACPREFINST_SHIFT | \
RACENPREF_MASK << RACENINST_SHIFT | \
1 << RACPREFDATA_SHIFT | \
RACENPREF_MASK << RACENDATA_SHIFT)
/* Bit number in b15_rac_flags, set by b15_rac_init() once the RAC is on */
#define RAC_ENABLED 0
/* Mapped BIU control registers; NULL until b15_rac_init() maps them */
static void __iomem *b15_rac_base;
/* Held around the disable/flush/re-enable sequences in this file */
static DEFINE_SPINLOCK(rac_lock);
/* Initialization flag to avoid checking for b15_rac_base, and to prevent
 * multi-platform kernels from crashing here as well.
 */
static unsigned long b15_rac_flags;
static inline u32 __b15_rac_disable(void)
{
u32 val = __raw_readl(b15_rac_base + RAC_CONFIG0_REG);
__raw_writel(0, b15_rac_base + RAC_CONFIG0_REG);
dmb();
return val;
}
static inline void __b15_rac_flush(void)
{
u32 reg;
__raw_writel(FLUSH_RAC, b15_rac_base + RAC_FLUSH_REG);
do {
/* This dmb() is required to force the Bus Interface Unit
* to clean oustanding writes, and forces an idle cycle
* to be inserted.
*/
dmb();
reg = __raw_readl(b15_rac_base + RAC_FLUSH_REG);
} while (reg & FLUSH_RAC);
}
/*
 * Disable the read-ahead cache, then flush it.
 *
 * Returns the RAC_CONFIG0 value read before the cache was disabled.
 */
static inline u32 b15_rac_disable_and_flush(void)
{
	const u32 saved_cfg = __b15_rac_disable();

	__b15_rac_flush();

	return saved_cfg;
}
/*
 * Program RAC_CONFIG0 with @val (a full register value, e.g. one previously
 * returned by __b15_rac_disable()) and issue a dsb() afterwards.
 */
static inline void __b15_rac_enable(u32 val)
{
	void __iomem *cfg0 = b15_rac_base + RAC_CONFIG0_REG;

	__raw_writel(val, cfg0);
	/* dsb() is required here to be consistent with __flush_icache_all() */
	dsb();
}
/*
 * BUILD_RAC_CACHE_OP(name, bar) - generate b15_flush_<name>()
 *
 * The generated function wraps v7_flush_<name>() under rac_lock. When the
 * RAC_ENABLED flag is set, the read-ahead cache is disabled and flushed
 * before the v7 operation and its saved configuration is restored afterwards.
 * When the flag is clear, the hardware is left untouched and @bar is
 * executed after the v7 operation instead.
 */
#define BUILD_RAC_CACHE_OP(name, bar)				\
void b15_flush_##name(void)					\
{								\
	unsigned int rac_on;					\
	u32 saved_cfg = 0;					\
								\
	spin_lock(&rac_lock);					\
	rac_on = test_bit(RAC_ENABLED, &b15_rac_flags);		\
	if (rac_on)						\
		saved_cfg = b15_rac_disable_and_flush();	\
	v7_flush_##name();					\
	if (rac_on)						\
		__b15_rac_enable(saved_cfg);			\
	else							\
		bar;						\
	spin_unlock(&rac_lock);					\
}
/* Empty "bar" argument for BUILD_RAC_CACHE_OP: it expands to nothing, so no
 * barrier is emitted on the path where the RAC is not enabled.
 */
#define nobarrier
/* The readahead cache present in the Brahma-B15 CPU is a special piece of
 * hardware after the integrated L2 cache of the B15 CPU complex whose purpose
 * is to prefetch instruction and/or data with a line size of either 64 bytes
 * or 256 bytes. The rationale is that the data-bus of the CPU interface is
 * optimized for 256-bytes transactions. Because enabling the readahead cache
 * provides a significant performance boost (typically twice the performance
 * for a memcpy benchmark application), we want it enabled.
 *
 * The readahead cache is transparent for Modified Virtual Addresses
 * cache maintenance operations: ICIMVAU, DCIMVAC, DCCMVAC, DCCMVAU and
 * DCCIMVAC.
 *
 * It is however not transparent for the following cache maintenance
 * operations: DCISW, DCCSW, DCCISW, ICIALLUIS and ICIALLU, which is precisely
 * what we are patching with BUILD_RAC_CACHE_OP here.
 */
BUILD_RAC_CACHE_OP(kern_cache_all, nobarrier);
/*
 * Enable instruction and data read-ahead for every possible CPU.
 *
 * Builds a RAC_CONFIG0 value with RAC_DATA_INST_EN_MASK placed in each
 * possible CPU's field, disables and flushes the read-ahead cache (the
 * previous configuration is intentionally discarded), then programs the new
 * value. The only caller in this file, b15_rac_init(), holds rac_lock.
 */
static void b15_rac_enable(void)
{
	unsigned int cpu;
	u32 enable = 0;

	/* Widen the mask to u32 before shifting: as a plain int, shifting
	 * 0xdd left by 24 (CPU 3) would overflow into the sign bit, which
	 * is undefined behaviour in C.
	 */
	for_each_possible_cpu(cpu)
		enable |= (u32)RAC_DATA_INST_EN_MASK << (cpu * RAC_CPU_SHIFT);

	b15_rac_disable_and_flush();
	__b15_rac_enable(enable);
}
/*
 * Locate the BIU control block in the device tree, map it and turn the
 * read-ahead cache on.
 *
 * Returns -ENODEV when no "brcm,brcmstb-cpu-biu-ctrl" node exists, -ENOMEM
 * when the registers cannot be mapped, and 0 otherwise. Note that 0 is also
 * returned (after a warning, with the RAC left untouched) when more than
 * four CPUs are possible.
 */
static int __init b15_rac_init(void)
{
	struct device_node *biu_node;
	u32 cfg0, prefdata_mask = 0;
	int cpu, err = 0;

	biu_node = of_find_compatible_node(NULL, NULL,
					   "brcm,brcmstb-cpu-biu-ctrl");
	if (!biu_node)
		return -ENODEV;

	if (WARN(num_possible_cpus() > 4, "RAC only supports 4 CPUs\n"))
		goto put_node;

	b15_rac_base = of_iomap(biu_node, 0);
	if (!b15_rac_base) {
		pr_err("failed to remap BIU control base\n");
		err = -ENOMEM;
		goto put_node;
	}

	/* Data-prefetch bit of every possible CPU, used for the sanity check */
	for_each_possible_cpu(cpu)
		prefdata_mask |= ((1 << RACPREFDATA_SHIFT) <<
				  (cpu * RAC_CPU_SHIFT));

	spin_lock(&rac_lock);
	cfg0 = __raw_readl(b15_rac_base + RAC_CONFIG0_REG);
	WARN(cfg0 & prefdata_mask, "Read-ahead cache not previously disabled\n");

	b15_rac_enable();
	/* From here on b15_flush_kern_cache_all() manages the RAC as well */
	set_bit(RAC_ENABLED, &b15_rac_flags);
	spin_unlock(&rac_lock);

	pr_info("Broadcom Brahma-B15 readahead cache at: 0x%p\n",
		b15_rac_base + RAC_CONFIG0_REG);

put_node:
	of_node_put(biu_node);
	return err;
}
arch_initcall(b15_rac_init);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment