Commit 047486d8 authored by Linus Torvalds

Merge tag 'edac_for_4.6' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp

Pull EDAC updates from Borislav Petkov:

 - Altera: L2 cache and On-Chip RAM support (Thor Thayer).

 - EDAC: Workqueue handling cleanups (Borislav Petkov).

 - Xgene: Register bus error handling (Loc Ho).

 - Misc small fixes.

* tag 'edac_for_4.6' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp:
  ARM: socfpga: Enable OCRAM ECC on startup
  ARM: socfpga: Enable L2 cache ECC on startup
  ARM: dts: Add Altera L2 Cache and OCRAM EDAC entries
  EDAC, altera: Add Altera L2 cache and OCRAM support
  EDAC: Use edac_debugfs_remove_recursive() in edac_debugfs_exit()
  EDAC, mpc85xx: Silence unused variable warning
  EDAC: Cleanup/sync workqueue functions
  EDAC: Kill workqueue setup/teardown functions
  EDAC: Balance workqueue setup and teardown
  arm64: Update the APM X-Gene EDAC node with the RB register resource
  EDAC, xgene: Add missing SoC register bus error handling
  Documentation, EDAC: Update xgene binding for missing register bus
  EDAC, amd64_edac: Shift wrapping issue in f1x_get_norm_dct_addr()
parents 9256d5a3 7cc5a5d3
Altera SoCFPGA ECC Manager
This driver uses the EDAC framework to implement the SOCFPGA ECC Manager.
The ECC Manager counts and corrects single bit errors and counts/handles
double bit errors which are uncorrectable.
Required Properties:
- compatible : Should be "altr,socfpga-ecc-manager"
- #address-cells: must be 1
- #size-cells: must be 1
- ranges : standard definition, should translate from local addresses
Subcomponents:
L2 Cache ECC
Required Properties:
- compatible : Should be "altr,socfpga-l2-ecc"
- reg : Address and size for ECC error interrupt clear registers.
- interrupts : Should be single bit error interrupt, then double bit error
interrupt. Note the rising edge type.
On Chip RAM ECC
Required Properties:
- compatible : Should be "altr,socfpga-ocram-ecc"
- reg : Address and size for ECC error interrupt clear registers.
- iram : phandle to On-Chip RAM definition.
- interrupts : Should be single bit error interrupt, then double bit error
interrupt. Note the rising edge type.
Example:
eccmgr: eccmgr@ffd08140 {
compatible = "altr,socfpga-ecc-manager";
#address-cells = <1>;
#size-cells = <1>;
ranges;
l2-ecc@ffd08140 {
compatible = "altr,socfpga-l2-ecc";
reg = <0xffd08140 0x4>;
interrupts = <0 36 1>, <0 37 1>;
};
ocram-ecc@ffd08144 {
compatible = "altr,socfpga-ocram-ecc";
reg = <0xffd08144 0x4>;
iram = <&ocram>;
interrupts = <0 178 1>, <0 179 1>;
};
};
......@@ -16,6 +16,10 @@ Required properties:
- regmap-mcba : Regmap of the MCB-A (memory bridge) resource.
- regmap-mcbb : Regmap of the MCB-B (memory bridge) resource.
- regmap-efuse : Regmap of the PMD efuse resource.
- regmap-rb : Regmap of the register bus resource. This property
is optional only for compatibility. If the RB
error conditions are not cleared, it will
continuously generate interrupts.
- reg : First resource shall be the CPU bus (PCP) resource.
- interrupts : Interrupt-specifier for MCU, PMD, L3, or SoC error
IRQ(s).
......@@ -64,6 +68,11 @@ Example:
reg = <0x0 0x1054a000 0x0 0x20>;
};
rb: rb@7e000000 {
compatible = "apm,xgene-rb", "syscon";
reg = <0x0 0x7e000000 0x0 0x10>;
};
edac@78800000 {
compatible = "apm,xgene-edac";
#address-cells = <2>;
......@@ -73,6 +82,7 @@ Example:
regmap-mcba = <&mcba>;
regmap-mcbb = <&mcbb>;
regmap-efuse = <&efuse>;
regmap-rb = <&rb>;
reg = <0x0 0x78800000 0x0 0x100>;
interrupts = <0x0 0x20 0x4>,
<0x0 0x21 0x4>,
......
......@@ -656,6 +656,26 @@ i2c3: i2c@ffc07000 {
status = "disabled";
};
eccmgr: eccmgr@ffd08140 {
compatible = "altr,socfpga-ecc-manager";
#address-cells = <1>;
#size-cells = <1>;
ranges;
l2-ecc@ffd08140 {
compatible = "altr,socfpga-l2-ecc";
reg = <0xffd08140 0x4>;
interrupts = <0 36 1>, <0 37 1>;
};
ocram-ecc@ffd08144 {
compatible = "altr,socfpga-ocram-ecc";
reg = <0xffd08144 0x4>;
iram = <&ocram>;
interrupts = <0 178 1>, <0 179 1>;
};
};
L2: l2-cache@fffef000 {
compatible = "arm,pl310-cache";
reg = <0xfffef000 0x1000>;
......
......@@ -5,3 +5,5 @@
obj-y := socfpga.o
obj-$(CONFIG_SMP) += headsmp.o platsmp.o
obj-$(CONFIG_SOCFPGA_SUSPEND) += pm.o self-refresh.o
obj-$(CONFIG_EDAC_ALTERA_L2C) += l2_cache.o
obj-$(CONFIG_EDAC_ALTERA_OCRAM) += ocram.o
......@@ -36,6 +36,8 @@
extern void socfpga_init_clocks(void);
extern void socfpga_sysmgr_init(void);
void socfpga_init_l2_ecc(void);
void socfpga_init_ocram_ecc(void);
extern void __iomem *sys_manager_base_addr;
extern void __iomem *rst_manager_base_addr;
......
/*
* Copyright Altera Corporation (C) 2016. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/io.h>
#include <linux/of_platform.h>
#include <linux/of_address.h>
void socfpga_init_l2_ecc(void)
{
struct device_node *np;
void __iomem *mapped_l2_edac_addr;
np = of_find_compatible_node(NULL, NULL, "altr,socfpga-l2-ecc");
if (!np) {
pr_err("Unable to find socfpga-l2-ecc in dtb\n");
return;
}
mapped_l2_edac_addr = of_iomap(np, 0);
of_node_put(np);
if (!mapped_l2_edac_addr) {
pr_err("Unable to find L2 ECC mapping in dtb\n");
return;
}
/* Enable ECC */
writel(0x01, mapped_l2_edac_addr);
iounmap(mapped_l2_edac_addr);
}
/*
* Copyright Altera Corporation (C) 2016. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/io.h>
#include <linux/genalloc.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
#define ALTR_OCRAM_CLEAR_ECC 0x00000018
#define ALTR_OCRAM_ECC_EN 0x00000019
void socfpga_init_ocram_ecc(void)
{
struct device_node *np;
void __iomem *mapped_ocr_edac_addr;
/* Find the OCRAM EDAC device tree node */
np = of_find_compatible_node(NULL, NULL, "altr,socfpga-ocram-ecc");
if (!np) {
pr_err("Unable to find socfpga-ocram-ecc\n");
return;
}
mapped_ocr_edac_addr = of_iomap(np, 0);
of_node_put(np);
if (!mapped_ocr_edac_addr) {
pr_err("Unable to map OCRAM ecc regs.\n");
return;
}
/* Clear any pending OCRAM ECC interrupts, then enable ECC */
writel(ALTR_OCRAM_CLEAR_ECC, mapped_ocr_edac_addr);
writel(ALTR_OCRAM_ECC_EN, mapped_ocr_edac_addr);
iounmap(mapped_ocr_edac_addr);
}
......@@ -59,6 +59,11 @@ static void __init socfpga_init_irq(void)
{
irqchip_init();
socfpga_sysmgr_init();
if (IS_ENABLED(CONFIG_EDAC_ALTERA_L2C))
socfpga_init_l2_ecc();
if (IS_ENABLED(CONFIG_EDAC_ALTERA_OCRAM))
socfpga_init_ocram_ecc();
}
static void socfpga_cyclone5_restart(enum reboot_mode mode, const char *cmd)
......
......@@ -493,6 +493,11 @@ efuse: efuse@1054a000 {
reg = <0x0 0x1054a000 0x0 0x20>;
};
rb: rb@7e000000 {
compatible = "apm,xgene-rb", "syscon";
reg = <0x0 0x7e000000 0x0 0x10>;
};
edac@78800000 {
compatible = "apm,xgene-edac";
#address-cells = <2>;
......@@ -502,6 +507,7 @@ edac@78800000 {
regmap-mcba = <&mcba>;
regmap-mcbb = <&mcbb>;
regmap-efuse = <&efuse>;
regmap-rb = <&rb>;
reg = <0x0 0x78800000 0x0 0x100>;
interrupts = <0x0 0x20 0x4>,
<0x0 0x21 0x4>,
......
......@@ -367,14 +367,30 @@ config EDAC_OCTEON_PCI
Support for error detection and correction on the
Cavium Octeon family of SOCs.
config EDAC_ALTERA_MC
bool "Altera SDRAM Memory Controller EDAC"
config EDAC_ALTERA
bool "Altera SOCFPGA ECC"
depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA
help
Support for error detection and correction on the
Altera SDRAM memory controller. Note that the
preloader must initialize the SDRAM before loading
the kernel.
Altera SOCs. This must be selected for SDRAM ECC.
Note that the preloader must initialize the SDRAM
before loading the kernel.
config EDAC_ALTERA_L2C
bool "Altera L2 Cache ECC"
depends on EDAC_ALTERA=y
select CACHE_L2X0
help
Support for error detection and correction on the
Altera L2 cache Memory for Altera SoCs. This option
requires L2 cache so it will force that selection.
config EDAC_ALTERA_OCRAM
bool "Altera On-Chip RAM ECC"
depends on EDAC_ALTERA=y && SRAM && GENERIC_ALLOCATOR
help
Support for error detection and correction on the
Altera On-Chip RAM Memory for Altera SoCs.
config EDAC_SYNOPSYS
tristate "Synopsys DDR Memory Controller"
......
......@@ -67,6 +67,6 @@ obj-$(CONFIG_EDAC_OCTEON_L2C) += octeon_edac-l2c.o
obj-$(CONFIG_EDAC_OCTEON_LMC) += octeon_edac-lmc.o
obj-$(CONFIG_EDAC_OCTEON_PCI) += octeon_edac-pci.o
obj-$(CONFIG_EDAC_ALTERA_MC) += altera_edac.o
obj-$(CONFIG_EDAC_ALTERA) += altera_edac.o
obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o
obj-$(CONFIG_EDAC_XGENE) += xgene_edac.o
This diff is collapsed.
......@@ -1452,7 +1452,7 @@ static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range,
u64 chan_off;
u64 dram_base = get_dram_base(pvt, range);
u64 hole_off = f10_dhar_offset(pvt);
u64 dct_sel_base_off = (pvt->dct_sel_hi & 0xFFFFFC00) << 16;
u64 dct_sel_base_off = (u64)(pvt->dct_sel_hi & 0xFFFFFC00) << 16;
if (hi_rng) {
/*
......
......@@ -53,7 +53,7 @@ int __init edac_debugfs_init(void)
void edac_debugfs_exit(void)
{
debugfs_remove(edac_debugfs);
debugfs_remove_recursive(edac_debugfs);
}
int edac_create_debugfs_nodes(struct mem_ctl_info *mci)
......
......@@ -535,59 +535,20 @@ static void edac_mc_workq_function(struct work_struct *work_req)
mutex_lock(&mem_ctls_mutex);
/* if this control struct has moved to offline state, we are done */
if (mci->op_state == OP_OFFLINE) {
if (mci->op_state != OP_RUNNING_POLL) {
mutex_unlock(&mem_ctls_mutex);
return;
}
/* Only poll controllers that are running polled and have a check */
if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
if (edac_mc_assert_error_check_and_clear())
mci->edac_check(mci);
mutex_unlock(&mem_ctls_mutex);
/* Reschedule */
/* Queue ourselves again. */
edac_queue_work(&mci->work, msecs_to_jiffies(edac_mc_get_poll_msec()));
}
/*
* edac_mc_workq_setup
* initialize a workq item for this mci
* passing in the new delay period in msec
*
* locking model:
*
* called with the mem_ctls_mutex held
*/
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
edac_dbg(0, "\n");
/* if this instance is not in the POLL state, then simply return */
if (mci->op_state != OP_RUNNING_POLL)
return;
INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
edac_queue_work(&mci->work, msecs_to_jiffies(msec));
}
/*
* edac_mc_workq_teardown
* stop the workq processing on this mci
*
* locking model:
*
* called WITHOUT lock held
*/
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
mci->op_state = OP_OFFLINE;
edac_stop_work(&mci->work);
}
/*
* edac_mc_reset_delay_period(unsigned long value)
*
......@@ -771,12 +732,12 @@ int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci,
goto fail1;
}
/* If there IS a check routine, then we are running POLLED */
if (mci->edac_check != NULL) {
/* This instance is NOW RUNNING */
if (mci->edac_check) {
mci->op_state = OP_RUNNING_POLL;
edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
edac_queue_work(&mci->work, msecs_to_jiffies(edac_mc_get_poll_msec()));
} else {
mci->op_state = OP_RUNNING_INTERRUPT;
}
......@@ -823,15 +784,16 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
return NULL;
}
/* mark MCI offline: */
mci->op_state = OP_OFFLINE;
if (!del_mc_from_global_list(mci))
edac_mc_owner = NULL;
mutex_unlock(&mem_ctls_mutex);
/* flush workq processes */
edac_mc_workq_teardown(mci);
mutex_unlock(&mem_ctls_mutex);
/* marking MCI offline */
mci->op_state = OP_OFFLINE;
if (mci->edac_check)
edac_stop_work(&mci->work);
/* remove from sysfs */
edac_remove_sysfs_mci_device(mci);
......
......@@ -195,55 +195,24 @@ static void edac_pci_workq_function(struct work_struct *work_req)
mutex_lock(&edac_pci_ctls_mutex);
if (pci->op_state == OP_RUNNING_POLL) {
/* we might be in POLL mode, but there may NOT be a poll func
*/
if ((pci->edac_check != NULL) && edac_pci_get_check_errors())
pci->edac_check(pci);
/* if we are on a one second period, then use round */
msec = edac_pci_get_poll_msec();
if (msec == 1000)
delay = round_jiffies_relative(msecs_to_jiffies(msec));
else
delay = msecs_to_jiffies(msec);
/* Reschedule only if we are in POLL mode */
edac_queue_work(&pci->work, delay);
if (pci->op_state != OP_RUNNING_POLL) {
mutex_unlock(&edac_pci_ctls_mutex);
return;
}
mutex_unlock(&edac_pci_ctls_mutex);
}
/*
* edac_pci_workq_setup()
* initialize a workq item for this edac_pci instance
* passing in the new delay period in msec
*
* locking model:
* called when 'edac_pci_ctls_mutex' is locked
*/
static void edac_pci_workq_setup(struct edac_pci_ctl_info *pci,
unsigned int msec)
{
edac_dbg(0, "\n");
if (edac_pci_get_check_errors())
pci->edac_check(pci);
INIT_DELAYED_WORK(&pci->work, edac_pci_workq_function);
/* if we are on a one second period, then use round */
msec = edac_pci_get_poll_msec();
if (msec == 1000)
delay = round_jiffies_relative(msecs_to_jiffies(msec));
else
delay = msecs_to_jiffies(msec);
edac_queue_work(&pci->work, msecs_to_jiffies(edac_pci_get_poll_msec()));
}
edac_queue_work(&pci->work, delay);
/*
* edac_pci_workq_teardown()
* stop the workq processing on this edac_pci instance
*/
static void edac_pci_workq_teardown(struct edac_pci_ctl_info *pci)
{
edac_dbg(0, "\n");
pci->op_state = OP_OFFLINE;
edac_stop_work(&pci->work);
mutex_unlock(&edac_pci_ctls_mutex);
}
/*
......@@ -289,10 +258,12 @@ int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx)
goto fail1;
}
if (pci->edac_check != NULL) {
if (pci->edac_check) {
pci->op_state = OP_RUNNING_POLL;
edac_pci_workq_setup(pci, 1000);
INIT_DELAYED_WORK(&pci->work, edac_pci_workq_function);
edac_queue_work(&pci->work, msecs_to_jiffies(edac_pci_get_poll_msec()));
} else {
pci->op_state = OP_RUNNING_INTERRUPT;
}
......@@ -350,8 +321,8 @@ struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev)
mutex_unlock(&edac_pci_ctls_mutex);
/* stop the workq timer */
edac_pci_workq_teardown(pci);
if (pci->edac_check)
edac_stop_work(&pci->work);
edac_printk(KERN_INFO, EDAC_PCI,
"Removed device %d for %s %s: DEV %s\n",
......
......@@ -1244,7 +1244,7 @@ static struct platform_driver * const drivers[] = {
static int __init mpc85xx_mc_init(void)
{
int res = 0;
u32 pvr = 0;
u32 __maybe_unused pvr = 0;
printk(KERN_INFO "Freescale(R) MPC85xx EDAC driver, "
"(C) 2006 Montavista Software\n");
......
......@@ -61,6 +61,7 @@ struct xgene_edac {
struct regmap *mcba_map;
struct regmap *mcbb_map;
struct regmap *efuse_map;
struct regmap *rb_map;
void __iomem *pcp_csr;
spinlock_t lock;
struct dentry *dfs;
......@@ -1057,7 +1058,7 @@ static bool xgene_edac_l3_promote_to_uc_err(u32 l3cesr, u32 l3celr)
case 0x041:
return true;
}
} else if (L3C_ELR_ERRSYN(l3celr) == 9)
} else if (L3C_ELR_ERRWAY(l3celr) == 9)
return true;
return false;
......@@ -1353,6 +1354,17 @@ static int xgene_edac_l3_remove(struct xgene_edac_dev_ctx *l3)
#define GLBL_MDED_ERRH 0x0848
#define GLBL_MDED_ERRHMASK 0x084c
/* IO Bus Registers */
#define RBCSR 0x0000
#define STICKYERR_MASK BIT(0)
#define RBEIR 0x0008
#define AGENT_OFFLINE_ERR_MASK BIT(30)
#define UNIMPL_RBPAGE_ERR_MASK BIT(29)
#define WORD_ALIGNED_ERR_MASK BIT(28)
#define PAGE_ACCESS_ERR_MASK BIT(27)
#define WRITE_ACCESS_MASK BIT(26)
#define RBERRADDR_RD(src) ((src) & 0x03FFFFFF)
static const char * const soc_mem_err_v1[] = {
"10GbE0",
"10GbE1",
......@@ -1470,6 +1482,51 @@ static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev)
u32 err_addr_hi;
u32 reg;
/* If the register bus resource isn't available, just skip it */
if (!ctx->edac->rb_map)
goto rb_skip;
/*
* Check RB access errors
* 1. Out of range
* 2. Un-implemented page
* 3. Un-aligned access
* 4. Offline slave IP
*/
if (regmap_read(ctx->edac->rb_map, RBCSR, &reg))
return;
if (reg & STICKYERR_MASK) {
bool write;
u32 address;
dev_err(edac_dev->dev, "IOB bus access error(s)\n");
if (regmap_read(ctx->edac->rb_map, RBEIR, &reg))
return;
write = reg & WRITE_ACCESS_MASK ? 1 : 0;
address = RBERRADDR_RD(reg);
if (reg & AGENT_OFFLINE_ERR_MASK)
dev_err(edac_dev->dev,
"IOB bus %s access to offline agent error\n",
write ? "write" : "read");
if (reg & UNIMPL_RBPAGE_ERR_MASK)
dev_err(edac_dev->dev,
"IOB bus %s access to unimplemented page error\n",
write ? "write" : "read");
if (reg & WORD_ALIGNED_ERR_MASK)
dev_err(edac_dev->dev,
"IOB bus %s word aligned access error\n",
write ? "write" : "read");
if (reg & PAGE_ACCESS_ERR_MASK)
dev_err(edac_dev->dev,
"IOB bus %s to page out of range access error\n",
write ? "write" : "read");
if (regmap_write(ctx->edac->rb_map, RBEIR, 0))
return;
if (regmap_write(ctx->edac->rb_map, RBCSR, 0))
return;
}
rb_skip:
/* IOB Bridge agent transaction error interrupt */
reg = readl(ctx->dev_csr + IOBBATRANSERRINTSTS);
if (!reg)
......@@ -1852,6 +1909,17 @@ static int xgene_edac_probe(struct platform_device *pdev)
goto out_err;
}
/*
* NOTE: The register bus resource is optional for compatibility
* reasons.
*/
edac->rb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
"regmap-rb");
if (IS_ERR(edac->rb_map)) {
dev_warn(edac->dev, "missing syscon regmap rb\n");
edac->rb_map = NULL;
}
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
edac->pcp_csr = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(edac->pcp_csr)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment