Commit 80cc7d87 authored by Mauro Carvalho Chehab

edac: lock module owner to avoid error report conflicts

APEI GHES and i7core_edac/sb_edac currently can be loaded at
the same time, but those are Highlander modules:
	"There can be only one".

There are two reasons for that:

1) Each driver assumes that it is the only one registering with
   the EDAC core, as it is the driver's responsibility to number
   the memory controllers, and all of them start from 0;

2) If the BIOS is handling the memory errors, the OS can't also be
   doing it, as one will interfere with the other.

So, we need to add a module owner lock to the EDAC core,
in order to avoid having two different modules handling memory
errors at the same time. The best way to implement this lock seems
to be to use the driver's name, as it is unique and won't require
changes to every driver.
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
parent c2c93dbc
...@@ -42,6 +42,12 @@ ...@@ -42,6 +42,12 @@
static DEFINE_MUTEX(mem_ctls_mutex); static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices); static LIST_HEAD(mc_devices);
/*
* Used to lock EDAC MC to just one module, avoiding two drivers e. g.
* apei/ghes and i7core_edac to be used at the same time.
*/
static void const *edac_mc_owner;
unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
unsigned len) unsigned len)
{ {
...@@ -659,9 +665,9 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci) ...@@ -659,9 +665,9 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci)
return 1; return 1;
} }
static void del_mc_from_global_list(struct mem_ctl_info *mci) static int del_mc_from_global_list(struct mem_ctl_info *mci)
{ {
atomic_dec(&edac_handlers); int handlers = atomic_dec_return(&edac_handlers);
list_del_rcu(&mci->link); list_del_rcu(&mci->link);
/* these are for safe removal of devices from global list while /* these are for safe removal of devices from global list while
...@@ -669,6 +675,8 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci) ...@@ -669,6 +675,8 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci)
*/ */
synchronize_rcu(); synchronize_rcu();
INIT_LIST_HEAD(&mci->link); INIT_LIST_HEAD(&mci->link);
return handlers;
} }
/** /**
...@@ -712,6 +720,7 @@ EXPORT_SYMBOL(edac_mc_find); ...@@ -712,6 +720,7 @@ EXPORT_SYMBOL(edac_mc_find);
/* FIXME - should a warning be printed if no error detection? correction? */ /* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci) int edac_mc_add_mc(struct mem_ctl_info *mci)
{ {
int ret = -EINVAL;
edac_dbg(0, "\n"); edac_dbg(0, "\n");
#ifdef CONFIG_EDAC_DEBUG #ifdef CONFIG_EDAC_DEBUG
...@@ -742,6 +751,11 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) ...@@ -742,6 +751,11 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
#endif #endif
mutex_lock(&mem_ctls_mutex); mutex_lock(&mem_ctls_mutex);
if (edac_mc_owner && edac_mc_owner != mci->mod_name) {
ret = -EPERM;
goto fail0;
}
if (add_mc_to_global_list(mci)) if (add_mc_to_global_list(mci))
goto fail0; goto fail0;
...@@ -768,6 +782,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) ...@@ -768,6 +782,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':" edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci)); " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
edac_mc_owner = mci->mod_name;
mutex_unlock(&mem_ctls_mutex); mutex_unlock(&mem_ctls_mutex);
return 0; return 0;
...@@ -776,7 +792,7 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) ...@@ -776,7 +792,7 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
fail0: fail0:
mutex_unlock(&mem_ctls_mutex); mutex_unlock(&mem_ctls_mutex);
return 1; return ret;
} }
EXPORT_SYMBOL_GPL(edac_mc_add_mc); EXPORT_SYMBOL_GPL(edac_mc_add_mc);
...@@ -802,7 +818,8 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) ...@@ -802,7 +818,8 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
return NULL; return NULL;
} }
del_mc_from_global_list(mci); if (!del_mc_from_global_list(mci))
edac_mc_owner = NULL;
mutex_unlock(&mem_ctls_mutex); mutex_unlock(&mem_ctls_mutex);
/* flush workq processes */ /* flush workq processes */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment