Commit 5b160bd4 authored by Linus Torvalds

Merge branch 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86/mce changes from Ingo Molnar:
 "This tree improves the AMD thresholding bank code and includes a
  memory fault signal handling fixlet."

* 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Fix siginfo_t->si_addr value for non-recoverable memory faults
  x86, MCE, AMD: Update copyrights and boilerplate
  x86, MCE, AMD: Give proper names to the thresholding banks
  x86, MCE, AMD: Make error_count read only
  x86, MCE, AMD: Cleanup reading of error_count
  x86, MCE, AMD: Print decimal thresholding values
  x86, MCE, AMD: Move shared bank to node descriptor
  x86, MCE, AMD: Remove local_allocate_... wrapper
  x86, MCE, AMD: Remove shared banks sysfs linking
  x86, amd_nb: Export model 0x10 and later PCI id
parents 7100e505 bb65a764
@@ -26,10 +26,31 @@ struct amd_l3_cache {
 	u8	subcaches[4];
 };
 
+struct threshold_block {
+	unsigned int		block;
+	unsigned int		bank;
+	unsigned int		cpu;
+	u32			address;
+	u16			interrupt_enable;
+	bool			interrupt_capable;
+	u16			threshold_limit;
+	struct kobject		kobj;
+	struct list_head	miscj;
+};
+
+struct threshold_bank {
+	struct kobject		*kobj;
+	struct threshold_block	*blocks;
+
+	/* initialized to the number of CPUs on the node sharing this bank */
+	atomic_t		cpus;
+};
+
 struct amd_northbridge {
 	struct pci_dev		*misc;
 	struct pci_dev		*link;
 	struct amd_l3_cache	l3_cache;
+	struct threshold_bank	*bank4;
 };
 
 struct amd_northbridge_info {
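For context, a rough sketch of how a CPU could attach to the node-shared MC4 thresholding bank through the new amd_northbridge::bank4 pointer. This is an illustration only, not code from the series; it assumes the existing node_to_amd_nb() and amd_get_nb_id() helpers, and example_attach_shared_bank4() is a hypothetical name:

	/* Illustration only: reuse the shared bank if a sibling CPU on the
	 * same node already set it up, otherwise allocate and publish it. */
	static int example_attach_shared_bank4(unsigned int cpu)
	{
		struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu));
		struct threshold_bank *tb;

		if (!nb)
			return -ENODEV;		/* no northbridge descriptor for this node */

		if (nb->bank4) {
			tb = nb->bank4;		/* bank already set up by a sibling CPU */
			atomic_inc(&tb->cpus);	/* record one more user on this node */
			return 0;
		}

		tb = kzalloc(sizeof(*tb), GFP_KERNEL);	/* first CPU on the node allocates it */
		if (!tb)
			return -ENOMEM;
		atomic_set(&tb->cpus, 1);
		nb->bank4 = tb;			/* publish it for the other CPUs */
		return 0;
	}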
@@ -19,6 +19,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) },
 	{}
 };
 EXPORT_SYMBOL(amd_nb_misc_ids);
@@ -1190,6 +1190,7 @@ void mce_notify_process(void)
 {
 	unsigned long pfn;
 	struct mce_info *mi = mce_find_info();
+	int flags = MF_ACTION_REQUIRED;
 
 	if (!mi)
 		mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL);
@@ -1204,8 +1205,9 @@ void mce_notify_process(void)
 	 * doomed. We still need to mark the page as poisoned and alert any
 	 * other users of the page.
 	 */
-	if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 ||
-	    mi->restartable == 0) {
+	if (!mi->restartable)
+		flags |= MF_MUST_KILL;
+	if (memory_failure(pfn, MCE_VECTOR, flags) < 0) {
 		pr_err("Memory error not recovered");
 		force_sig(SIGBUS, current);
 	}
This diff is collapsed.
@@ -33,9 +33,6 @@ static bool force;
 module_param(force, bool, 0444);
 MODULE_PARM_DESC(force, "force loading on processors with erratum 319");
 
-/* PCI-IDs for Northbridge devices not used anywhere else */
-#define PCI_DEVICE_ID_AMD_15H_M10H_NB_F3	0x1403
-
 /* CPUID function 0x80000001, ebx */
 #define CPUID_PKGTYPE_MASK	0xf0000000
 #define CPUID_PKGTYPE_F		0x00000000
@@ -213,7 +210,7 @@ static DEFINE_PCI_DEVICE_TABLE(k10temp_id_table) = {
 	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) },
 	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
 	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M10H_NB_F3) },
+	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) },
 	{}
 };
 MODULE_DEVICE_TABLE(pci, k10temp_id_table);
@@ -1591,6 +1591,7 @@ void vmemmap_populate_print_last(void);
 enum mf_flags {
 	MF_COUNT_INCREASED = 1 << 0,
 	MF_ACTION_REQUIRED = 1 << 1,
+	MF_MUST_KILL = 1 << 2,
 };
 extern int memory_failure(unsigned long pfn, int trapno, int flags);
 extern void memory_failure_queue(unsigned long pfn, int trapno, int flags);
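MF_MUST_KILL asks memory_failure() to SIGBUS the mapping processes even when the page turns out to be clean, which is what a caller wants when the interrupted context cannot be restarted; the signal then comes from the hwpoison kill path with a meaningful si_addr instead of a bare force_sig() in the machine-check handler (per the siginfo_t->si_addr fix in this pull). A minimal, hedged sketch of a caller, mirroring the mce.c hunk above; pfn is a placeholder and can_restart_current_task() is a hypothetical predicate:

	int flags = MF_ACTION_REQUIRED;

	if (!can_restart_current_task())	/* hypothetical predicate */
		flags |= MF_MUST_KILL;		/* kill mappers even if the page is clean */

	if (memory_failure(pfn, MCE_VECTOR, flags) < 0)
		pr_err("Memory error not recovered\n");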
@@ -517,6 +517,7 @@
 #define PCI_DEVICE_ID_AMD_11H_NB_DRAM	0x1302
 #define PCI_DEVICE_ID_AMD_11H_NB_MISC	0x1303
 #define PCI_DEVICE_ID_AMD_11H_NB_LINK	0x1304
+#define PCI_DEVICE_ID_AMD_15H_M10H_F3	0x1403
 #define PCI_DEVICE_ID_AMD_15H_NB_F0	0x1600
 #define PCI_DEVICE_ID_AMD_15H_NB_F1	0x1601
 #define PCI_DEVICE_ID_AMD_15H_NB_F2	0x1602
@@ -345,14 +345,14 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
  * Also when FAIL is set do a force kill because something went
  * wrong earlier.
  */
-static void kill_procs(struct list_head *to_kill, int doit, int trapno,
+static void kill_procs(struct list_head *to_kill, int forcekill, int trapno,
 			  int fail, struct page *page, unsigned long pfn,
 			  int flags)
 {
 	struct to_kill *tk, *next;
 
 	list_for_each_entry_safe (tk, next, to_kill, nd) {
-		if (doit) {
+		if (forcekill) {
 			/*
 			 * In case something went wrong with munmapping
 			 * make sure the process doesn't catch the
@@ -858,7 +858,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	struct address_space *mapping;
 	LIST_HEAD(tokill);
 	int ret;
-	int kill = 1;
+	int kill = 1, forcekill;
 	struct page *hpage = compound_head(p);
 	struct page *ppage;
 
@@ -888,7 +888,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 * be called inside page lock (it's recommended but not enforced).
 	 */
 	mapping = page_mapping(hpage);
-	if (!PageDirty(hpage) && mapping &&
+	if (!(flags & MF_MUST_KILL) && !PageDirty(hpage) && mapping &&
 	    mapping_cap_writeback_dirty(mapping)) {
 		if (page_mkclean(hpage)) {
 			SetPageDirty(hpage);
@@ -965,12 +965,14 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 * Now that the dirty bit has been propagated to the
 	 * struct page and all unmaps done we can decide if
 	 * killing is needed or not. Only kill when the page
-	 * was dirty, otherwise the tokill list is merely
+	 * was dirty or the process is not restartable,
+	 * otherwise the tokill list is merely
 	 * freed.  When there was a problem unmapping earlier
 	 * use a more force-full uncatchable kill to prevent
 	 * any accesses to the poisoned memory.
 	 */
-	kill_procs(&tokill, !!PageDirty(ppage), trapno,
+	forcekill = PageDirty(ppage) || (flags & MF_MUST_KILL);
+	kill_procs(&tokill, forcekill, trapno,
 		      ret != SWAP_SUCCESS, p, pfn, flags);
 
 	return ret;
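To summarize the new behavior: mapping processes are killed when the page was dirty (data was lost) or the caller set MF_MUST_KILL; whether that kill is uncatchable is still decided separately from the unmap result (the fail argument to kill_procs()). A hypothetical condensation of just the kill decision, for illustration only (example_want_kill() is not part of the patch):

	/* Illustration only: the kill decision after unmapping, condensed. */
	static bool example_want_kill(struct page *ppage, int flags)
	{
		/* data was lost (dirty page) or the caller insisted on a kill */
		return PageDirty(ppage) || (flags & MF_MUST_KILL);
	}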