Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
linux
Commits
46e387bb
Commit
46e387bb
authored
Oct 22, 2010
by
Andi Kleen
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'hwpoison-hugepages' into hwpoison
Conflicts: mm/memory-failure.c
parents
e9d08567
3ef8fd7f
Changes
10
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
551 additions
and
125 deletions
+551
-125
arch/x86/mm/fault.c
arch/x86/mm/fault.c
+13
-6
fs/hugetlbfs/inode.c
fs/hugetlbfs/inode.c
+15
-0
include/linux/hugetlb.h
include/linux/hugetlb.h
+15
-2
include/linux/migrate.h
include/linux/migrate.h
+16
-0
include/linux/mm.h
include/linux/mm.h
+10
-2
mm/hugetlb.c
mm/hugetlb.c
+163
-70
mm/memory-failure.c
mm/memory-failure.c
+93
-9
mm/memory.c
mm/memory.c
+2
-1
mm/migrate.c
mm/migrate.c
+216
-18
mm/rmap.c
mm/rmap.c
+8
-17
No files found.
arch/x86/mm/fault.c
View file @
46e387bb
...
...
@@ -11,6 +11,7 @@
#include <linux/kprobes.h>
/* __kprobes, ... */
#include <linux/mmiotrace.h>
/* kmmio_handler, ... */
#include <linux/perf_event.h>
/* perf_sw_event */
#include <linux/hugetlb.h>
/* hstate_index_to_shift */
#include <asm/traps.h>
/* dotraplinkage, ... */
#include <asm/pgalloc.h>
/* pgd_*(), ... */
...
...
@@ -160,15 +161,20 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
static
void
force_sig_info_fault
(
int
si_signo
,
int
si_code
,
unsigned
long
address
,
struct
task_struct
*
tsk
)
struct
task_struct
*
tsk
,
int
fault
)
{
unsigned
lsb
=
0
;
siginfo_t
info
;
info
.
si_signo
=
si_signo
;
info
.
si_errno
=
0
;
info
.
si_code
=
si_code
;
info
.
si_addr
=
(
void
__user
*
)
address
;
info
.
si_addr_lsb
=
si_code
==
BUS_MCEERR_AR
?
PAGE_SHIFT
:
0
;
if
(
fault
&
VM_FAULT_HWPOISON_LARGE
)
lsb
=
hstate_index_to_shift
(
VM_FAULT_GET_HINDEX
(
fault
));
if
(
fault
&
VM_FAULT_HWPOISON
)
lsb
=
PAGE_SHIFT
;
info
.
si_addr_lsb
=
lsb
;
force_sig_info
(
si_signo
,
&
info
,
tsk
);
}
...
...
@@ -722,7 +728,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
tsk
->
thread
.
error_code
=
error_code
|
(
address
>=
TASK_SIZE
);
tsk
->
thread
.
trap_no
=
14
;
force_sig_info_fault
(
SIGSEGV
,
si_code
,
address
,
tsk
);
force_sig_info_fault
(
SIGSEGV
,
si_code
,
address
,
tsk
,
0
);
return
;
}
...
...
@@ -807,14 +813,14 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
tsk
->
thread
.
trap_no
=
14
;
#ifdef CONFIG_MEMORY_FAILURE
if
(
fault
&
VM_FAULT_HWPOISON
)
{
if
(
fault
&
(
VM_FAULT_HWPOISON
|
VM_FAULT_HWPOISON_LARGE
)
)
{
printk
(
KERN_ERR
"MCE: Killing %s:%d due to hardware memory corruption fault at %lx
\n
"
,
tsk
->
comm
,
tsk
->
pid
,
address
);
code
=
BUS_MCEERR_AR
;
}
#endif
force_sig_info_fault
(
SIGBUS
,
code
,
address
,
tsk
);
force_sig_info_fault
(
SIGBUS
,
code
,
address
,
tsk
,
fault
);
}
static
noinline
void
...
...
@@ -824,7 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
if
(
fault
&
VM_FAULT_OOM
)
{
out_of_memory
(
regs
,
error_code
,
address
);
}
else
{
if
(
fault
&
(
VM_FAULT_SIGBUS
|
VM_FAULT_HWPOISON
))
if
(
fault
&
(
VM_FAULT_SIGBUS
|
VM_FAULT_HWPOISON
|
VM_FAULT_HWPOISON_LARGE
))
do_sigbus
(
regs
,
error_code
,
address
,
fault
);
else
BUG
();
...
...
fs/hugetlbfs/inode.c
View file @
46e387bb
...
...
@@ -31,6 +31,7 @@
#include <linux/statfs.h>
#include <linux/security.h>
#include <linux/magic.h>
#include <linux/migrate.h>
#include <asm/uaccess.h>
...
...
@@ -573,6 +574,19 @@ static int hugetlbfs_set_page_dirty(struct page *page)
return
0
;
}
/*
 * Page-migration hook for hugetlbfs (installed as .migratepage in
 * hugetlbfs_aops).
 *
 * First moves the mapping entry from @page to @newpage with
 * migrate_huge_page_move_mapping(); only when that succeeds is the page
 * copied with migrate_page_copy().
 *
 * Returns 0 on success, otherwise the non-zero value reported by
 * migrate_huge_page_move_mapping().
 */
static int hugetlbfs_migrate_page(struct address_space *mapping,
				struct page *newpage, struct page *page)
{
	int err = migrate_huge_page_move_mapping(mapping, newpage, page);

	if (!err)
		migrate_page_copy(newpage, page);

	return err;
}
static
int
hugetlbfs_statfs
(
struct
dentry
*
dentry
,
struct
kstatfs
*
buf
)
{
struct
hugetlbfs_sb_info
*
sbinfo
=
HUGETLBFS_SB
(
dentry
->
d_sb
);
...
...
@@ -659,6 +673,7 @@ static const struct address_space_operations hugetlbfs_aops = {
.
write_begin
=
hugetlbfs_write_begin
,
.
write_end
=
hugetlbfs_write_end
,
.
set_page_dirty
=
hugetlbfs_set_page_dirty
,
.
migratepage
=
hugetlbfs_migrate_page
,
};
...
...
include/linux/hugetlb.h
View file @
46e387bb
...
...
@@ -43,7 +43,8 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to,
struct
vm_area_struct
*
vma
,
int
acctflags
);
void
hugetlb_unreserve_pages
(
struct
inode
*
inode
,
long
offset
,
long
freed
);
void
__isolate_hwpoisoned_huge_page
(
struct
page
*
page
);
int
dequeue_hwpoisoned_huge_page
(
struct
page
*
page
);
void
copy_huge_page
(
struct
page
*
dst
,
struct
page
*
src
);
extern
unsigned
long
hugepages_treat_as_movable
;
extern
const
unsigned
long
hugetlb_zero
,
hugetlb_infinity
;
...
...
@@ -101,7 +102,10 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
#define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; })
#define huge_pte_offset(mm, address) 0
#define __isolate_hwpoisoned_huge_page(page) 0
#define dequeue_hwpoisoned_huge_page(page) 0
/* Empty stand-in for copy_huge_page(): accepts both pages and copies nothing. */
static inline void copy_huge_page(struct page *dst, struct page *src)
{
}
#define hugetlb_change_protection(vma, address, end, newprot)
...
...
@@ -228,6 +232,8 @@ struct huge_bootmem_page {
struct
hstate
*
hstate
;
};
struct
page
*
alloc_huge_page_node
(
struct
hstate
*
h
,
int
nid
);
/* arch callback */
int
__init
alloc_bootmem_huge_page
(
struct
hstate
*
h
);
...
...
@@ -301,8 +307,14 @@ static inline struct hstate *page_hstate(struct page *page)
return
size_to_hstate
(
PAGE_SIZE
<<
compound_order
(
page
));
}
static
inline
unsigned
hstate_index_to_shift
(
unsigned
index
)
{
return
hstates
[
index
].
order
+
PAGE_SHIFT
;
}
#else
struct
hstate
{};
#define alloc_huge_page_node(h, nid) NULL
#define alloc_bootmem_huge_page(h) NULL
#define hstate_file(f) NULL
#define hstate_vma(v) NULL
...
...
@@ -317,6 +329,7 @@ static inline unsigned int pages_per_huge_page(struct hstate *h)
{
return
1
;
}
#define hstate_index_to_shift(index) 0
#endif
#endif
/* _LINUX_HUGETLB_H */
include/linux/migrate.h
View file @
46e387bb
...
...
@@ -14,6 +14,8 @@ extern int migrate_page(struct address_space *,
struct
page
*
,
struct
page
*
);
extern
int
migrate_pages
(
struct
list_head
*
l
,
new_page_t
x
,
unsigned
long
private
,
int
offlining
);
extern
int
migrate_huge_pages
(
struct
list_head
*
l
,
new_page_t
x
,
unsigned
long
private
,
int
offlining
);
extern
int
fail_migrate_page
(
struct
address_space
*
,
struct
page
*
,
struct
page
*
);
...
...
@@ -23,12 +25,17 @@ extern int migrate_prep_local(void);
extern
int
migrate_vmas
(
struct
mm_struct
*
mm
,
const
nodemask_t
*
from
,
const
nodemask_t
*
to
,
unsigned
long
flags
);
extern
void
migrate_page_copy
(
struct
page
*
newpage
,
struct
page
*
page
);
extern
int
migrate_huge_page_move_mapping
(
struct
address_space
*
mapping
,
struct
page
*
newpage
,
struct
page
*
page
);
#else
#define PAGE_MIGRATION 0
/* Stub used where PAGE_MIGRATION is defined as 0: leaves the list untouched. */
static inline void putback_lru_pages(struct list_head *l)
{
}
/* Stub: migrates nothing and always reports -ENOSYS. */
static inline int migrate_pages(struct list_head *l, new_page_t x,
				unsigned long private, int offlining)
{
	return -ENOSYS;
}
/* Stub: migrates no huge pages and always reports -ENOSYS. */
static inline int migrate_huge_pages(struct list_head *l, new_page_t x,
				     unsigned long private, int offlining)
{
	return -ENOSYS;
}
/* Stub: no preparation is performed; always reports -ENOSYS. */
static inline int migrate_prep(void)
{
	return -ENOSYS;
}
/* Stub: no local preparation is performed; always reports -ENOSYS. */
static inline int migrate_prep_local(void)
{
	return -ENOSYS;
}
...
...
@@ -40,6 +47,15 @@ static inline int migrate_vmas(struct mm_struct *mm,
return
-
ENOSYS
;
}
/* Stub: copies nothing from @page to @newpage. */
static inline void migrate_page_copy(struct page *newpage,
				     struct page *page)
{
}
/* Stub: the mapping is left untouched; always reports -ENOSYS. */
static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
						 struct page *newpage,
						 struct page *page)
{
	return -ENOSYS;
}
/* Possible settings for the migrate_page() method in address_operations */
#define migrate_page NULL
#define fail_migrate_page NULL
...
...
include/linux/mm.h
View file @
46e387bb
...
...
@@ -718,12 +718,20 @@ static inline int page_mapped(struct page *page)
#define VM_FAULT_SIGBUS 0x0002
#define VM_FAULT_MAJOR 0x0004
#define VM_FAULT_WRITE 0x0008
/* Special case for get_user_pages */
#define VM_FAULT_HWPOISON 0x0010
/* Hit poisoned page */
#define VM_FAULT_HWPOISON 0x0010
/* Hit poisoned small page */
#define VM_FAULT_HWPOISON_LARGE 0x0020
/* Hit poisoned large page. Index encoded in upper bits */
#define VM_FAULT_NOPAGE 0x0100
/* ->fault installed the pte, not return page */
#define VM_FAULT_LOCKED 0x0200
/* ->fault locked the returned page */
#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON)
#define VM_FAULT_HWPOISON_LARGE_MASK 0xf000
/* encodes hpage index for large hwpoison */
#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | \
VM_FAULT_HWPOISON_LARGE)
/* Encode hstate index for a hwpoisoned large page */
#define VM_FAULT_SET_HINDEX(x) ((x) << 12)
#define VM_FAULT_GET_HINDEX(x) (((x) >> 12) & 0xf)
/*
* Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
...
...
mm/hugetlb.c
View file @
46e387bb
...
...
@@ -423,14 +423,14 @@ static void clear_huge_page(struct page *page,
}
}
static
void
copy_gigantic_page
(
struct
page
*
dst
,
struct
page
*
src
,
static
void
copy_
user_
gigantic_page
(
struct
page
*
dst
,
struct
page
*
src
,
unsigned
long
addr
,
struct
vm_area_struct
*
vma
)
{
int
i
;
struct
hstate
*
h
=
hstate_vma
(
vma
);
struct
page
*
dst_base
=
dst
;
struct
page
*
src_base
=
src
;
might_sleep
();
for
(
i
=
0
;
i
<
pages_per_huge_page
(
h
);
)
{
cond_resched
();
copy_user_highpage
(
dst
,
src
,
addr
+
i
*
PAGE_SIZE
,
vma
);
...
...
@@ -440,14 +440,15 @@ static void copy_gigantic_page(struct page *dst, struct page *src,
src
=
mem_map_next
(
src
,
src_base
,
i
);
}
}
static
void
copy_huge_page
(
struct
page
*
dst
,
struct
page
*
src
,
static
void
copy_user_huge_page
(
struct
page
*
dst
,
struct
page
*
src
,
unsigned
long
addr
,
struct
vm_area_struct
*
vma
)
{
int
i
;
struct
hstate
*
h
=
hstate_vma
(
vma
);
if
(
unlikely
(
pages_per_huge_page
(
h
)
>
MAX_ORDER_NR_PAGES
))
{
copy_gigantic_page
(
dst
,
src
,
addr
,
vma
);
copy_
user_
gigantic_page
(
dst
,
src
,
addr
,
vma
);
return
;
}
...
...
@@ -458,6 +459,40 @@ static void copy_huge_page(struct page *dst, struct page *src,
}
}
/*
 * Copy every base page of the huge page starting at @src into the huge
 * page starting at @dst. The number of base pages comes from the hstate
 * of @src.
 *
 * Both cursors are advanced with mem_map_next() relative to their
 * respective head pages rather than by plain pointer arithmetic.
 */
static void copy_gigantic_page(struct page *dst, struct page *src)
{
	struct hstate *h = page_hstate(src);
	struct page *dst_head = dst;
	struct page *src_head = src;
	int copied = 0;

	while (copied < pages_per_huge_page(h)) {
		cond_resched();
		copy_highpage(dst, src);

		/* mem_map_next() is handed the already-incremented count */
		copied++;
		dst = mem_map_next(dst, dst_head, copied);
		src = mem_map_next(src, src_head, copied);
	}
}
/*
 * Copy the contents of huge page @src into huge page @dst, one base page
 * at a time via copy_highpage().
 *
 * Huge pages with more than MAX_ORDER_NR_PAGES base pages are handed to
 * copy_gigantic_page() instead of being indexed directly.
 */
void copy_huge_page(struct page *dst, struct page *src)
{
	struct hstate *h = page_hstate(src);
	int idx = 0;

	if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) {
		copy_gigantic_page(dst, src);
		return;
	}

	might_sleep();
	while (idx < pages_per_huge_page(h)) {
		cond_resched();
		copy_highpage(dst + idx, src + idx);
		idx++;
	}
}
static
void
enqueue_huge_page
(
struct
hstate
*
h
,
struct
page
*
page
)
{
int
nid
=
page_to_nid
(
page
);
...
...
@@ -466,11 +501,24 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
h
->
free_huge_pages_node
[
nid
]
++
;
}
static
struct
page
*
dequeue_huge_page_node
(
struct
hstate
*
h
,
int
nid
)
{
struct
page
*
page
;
if
(
list_empty
(
&
h
->
hugepage_freelists
[
nid
]))
return
NULL
;
page
=
list_entry
(
h
->
hugepage_freelists
[
nid
].
next
,
struct
page
,
lru
);
list_del
(
&
page
->
lru
);
set_page_refcounted
(
page
);
h
->
free_huge_pages
--
;
h
->
free_huge_pages_node
[
nid
]
--
;
return
page
;
}
static
struct
page
*
dequeue_huge_page_vma
(
struct
hstate
*
h
,
struct
vm_area_struct
*
vma
,
unsigned
long
address
,
int
avoid_reserve
)
{
int
nid
;
struct
page
*
page
=
NULL
;
struct
mempolicy
*
mpol
;
nodemask_t
*
nodemask
;
...
...
@@ -496,21 +544,15 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
for_each_zone_zonelist_nodemask
(
zone
,
z
,
zonelist
,
MAX_NR_ZONES
-
1
,
nodemask
)
{
nid
=
zone_to_nid
(
zone
);
if
(
cpuset_zone_allowed_softwall
(
zone
,
htlb_alloc_mask
)
&&
!
list_empty
(
&
h
->
hugepage_freelists
[
nid
]))
{
page
=
list_entry
(
h
->
hugepage_freelists
[
nid
].
next
,
struct
page
,
lru
);
list_del
(
&
page
->
lru
);
h
->
free_huge_pages
--
;
h
->
free_huge_pages_node
[
nid
]
--
;
if
(
cpuset_zone_allowed_softwall
(
zone
,
htlb_alloc_mask
))
{
page
=
dequeue_huge_page_node
(
h
,
zone_to_nid
(
zone
));
if
(
page
)
{
if
(
!
avoid_reserve
)
decrement_hugepage_resv_vma
(
h
,
vma
);
break
;
}
}
}
err:
mpol_cond_put
(
mpol
);
put_mems_allowed
();
...
...
@@ -770,11 +812,10 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
return
ret
;
}
static
struct
page
*
alloc_buddy_huge_page
(
struct
hstate
*
h
,
struct
vm_area_struct
*
vma
,
unsigned
long
address
)
static
struct
page
*
alloc_buddy_huge_page
(
struct
hstate
*
h
,
int
nid
)
{
struct
page
*
page
;
unsigned
int
nid
;
unsigned
int
r_
nid
;
if
(
h
->
order
>=
MAX_ORDER
)
return
NULL
;
...
...
@@ -812,9 +853,14 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
}
spin_unlock
(
&
hugetlb_lock
);
if
(
nid
==
NUMA_NO_NODE
)
page
=
alloc_pages
(
htlb_alloc_mask
|
__GFP_COMP
|
__GFP_REPEAT
|
__GFP_NOWARN
,
huge_page_order
(
h
));
else
page
=
alloc_pages_exact_node
(
nid
,
htlb_alloc_mask
|
__GFP_COMP
|
__GFP_THISNODE
|
__GFP_REPEAT
|
__GFP_NOWARN
,
huge_page_order
(
h
));
if
(
page
&&
arch_prepare_hugepage
(
page
))
{
__free_pages
(
page
,
huge_page_order
(
h
));
...
...
@@ -823,19 +869,13 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
spin_lock
(
&
hugetlb_lock
);
if
(
page
)
{
/*
* This page is now managed by the hugetlb allocator and has
* no users -- drop the buddy allocator's reference.
*/
put_page_testzero
(
page
);
VM_BUG_ON
(
page_count
(
page
));
nid
=
page_to_nid
(
page
);
r_nid
=
page_to_nid
(
page
);
set_compound_page_dtor
(
page
,
free_huge_page
);
/*
* We incremented the global counters already
*/
h
->
nr_huge_pages_node
[
nid
]
++
;
h
->
surplus_huge_pages_node
[
nid
]
++
;
h
->
nr_huge_pages_node
[
r_
nid
]
++
;
h
->
surplus_huge_pages_node
[
r_
nid
]
++
;
__count_vm_event
(
HTLB_BUDDY_PGALLOC
);
}
else
{
h
->
nr_huge_pages
--
;
...
...
@@ -847,6 +887,25 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
return
page
;
}
/*
 * Allocate a huge page for hstate @h on node @nid without consulting any
 * vma. This is useful where the vma is irrelevant, e.g. soft-offlining,
 * which only cares about the physical address of the error page.
 *
 * The node's free list is tried first, under hugetlb_lock; if it is empty
 * the request falls back to alloc_buddy_huge_page().
 *
 * Returns the page, or whatever alloc_buddy_huge_page() returns on the
 * fallback path.
 */
struct page *alloc_huge_page_node(struct hstate *h, int nid)
{
	struct page *page;

	spin_lock(&hugetlb_lock);
	page = dequeue_huge_page_node(h, nid);
	spin_unlock(&hugetlb_lock);

	if (page)
		return page;

	return alloc_buddy_huge_page(h, nid);
}
/*
* Increase the hugetlb pool such that it can accommodate a reservation
* of size 'delta'.
...
...
@@ -871,17 +930,14 @@ static int gather_surplus_pages(struct hstate *h, int delta)
retry:
spin_unlock
(
&
hugetlb_lock
);
for
(
i
=
0
;
i
<
needed
;
i
++
)
{
page
=
alloc_buddy_huge_page
(
h
,
NU
LL
,
0
);
if
(
!
page
)
{
page
=
alloc_buddy_huge_page
(
h
,
NU
MA_NO_NODE
);
if
(
!
page
)
/*
* We were not able to allocate enough pages to
* satisfy the entire reservation so we free what
* we've allocated so far.
*/
spin_lock
(
&
hugetlb_lock
);
needed
=
0
;
goto
free
;
}
list_add
(
&
page
->
lru
,
&
surplus_list
);
}
...
...
@@ -908,31 +964,31 @@ static int gather_surplus_pages(struct hstate *h, int delta)
needed
+=
allocated
;
h
->
resv_huge_pages
+=
delta
;
ret
=
0
;
free:
spin_unlock
(
&
hugetlb_lock
);
/* Free the needed pages to the hugetlb pool */
list_for_each_entry_safe
(
page
,
tmp
,
&
surplus_list
,
lru
)
{
if
((
--
needed
)
<
0
)
break
;
list_del
(
&
page
->
lru
);
/*
* This page is now managed by the hugetlb allocator and has
* no users -- drop the buddy allocator's reference.
*/
put_page_testzero
(
page
);
VM_BUG_ON
(
page_count
(
page
));
enqueue_huge_page
(
h
,
page
);
}
/* Free unnecessary surplus pages to the buddy allocator */
free:
if
(
!
list_empty
(
&
surplus_list
))
{
spin_unlock
(
&
hugetlb_lock
);
list_for_each_entry_safe
(
page
,
tmp
,
&
surplus_list
,
lru
)
{
list_del
(
&
page
->
lru
);
/*
* The page has a reference count of zero already, so
* call free_huge_page directly instead of using
* put_page. This must be done with hugetlb_lock
* unlocked which is safe because free_huge_page takes
* hugetlb_lock before deciding how to free the page.
*/
free_huge_page
(
page
);
put_page
(
page
);
}
spin_lock
(
&
hugetlb_lock
);
}
spin_lock
(
&
hugetlb_lock
);
return
ret
;
}
...
...
@@ -1052,14 +1108,13 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
spin_unlock
(
&
hugetlb_lock
);
if
(
!
page
)
{
page
=
alloc_buddy_huge_page
(
h
,
vma
,
addr
);
page
=
alloc_buddy_huge_page
(
h
,
NUMA_NO_NODE
);
if
(
!
page
)
{
hugetlb_put_quota
(
inode
->
i_mapping
,
chg
);
return
ERR_PTR
(
-
VM_FAULT_SIGBUS
);
}
}
set_page_refcounted
(
page
);
set_page_private
(
page
,
(
unsigned
long
)
mapping
);
vma_commit_reservation
(
h
,
vma
,
addr
);
...
...
@@ -2153,6 +2208,19 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
return
-
ENOMEM
;
}
/*
 * Report whether huge pte @pte holds a migration entry.
 *
 * An empty or present pte is never a migration entry. Otherwise the pte
 * is decoded as a swap entry and must be both a non-swap entry and a
 * migration entry.
 *
 * Returns 1 for a migration entry, 0 otherwise.
 */
static int is_hugetlb_entry_migration(pte_t pte)
{
	swp_entry_t entry;

	if (huge_pte_none(pte))
		return 0;
	if (pte_present(pte))
		return 0;

	entry = pte_to_swp_entry(pte);
	return (non_swap_entry(entry) && is_migration_entry(entry)) ? 1 : 0;
}
static
int
is_hugetlb_entry_hwpoisoned
(
pte_t
pte
)
{
swp_entry_t
swp
;
...
...
@@ -2383,7 +2451,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
if
(
unlikely
(
anon_vma_prepare
(
vma
)))
return
VM_FAULT_OOM
;
copy_huge_page
(
new_page
,
old_page
,
address
,
vma
);
copy_
user_
huge_page
(
new_page
,
old_page
,
address
,
vma
);
__SetPageUptodate
(
new_page
);
/*
...
...
@@ -2515,20 +2583,18 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
hugepage_add_new_anon_rmap
(
page
,
vma
,
address
);
}
}
else
{
page_dup_rmap
(
page
);
}
/*
* Since memory error handler replaces pte into hwpoison swap entry
* at the time of error handling, a process which reserved but not have
* the mapping to the error hugepage does not have hwpoison swap entry.
* So we need to block accesses from such a process by checking
* PG_hwpoison bit here.
* If a memory error occurs between mmap() and fault, some processes
* don't have a hwpoisoned swap entry for the errored virtual address.
* So we need to block the hugepage fault with a PG_hwpoison bit check.
*/
if
(
unlikely
(
PageHWPoison
(
page
)))
{
ret
=
VM_FAULT_HWPOISON
;
ret
=
VM_FAULT_HWPOISON
|
VM_FAULT_SET_HINDEX
(
h
-
hstates
);
goto
backout_unlocked
;
}
page_dup_rmap
(
page
);
}
/*
* If we are going to COW a private mapping later, we examine the
...
...
@@ -2587,8 +2653,12 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
ptep
=
huge_pte_offset
(
mm
,
address
);
if
(
ptep
)
{
entry
=
huge_ptep_get
(
ptep
);
if
(
unlikely
(
is_hugetlb_entry_hwpoisoned
(
entry
)))
return
VM_FAULT_HWPOISON
;
if
(
unlikely
(
is_hugetlb_entry_migration
(
entry
)))
{
migration_entry_wait
(
mm
,
(
pmd_t
*
)
ptep
,
address
);
return
0
;
}
else
if
(
unlikely
(
is_hugetlb_entry_hwpoisoned
(
entry
)))
return
VM_FAULT_HWPOISON_LARGE
|
VM_FAULT_SET_HINDEX
(
h
-
hstates
);
}
ptep
=
huge_pte_alloc
(
mm
,
address
,
huge_page_size
(
h
));
...
...
@@ -2878,18 +2948,41 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
hugetlb_acct_memory
(
h
,
-
(
chg
-
freed
));
}
#ifdef CONFIG_MEMORY_FAILURE
/* Should be called in hugetlb_lock */
static
int
is_hugepage_on_freelist
(
struct
page
*
hpage
)
{
struct
page
*
page
;
struct
page
*
tmp
;
struct
hstate
*
h
=
page_hstate
(
hpage
);
int
nid
=
page_to_nid
(
hpage
);
list_for_each_entry_safe
(
page
,
tmp
,
&
h
->
hugepage_freelists
[
nid
],
lru
)
if
(
page
==
hpage
)
return
1
;
return
0
;
}
/*
* This function is called from memory failure code.
* Assume the caller holds page lock of the head page.
*/
void
__isolat
e_hwpoisoned_huge_page
(
struct
page
*
hpage
)
int
dequeu
e_hwpoisoned_huge_page
(
struct
page
*
hpage
)
{
struct
hstate
*
h
=
page_hstate
(
hpage
);
int
nid
=
page_to_nid
(
hpage
);
int
ret
=
-
EBUSY
;
spin_lock
(
&
hugetlb_lock
);
if
(
is_hugepage_on_freelist
(
hpage
))
{
list_del
(
&
hpage
->
lru
);
set_page_refcounted
(
hpage
);
h
->
free_huge_pages
--
;
h
->
free_huge_pages_node
[
nid
]
--
;
ret
=
0
;
}
spin_unlock
(
&
hugetlb_lock
);
return
ret
;
}
#endif
mm/memory-failure.c
View file @
46e387bb
...
...
@@ -697,11 +697,10 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn)
* Issues:
* - Error on hugepage is contained in hugepage unit (not in raw page unit.)
* To narrow down kill region to one page, we need to break up pmd.
* - To support soft-offlining for hugepage, we need to support hugepage
* migration.
*/
static
int
me_huge_page
(
struct
page
*
p
,
unsigned
long
pfn
)
{
int
res
=
0
;
struct
page
*
hpage
=
compound_head
(
p
);
/*
* We can safely recover from error on free or reserved (i.e.
...
...
@@ -714,7 +713,8 @@ static int me_huge_page(struct page *p, unsigned long pfn)
* so there is no race between isolation and mapping/unmapping.
*/
if
(
!
(
page_mapping
(
hpage
)
||
PageAnon
(
hpage
)))
{
__isolate_hwpoisoned_huge_page
(
hpage
);
res
=
dequeue_hwpoisoned_huge_page
(
hpage
);
if
(
!
res
)
return
RECOVERED
;
}
return
DELAYED
;
...
...
@@ -972,7 +972,10 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
* We need/can do nothing about count=0 pages.
* 1) it's a free page, and therefore in safe hand:
* prep_new_page() will be the gate keeper.
* 2) it's part of a non-compound high order page.
* 2) it's a free hugepage, which is also safe:
* an affected hugepage will be dequeued from hugepage freelist,
* so there's no concern about reusing it ever after.
* 3) it's part of a non-compound high order page.
* Implies some kernel user: cannot stop them from
* R/W the page; let's pray that the page has been
* used and will be freed some time later.
...
...
@@ -984,6 +987,24 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
if
(
is_free_buddy_page
(
p
))
{
action_result
(
pfn
,
"free buddy"
,
DELAYED
);
return
0
;
}
else
if
(
PageHuge
(
hpage
))
{
/*
* Check "just unpoisoned", "filter hit", and
* "race with other subpage."
*/
lock_page_nosync
(
hpage
);
if
(
!
PageHWPoison
(
hpage
)
||
(
hwpoison_filter
(
p
)
&&
TestClearPageHWPoison
(
p
))
||
(
p
!=
hpage
&&
TestSetPageHWPoison
(
hpage
)))
{
atomic_long_sub
(
nr_pages
,
&
mce_bad_pages
);
return
0
;
}
set_page_hwpoison_huge_page
(
hpage
);
res
=
dequeue_hwpoisoned_huge_page
(
hpage
);
action_result
(
pfn
,
"free huge"
,
res
?
IGNORED
:
DELAYED
);
unlock_page
(
hpage
);
return
res
;
}
else
{
action_result
(
pfn
,
"high order kernel"
,
IGNORED
);
return
-
EBUSY
;
...
...
@@ -1145,6 +1166,16 @@ int unpoison_memory(unsigned long pfn)
nr_pages
=
1
<<
compound_order
(
page
);
if
(
!
get_page_unless_zero
(
page
))
{
/*
* Since HWPoisoned hugepage should have non-zero refcount,
* race between memory failure and unpoison seems to happen.
* In such case unpoison fails and memory failure runs
* to the end.
*/
if
(
PageHuge
(
page
))
{
pr_debug
(
"MCE: Memory failure is now running on free hugepage %#lx
\n
"
,
pfn
);
return
0
;
}
if
(
TestClearPageHWPoison
(
p
))
atomic_long_sub
(
nr_pages
,
&
mce_bad_pages
);
pr_info
(
"MCE: Software-unpoisoned free page %#lx
\n
"
,
pfn
);
...
...
@@ -1162,9 +1193,9 @@ int unpoison_memory(unsigned long pfn)
pr_info
(
"MCE: Software-unpoisoned page %#lx
\n
"
,
pfn
);
atomic_long_sub
(
nr_pages
,
&
mce_bad_pages
);
freeit
=
1
;
}
if
(
PageHuge
(
p
))
if
(
PageHuge
(
page
))
clear_page_hwpoison_huge_page
(
page
);
}
unlock_page
(
page
);
put_page
(
page
);
...
...
@@ -1178,6 +1209,10 @@ EXPORT_SYMBOL(unpoison_memory);
/*
 * Allocation callback: get a replacement page on the same node as @p.
 *
 * For a huge page the replacement comes from alloc_huge_page_node(),
 * using the hstate of @p's compound head. Otherwise a single
 * GFP_HIGHUSER_MOVABLE page is allocated on that node.
 * @private and @x are not used.
 */
static struct page *new_page(struct page *p, unsigned long private, int **x)
{
	int nid = page_to_nid(p);

	if (!PageHuge(p))
		return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0);

	return alloc_huge_page_node(page_hstate(compound_head(p)), nid);
}
...
...
@@ -1206,8 +1241,15 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
* was free.
*/
set_migratetype_isolate
(
p
);
/*
* When the target page is a free hugepage, just remove it
* from free hugepage list.
*/
if
(
!
get_page_unless_zero
(
compound_head
(
p
)))
{
if
(
is_free_buddy_page
(
p
))
{
if
(
PageHuge
(
p
))
{
pr_info
(
"get_any_page: %#lx free huge page
\n
"
,
pfn
);
ret
=
dequeue_hwpoisoned_huge_page
(
compound_head
(
p
));
}
else
if
(
is_free_buddy_page
(
p
))
{
pr_info
(
"get_any_page: %#lx free buddy page
\n
"
,
pfn
);
/* Set hwpoison bit while page is still isolated */
SetPageHWPoison
(
p
);
...
...
@@ -1226,6 +1268,45 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
return
ret
;
}
/*
 * Soft-offline the huge page that contains @page.
 *
 * get_any_page() decides the path:
 *   < 0  - the error is returned unchanged;
 *   == 0 - no migration is attempted, the page goes straight to the
 *          poison/dequeue step;
 *   > 0  - the head page is put on a private list and migrated with
 *          migrate_huge_pages(), unless it is already hwpoisoned
 *          (-EBUSY after dropping the reference).
 *
 * A failed migration returns its negative error, or -EIO when
 * migrate_huge_pages() returned a positive count.
 *
 * On the remaining paths mce_bad_pages is increased by the number of base
 * pages (only if the head page was not already poisoned), the huge page is
 * marked hwpoisoned, and dequeue_hwpoisoned_huge_page() is called.
 */
static int soft_offline_huge_page(struct page *page, int flags)
{
	unsigned long pfn = page_to_pfn(page);
	struct page *hpage = compound_head(page);
	LIST_HEAD(pagelist);
	int ret;

	ret = get_any_page(page, pfn, flags);
	if (ret < 0)
		return ret;

	if (ret != 0) {
		if (PageHWPoison(hpage)) {
			put_page(hpage);
			pr_debug("soft offline: %#lx hugepage already poisoned\n",
				 pfn);
			return -EBUSY;
		}

		/* Keep page count to indicate a given hugepage is isolated. */
		list_add(&hpage->lru, &pagelist);
		ret = migrate_huge_pages(&pagelist, new_page,
					 MPOL_MF_MOVE_ALL, 0);
		if (ret) {
			pr_debug("soft offline: %#lx: migration failed %d, type %lx\n",
				 pfn, ret, page->flags);
			return ret > 0 ? -EIO : ret;
		}
	}

	if (!PageHWPoison(hpage))
		atomic_long_add(1 << compound_order(hpage), &mce_bad_pages);
	set_page_hwpoison_huge_page(hpage);
	dequeue_hwpoisoned_huge_page(hpage);

	/* keep elevated page count for bad page */
	return ret;
}
/**
* soft_offline_page - Soft offline a page.
* @page: page to offline
...
...
@@ -1253,6 +1334,9 @@ int soft_offline_page(struct page *page, int flags)
int
ret
;
unsigned
long
pfn
=
page_to_pfn
(
page
);
if
(
PageHuge
(
page
))
return
soft_offline_huge_page
(
page
,
flags
);
ret
=
get_any_page
(
page
,
pfn
,
flags
);
if
(
ret
<
0
)
return
ret
;
...
...
mm/memory.c
View file @
46e387bb
...
...
@@ -1450,7 +1450,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
if
(
ret
&
VM_FAULT_OOM
)
return
i
?
i
:
-
ENOMEM
;
if
(
ret
&
(
VM_FAULT_HWPOISON
|
VM_FAULT_SIGBUS
))
(
VM_FAULT_HWPOISON
|
VM_FAULT_HWPOISON_LARGE
|
VM_FAULT_SIGBUS
))
return
i
?
i
:
-
EFAULT
;
BUG
();
}
...
...
mm/migrate.c
View file @
46e387bb
...
...
@@ -32,6 +32,7 @@
#include <linux/security.h>
#include <linux/memcontrol.h>
#include <linux/syscalls.h>
#include <linux/hugetlb.h>
#include <linux/gfp.h>
#include "internal.h"
...
...
@@ -95,6 +96,12 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
pte_t
*
ptep
,
pte
;
spinlock_t
*
ptl
;
if
(
unlikely
(
PageHuge
(
new
)))
{
ptep
=
huge_pte_offset
(
mm
,
addr
);
if
(
!
ptep
)
goto
out
;
ptl
=
&
mm
->
page_table_lock
;
}
else
{
pgd
=
pgd_offset
(
mm
,
addr
);
if
(
!
pgd_present
(
*
pgd
))
goto
out
;
...
...
@@ -115,6 +122,8 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
}
ptl
=
pte_lockptr
(
mm
,
pmd
);
}
spin_lock
(
ptl
);
pte
=
*
ptep
;
if
(
!
is_swap_pte
(
pte
))
...
...
@@ -130,10 +139,19 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
pte
=
pte_mkold
(
mk_pte
(
new
,
vma
->
vm_page_prot
));
if
(
is_write_migration_entry
(
entry
))
pte
=
pte_mkwrite
(
pte
);
#ifdef CONFIG_HUGETLB_PAGE
if
(
PageHuge
(
new
))
pte
=
pte_mkhuge
(
pte
);
#endif
flush_cache_page
(
vma
,
addr
,
pte_pfn
(
pte
));
set_pte_at
(
mm
,
addr
,
ptep
,
pte
);
if
(
PageHuge
(
new
))
{
if
(
PageAnon
(
new
))
hugepage_add_anon_rmap
(
new
,
vma
,
addr
);
else
page_dup_rmap
(
new
);
}
else
if
(
PageAnon
(
new
))
page_add_anon_rmap
(
new
,
vma
,
addr
);
else
page_add_file_rmap
(
new
);
...
...
@@ -275,11 +293,59 @@ static int migrate_page_move_mapping(struct address_space *mapping,
return
0
;
}
/*
 * Replace @page with @newpage in @mapping's page tree.
 *
 * The expected number of remaining references is the same as that
 * of migrate_page_move_mapping(): 2 plus page_has_private(page).
 *
 * Without a mapping nothing is replaced; the call only checks that the
 * page has exactly one reference.
 *
 * Returns 0 on success, or -EAGAIN when the reference count differs from
 * the expected one, the tree slot no longer points at @page, or the
 * references cannot be frozen.
 */
int migrate_huge_page_move_mapping(struct address_space *mapping,
				   struct page *newpage, struct page *page)
{
	void **slot;
	int expected;
	int rc = -EAGAIN;

	if (!mapping)
		return page_count(page) == 1 ? 0 : -EAGAIN;

	spin_lock_irq(&mapping->tree_lock);

	slot = radix_tree_lookup_slot(&mapping->page_tree, page_index(page));
	expected = 2 + page_has_private(page);

	if (page_count(page) != expected)
		goto unlock;
	if ((struct page *)radix_tree_deref_slot(slot) != page)
		goto unlock;
	if (!page_freeze_refs(page, expected))
		goto unlock;

	/* references are frozen: swap the slot over to the new page */
	get_page(newpage);
	radix_tree_replace_slot(slot, newpage);
	page_unfreeze_refs(page, expected);
	__put_page(page);
	rc = 0;

unlock:
	spin_unlock_irq(&mapping->tree_lock);
	return rc;
}
/*
* Copy the page to its new location
*/
static
void
migrate_page_copy
(
struct
page
*
newpage
,
struct
page
*
page
)
void
migrate_page_copy
(
struct
page
*
newpage
,
struct
page
*
page
)
{
if
(
PageHuge
(
page
))
copy_huge_page
(
newpage
,
page
);
else
copy_highpage
(
newpage
,
page
);
if
(
PageError
(
page
))
...
...
@@ -723,6 +789,92 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
return
rc
;
}
/*
 * Counterpart of unmap_and_move() for hugepage migration.
 *
 * This function doesn't wait for the completion of hugepage I/O
 * because there is no race between I/O and migration for hugepage.
 * Note that currently hugepage I/O occurs only in direct I/O
 * where no lock is held and PG_writeback is irrelevant,
 * and writeback status of all subpages are counted in the reference
 * count of the head page (i.e. if all subpages of a 2MB hugepage are
 * under direct I/O, the reference of the head page is 512 and a bit more.)
 * This means that when we try to migrate hugepage whose subpages are
 * doing direct I/O, some references remain after try_to_unmap() and
 * hugepage migration fails without data corruption.
 *
 * There is also no race when direct I/O is issued on the page under migration,
 * because then pte is replaced with migration swap entry and direct I/O code
 * will wait in the page fault for migration to complete.
 *
 * @get_new_page: callback that supplies the destination huge page and may
 *                set a result pointer
 * @private:      opaque value passed through to @get_new_page
 * @hpage:        huge page to migrate
 * @force:        when non-zero, sleep on the page lock instead of giving up
 *                if trylock_page() fails
 * @offlining:    not used by this function
 *
 * Returns -ENOMEM when @get_new_page yields no page, -EAGAIN when the page
 * lock could not be taken without @force (or the page is still mapped
 * after try_to_unmap()), otherwise the result of move_to_new_page().
 * Unless the result is -EAGAIN, @hpage is removed from its list and its
 * reference is dropped.
 */
static int unmap_and_move_huge_page(new_page_t get_new_page,
				unsigned long private, struct page *hpage,
				int force, int offlining)
{
	int rc = 0;
	int *result = NULL;
	struct page *new_hpage = get_new_page(hpage, private, &result);
	int rcu_locked = 0;
	struct anon_vma *anon_vma = NULL;

	if (!new_hpage)
		return -ENOMEM;

	rc = -EAGAIN;

	if (!trylock_page(hpage)) {
		/*
		 * The page lock is NOT held here, so jump past the
		 * unlock_page() below.
		 */
		if (!force)
			goto out;
		lock_page(hpage);
	}

	if (PageAnon(hpage)) {
		rcu_read_lock();
		rcu_locked = 1;

		if (page_mapped(hpage)) {
			anon_vma = page_anon_vma(hpage);
			atomic_inc(&anon_vma->external_refcount);
		}
	}

	try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);

	if (!page_mapped(hpage))
		rc = move_to_new_page(new_hpage, hpage, 1);

	/* migration did not happen: restore the original ptes */
	if (rc)
		remove_migration_ptes(hpage, hpage);

	if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount,
					    &anon_vma->lock)) {
		int empty = list_empty(&anon_vma->head);

		spin_unlock(&anon_vma->lock);
		if (empty)
			anon_vma_free(anon_vma);
	}

	if (rcu_locked)
		rcu_read_unlock();

	/* Only reached with the page lock held. */
	unlock_page(hpage);
out:
	if (rc != -EAGAIN) {
		list_del(&hpage->lru);
		put_page(hpage);
	}

	put_page(new_hpage);

	if (result) {
		if (rc)
			*result = rc;
		else
			*result = page_to_nid(new_hpage);
	}
	return rc;
}
/*
* migrate_pages
*
...
...
@@ -788,6 +940,52 @@ int migrate_pages(struct list_head *from,
return
nr_failed
+
retry
;
}
/*
 * Try to migrate every huge page on the list @from, making up to 10
 * passes over the list for as long as some page reported -EAGAIN in the
 * previous pass. From the fourth pass on, unmap_and_move_huge_page() is
 * called with force set.
 *
 * -ENOMEM from any page aborts immediately and is returned as is.
 * Otherwise the return value is the number of permanent failures plus the
 * number of pages still asking for a retry after the last pass.
 *
 * Before returning, put_page() is called on every page still on @from.
 */
int migrate_huge_pages(struct list_head *from,
		new_page_t get_new_page, unsigned long private, int offlining)
{
	struct page *page;
	struct page *next;
	int nr_failed = 0;
	int retry = 1;
	int pass;
	int rc;

	for (pass = 0; pass < 10 && retry; pass++) {
		retry = 0;

		list_for_each_entry_safe(page, next, from, lru) {
			cond_resched();

			rc = unmap_and_move_huge_page(get_new_page, private,
						      page, pass > 2,
						      offlining);
			if (rc == -ENOMEM)
				goto out;

			if (rc == -EAGAIN)
				retry++;
			else if (rc != 0)
				nr_failed++;	/* Permanent failure */
		}
	}
	rc = 0;
out:
	list_for_each_entry_safe(page, next, from, lru)
		put_page(page);

	return rc ? rc : nr_failed + retry;
}
#ifdef CONFIG_NUMA
/*
* Move a list of individual pages
...
...
mm/rmap.c
View file @
46e387bb
...
...
@@ -780,10 +780,10 @@ void page_move_anon_rmap(struct page *page,
}
/**
* __page_set_anon_rmap - setup new anonymous rmap
* @page:
the page to add the mapping to
* @vma:
the vm area in which the mapping is added
* @address:
the user virtual address mapped
* __page_set_anon_rmap - set
up new anonymous rmap
* @page:
Page to add to rmap
* @vma:
VM area to add page to.
* @address:
User virtual address of the mapping
* @exclusive: the page is exclusively owned by the current process
*/
static
void
__page_set_anon_rmap
(
struct
page
*
page
,
...
...
@@ -793,25 +793,16 @@ static void __page_set_anon_rmap(struct page *page,
BUG_ON
(
!
anon_vma
);
if
(
PageAnon
(
page
))
return
;
/*
* If the page isn't exclusively mapped into this vma,
* we must use the _oldest_ possible anon_vma for the
* page mapping!
*/
if
(
!
exclusive
)
{
if
(
PageAnon
(
page
))
return
;
if
(
!
exclusive
)
anon_vma
=
anon_vma
->
root
;
}
else
{
/*
* In this case, swapped-out-but-not-discarded swap-cache
* is remapped. So, no need to update page->mapping here.
* We are convinced the anon_vma pointed to by page->mapping is not obsolete
* because vma->anon_vma must belong to the same family.
*/
if
(
PageAnon
(
page
))
return
;
}
anon_vma
=
(
void
*
)
anon_vma
+
PAGE_MAPPING_ANON
;
page
->
mapping
=
(
struct
address_space
*
)
anon_vma
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment