Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
linux
Commits
6e949ddb
Commit
6e949ddb
authored
Aug 13, 2021
by
Paolo Bonzini
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'kvm-tdpmmu-fixes' into kvm-master
Merge topic branch with fixes for both 5.14-rc6 and 5.15.
parents
c5e2bf0b
ce25681d
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
63 additions
and
15 deletions
+63
-15
Documentation/virt/kvm/locking.rst
Documentation/virt/kvm/locking.rst
+4
-4
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/kvm_host.h
+7
-0
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/mmu.c
+28
-0
arch/x86/kvm/mmu/tdp_mmu.c
arch/x86/kvm/mmu/tdp_mmu.c
+24
-11
No files found.
Documentation/virt/kvm/locking.rst
View file @
6e949ddb
...
...
@@ -25,10 +25,10 @@ On x86:
- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock
- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock
is
taken inside kvm->arch.mmu_lock, and cannot be taken without already
holding kvm->arch.mmu_lock (typically with ``read_lock``, otherwise
there's no need to take kvm->arch.tdp_mmu_pages_lock at all
).
- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock
and
kvm->arch.mmu_unsync_pages_lock are taken inside kvm->arch.mmu_lock, and
cannot be taken without already holding kvm->arch.mmu_lock (typically with
``read_lock`` for the TDP MMU, thus the need for additional spinlocks
).
Everything else is a leaf: no other lock is taken inside the critical
sections.
...
...
arch/x86/include/asm/kvm_host.h
View file @
6e949ddb
...
...
@@ -1038,6 +1038,13 @@ struct kvm_arch {
struct
list_head
lpage_disallowed_mmu_pages
;
struct
kvm_page_track_notifier_node
mmu_sp_tracker
;
struct
kvm_page_track_notifier_head
track_notifier_head
;
/*
* Protects marking pages unsync during page faults, as TDP MMU page
* faults only take mmu_lock for read. For simplicity, the unsync
* pages lock is always taken when marking pages unsync regardless of
* whether mmu_lock is held for read or write.
*/
spinlock_t
mmu_unsync_pages_lock
;
struct
list_head
assigned_dev_head
;
struct
iommu_domain
*
iommu_domain
;
...
...
arch/x86/kvm/mmu/mmu.c
View file @
6e949ddb
...
...
@@ -2535,6 +2535,7 @@ static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
int
mmu_try_to_unsync_pages
(
struct
kvm_vcpu
*
vcpu
,
gfn_t
gfn
,
bool
can_unsync
)
{
struct
kvm_mmu_page
*
sp
;
bool
locked
=
false
;
/*
* Force write-protection if the page is being tracked. Note, the page
...
...
@@ -2557,9 +2558,34 @@ int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync)
if
(
sp
->
unsync
)
continue
;
/*
* TDP MMU page faults require an additional spinlock as they
* run with mmu_lock held for read, not write, and the unsync
* logic is not thread safe. Take the spinlock regardless of
* the MMU type to avoid extra conditionals/parameters, there's
* no meaningful penalty if mmu_lock is held for write.
*/
if
(
!
locked
)
{
locked
=
true
;
spin_lock
(
&
vcpu
->
kvm
->
arch
.
mmu_unsync_pages_lock
);
/*
* Recheck after taking the spinlock, a different vCPU
* may have since marked the page unsync. A false
* positive on the unprotected check above is not
* possible as clearing sp->unsync _must_ hold mmu_lock
* for write, i.e. unsync cannot transition from 1->0
* while this CPU holds mmu_lock for read (or write).
*/
if
(
READ_ONCE
(
sp
->
unsync
))
continue
;
}
WARN_ON
(
sp
->
role
.
level
!=
PG_LEVEL_4K
);
kvm_unsync_page
(
vcpu
,
sp
);
}
if
(
locked
)
spin_unlock
(
&
vcpu
->
kvm
->
arch
.
mmu_unsync_pages_lock
);
/*
* We need to ensure that the marking of unsync pages is visible
...
...
@@ -5537,6 +5563,8 @@ void kvm_mmu_init_vm(struct kvm *kvm)
{
struct
kvm_page_track_notifier_node
*
node
=
&
kvm
->
arch
.
mmu_sp_tracker
;
spin_lock_init
(
&
kvm
->
arch
.
mmu_unsync_pages_lock
);
if
(
!
kvm_mmu_init_tdp_mmu
(
kvm
))
/*
* No smp_load/store wrappers needed here as we are in
...
...
arch/x86/kvm/mmu/tdp_mmu.c
View file @
6e949ddb
...
...
@@ -43,6 +43,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
if
(
!
kvm
->
arch
.
tdp_mmu_enabled
)
return
;
WARN_ON
(
!
list_empty
(
&
kvm
->
arch
.
tdp_mmu_pages
));
WARN_ON
(
!
list_empty
(
&
kvm
->
arch
.
tdp_mmu_roots
));
/*
...
...
@@ -81,8 +82,6 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
void
kvm_tdp_mmu_put_root
(
struct
kvm
*
kvm
,
struct
kvm_mmu_page
*
root
,
bool
shared
)
{
gfn_t
max_gfn
=
1ULL
<<
(
shadow_phys_bits
-
PAGE_SHIFT
);
kvm_lockdep_assert_mmu_lock_held
(
kvm
,
shared
);
if
(
!
refcount_dec_and_test
(
&
root
->
tdp_mmu_root_count
))
...
...
@@ -94,7 +93,7 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
list_del_rcu
(
&
root
->
link
);
spin_unlock
(
&
kvm
->
arch
.
tdp_mmu_pages_lock
);
zap_gfn_range
(
kvm
,
root
,
0
,
max_gfn
,
false
,
false
,
shared
);
zap_gfn_range
(
kvm
,
root
,
0
,
-
1ull
,
false
,
false
,
shared
);
call_rcu
(
&
root
->
rcu_head
,
tdp_mmu_free_sp_rcu_callback
);
}
...
...
@@ -724,13 +723,29 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
gfn_t
start
,
gfn_t
end
,
bool
can_yield
,
bool
flush
,
bool
shared
)
{
gfn_t
max_gfn_host
=
1ULL
<<
(
shadow_phys_bits
-
PAGE_SHIFT
);
bool
zap_all
=
(
start
==
0
&&
end
>=
max_gfn_host
);
struct
tdp_iter
iter
;
/*
* No need to try to step down in the iterator when zapping all SPTEs,
* zapping the top-level non-leaf SPTEs will recurse on their children.
*/
int
min_level
=
zap_all
?
root
->
role
.
level
:
PG_LEVEL_4K
;
/*
* Bound the walk at host.MAXPHYADDR, guest accesses beyond that will
* hit a #PF(RSVD) and never get to an EPT Violation/Misconfig / #NPF,
* and so KVM will never install a SPTE for such addresses.
*/
end
=
min
(
end
,
max_gfn_host
);
kvm_lockdep_assert_mmu_lock_held
(
kvm
,
shared
);
rcu_read_lock
();
tdp_root_for_each_pte
(
iter
,
root
,
start
,
end
)
{
for_each_tdp_pte_min_level
(
iter
,
root
->
spt
,
root
->
role
.
level
,
min_level
,
start
,
end
)
{
retry:
if
(
can_yield
&&
tdp_mmu_iter_cond_resched
(
kvm
,
&
iter
,
flush
,
shared
))
{
...
...
@@ -744,9 +759,10 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
/*
* If this is a non-last-level SPTE that covers a larger range
* than should be zapped, continue, and zap the mappings at a
* lower level.
* lower level
, except when zapping all SPTEs
.
*/
if
((
iter
.
gfn
<
start
||
if
(
!
zap_all
&&
(
iter
.
gfn
<
start
||
iter
.
gfn
+
KVM_PAGES_PER_HPAGE
(
iter
.
level
)
>
end
)
&&
!
is_last_spte
(
iter
.
old_spte
,
iter
.
level
))
continue
;
...
...
@@ -794,12 +810,11 @@ bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start,
void
kvm_tdp_mmu_zap_all
(
struct
kvm
*
kvm
)
{
gfn_t
max_gfn
=
1ULL
<<
(
shadow_phys_bits
-
PAGE_SHIFT
);
bool
flush
=
false
;
int
i
;
for
(
i
=
0
;
i
<
KVM_ADDRESS_SPACE_NUM
;
i
++
)
flush
=
kvm_tdp_mmu_zap_gfn_range
(
kvm
,
i
,
0
,
max_gfn
,
flush
=
kvm_tdp_mmu_zap_gfn_range
(
kvm
,
i
,
0
,
-
1ull
,
flush
,
false
);
if
(
flush
)
...
...
@@ -838,7 +853,6 @@ static struct kvm_mmu_page *next_invalidated_root(struct kvm *kvm,
*/
void
kvm_tdp_mmu_zap_invalidated_roots
(
struct
kvm
*
kvm
)
{
gfn_t
max_gfn
=
1ULL
<<
(
shadow_phys_bits
-
PAGE_SHIFT
);
struct
kvm_mmu_page
*
next_root
;
struct
kvm_mmu_page
*
root
;
bool
flush
=
false
;
...
...
@@ -854,8 +868,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
rcu_read_unlock
();
flush
=
zap_gfn_range
(
kvm
,
root
,
0
,
max_gfn
,
true
,
flush
,
true
);
flush
=
zap_gfn_range
(
kvm
,
root
,
0
,
-
1ull
,
true
,
flush
,
true
);
/*
* Put the reference acquired in
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment