Commit 238b224c authored by Jan Kara, committed by Stefan Bader

mbcache2: Use referenced bit instead of LRU

Currently we maintain a perfect LRU list by moving an entry to the tail
of the list whenever it gets used. However, these operations on the
cache-global list are relatively expensive.

In this patch we switch to lazy updates of the LRU list. Whenever an entry
gets used, we set a referenced bit in it. When reclaiming entries, we give
referenced entries another round in the LRU. Since the list is not a
real LRU anymore, rename it to just 'list'.

In my testing this logic gives about 30% boost to workloads with mostly
unique xattr blocks (e.g. xattr-bench with 10 files and 10000 unique
xattr values).
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
(cherry picked from commit f0c8b462)
CVE-2015-8952
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Acked-by: Stefan Bader <stefan.bader@canonical.com>
Acked-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
parent 1bcae5db
...@@ -30,9 +30,9 @@ struct mb2_cache { ...@@ -30,9 +30,9 @@ struct mb2_cache {
int c_bucket_bits; int c_bucket_bits;
/* Maximum entries in cache to avoid degrading hash too much */ /* Maximum entries in cache to avoid degrading hash too much */
int c_max_entries; int c_max_entries;
/* Protects c_lru_list, c_entry_count */ /* Protects c_list, c_entry_count */
spinlock_t c_lru_list_lock; spinlock_t c_list_lock;
struct list_head c_lru_list; struct list_head c_list;
/* Number of entries in cache */ /* Number of entries in cache */
unsigned long c_entry_count; unsigned long c_entry_count;
struct shrinker c_shrink; struct shrinker c_shrink;
...@@ -45,6 +45,29 @@ static struct kmem_cache *mb2_entry_cache; ...@@ -45,6 +45,29 @@ static struct kmem_cache *mb2_entry_cache;
static unsigned long mb2_cache_shrink(struct mb2_cache *cache, static unsigned long mb2_cache_shrink(struct mb2_cache *cache,
unsigned int nr_to_scan); unsigned int nr_to_scan);
/*
 * Report whether the entry's referenced flag is set. The flag is kept in
 * bit 0 of entry->_e_hash_list_head.
 */
static inline bool mb2_cache_entry_referenced(struct mb2_cache_entry *entry)
{
	return (entry->_e_hash_list_head & 1UL) != 0;
}
/*
 * Set the entry's referenced flag, i.e. bit 0 of
 * entry->_e_hash_list_head. This is a plain (non-atomic) read-modify-write;
 * all other bits of the word are left as they were.
 */
static inline void mb2_cache_entry_set_referenced(struct mb2_cache_entry *entry)
{
	entry->_e_hash_list_head = entry->_e_hash_list_head | 1UL;
}
/*
 * Clear the entry's referenced flag (bit 0 of entry->_e_hash_list_head)
 * while preserving every other bit of the word.
 */
static inline void mb2_cache_entry_clear_referenced(
					struct mb2_cache_entry *entry)
{
	entry->_e_hash_list_head = entry->_e_hash_list_head & ~1UL;
}
/*
 * Return entry->_e_hash_list_head with bit 0 (the referenced flag) masked
 * off, converted to a hash list head pointer.
 */
static inline struct hlist_bl_head *mb2_cache_entry_head(
					struct mb2_cache_entry *entry)
{
	unsigned long head_addr = entry->_e_hash_list_head & ~1UL;

	return (struct hlist_bl_head *)head_addr;
}
/* /*
* Number of entries to reclaim synchronously when there are too many entries * Number of entries to reclaim synchronously when there are too many entries
* in cache * in cache
...@@ -80,13 +103,13 @@ int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key, ...@@ -80,13 +103,13 @@ int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key,
if (!entry) if (!entry)
return -ENOMEM; return -ENOMEM;
INIT_LIST_HEAD(&entry->e_lru_list); INIT_LIST_HEAD(&entry->e_list);
/* One ref for hash, one ref returned */ /* One ref for hash, one ref returned */
atomic_set(&entry->e_refcnt, 1); atomic_set(&entry->e_refcnt, 1);
entry->e_key = key; entry->e_key = key;
entry->e_block = block; entry->e_block = block;
head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
entry->e_hash_list_head = head; entry->_e_hash_list_head = (unsigned long)head;
hlist_bl_lock(head); hlist_bl_lock(head);
hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
if (dup->e_key == key && dup->e_block == block) { if (dup->e_key == key && dup->e_block == block) {
...@@ -98,12 +121,12 @@ int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key, ...@@ -98,12 +121,12 @@ int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key,
hlist_bl_add_head(&entry->e_hash_list, head); hlist_bl_add_head(&entry->e_hash_list, head);
hlist_bl_unlock(head); hlist_bl_unlock(head);
spin_lock(&cache->c_lru_list_lock); spin_lock(&cache->c_list_lock);
list_add_tail(&entry->e_lru_list, &cache->c_lru_list); list_add_tail(&entry->e_list, &cache->c_list);
/* Grab ref for LRU list */ /* Grab ref for LRU list */
atomic_inc(&entry->e_refcnt); atomic_inc(&entry->e_refcnt);
cache->c_entry_count++; cache->c_entry_count++;
spin_unlock(&cache->c_lru_list_lock); spin_unlock(&cache->c_list_lock);
return 0; return 0;
} }
...@@ -124,7 +147,7 @@ static struct mb2_cache_entry *__entry_find(struct mb2_cache *cache, ...@@ -124,7 +147,7 @@ static struct mb2_cache_entry *__entry_find(struct mb2_cache *cache,
struct hlist_bl_head *head; struct hlist_bl_head *head;
if (entry) if (entry)
head = entry->e_hash_list_head; head = mb2_cache_entry_head(entry);
else else
head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
hlist_bl_lock(head); hlist_bl_lock(head);
...@@ -203,13 +226,13 @@ void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key, ...@@ -203,13 +226,13 @@ void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key,
/* We keep hash list reference to keep entry alive */ /* We keep hash list reference to keep entry alive */
hlist_bl_del_init(&entry->e_hash_list); hlist_bl_del_init(&entry->e_hash_list);
hlist_bl_unlock(head); hlist_bl_unlock(head);
spin_lock(&cache->c_lru_list_lock); spin_lock(&cache->c_list_lock);
if (!list_empty(&entry->e_lru_list)) { if (!list_empty(&entry->e_list)) {
list_del_init(&entry->e_lru_list); list_del_init(&entry->e_list);
cache->c_entry_count--; cache->c_entry_count--;
atomic_dec(&entry->e_refcnt); atomic_dec(&entry->e_refcnt);
} }
spin_unlock(&cache->c_lru_list_lock); spin_unlock(&cache->c_list_lock);
mb2_cache_entry_put(cache, entry); mb2_cache_entry_put(cache, entry);
return; return;
} }
...@@ -222,15 +245,12 @@ EXPORT_SYMBOL(mb2_cache_entry_delete_block); ...@@ -222,15 +245,12 @@ EXPORT_SYMBOL(mb2_cache_entry_delete_block);
* @cache - cache the entry belongs to * @cache - cache the entry belongs to
* @entry - entry that got used * @entry - entry that got used
* *
* Move entry in lru list to reflect the fact that it was used. * Marks entry as used to give it higher chances of surviving in cache.
*/ */
void mb2_cache_entry_touch(struct mb2_cache *cache, void mb2_cache_entry_touch(struct mb2_cache *cache,
struct mb2_cache_entry *entry) struct mb2_cache_entry *entry)
{ {
spin_lock(&cache->c_lru_list_lock); mb2_cache_entry_set_referenced(entry);
if (!list_empty(&entry->e_lru_list))
list_move_tail(&cache->c_lru_list, &entry->e_lru_list);
spin_unlock(&cache->c_lru_list_lock);
} }
EXPORT_SYMBOL(mb2_cache_entry_touch); EXPORT_SYMBOL(mb2_cache_entry_touch);
...@@ -251,18 +271,23 @@ static unsigned long mb2_cache_shrink(struct mb2_cache *cache, ...@@ -251,18 +271,23 @@ static unsigned long mb2_cache_shrink(struct mb2_cache *cache,
struct hlist_bl_head *head; struct hlist_bl_head *head;
unsigned int shrunk = 0; unsigned int shrunk = 0;
spin_lock(&cache->c_lru_list_lock); spin_lock(&cache->c_list_lock);
while (nr_to_scan-- && !list_empty(&cache->c_lru_list)) { while (nr_to_scan-- && !list_empty(&cache->c_list)) {
entry = list_first_entry(&cache->c_lru_list, entry = list_first_entry(&cache->c_list,
struct mb2_cache_entry, e_lru_list); struct mb2_cache_entry, e_list);
list_del_init(&entry->e_lru_list); if (mb2_cache_entry_referenced(entry)) {
mb2_cache_entry_clear_referenced(entry);
list_move_tail(&cache->c_list, &entry->e_list);
continue;
}
list_del_init(&entry->e_list);
cache->c_entry_count--; cache->c_entry_count--;
/* /*
* We keep LRU list reference so that entry doesn't go away * We keep LRU list reference so that entry doesn't go away
* from under us. * from under us.
*/ */
spin_unlock(&cache->c_lru_list_lock); spin_unlock(&cache->c_list_lock);
head = entry->e_hash_list_head; head = mb2_cache_entry_head(entry);
hlist_bl_lock(head); hlist_bl_lock(head);
if (!hlist_bl_unhashed(&entry->e_hash_list)) { if (!hlist_bl_unhashed(&entry->e_hash_list)) {
hlist_bl_del_init(&entry->e_hash_list); hlist_bl_del_init(&entry->e_hash_list);
...@@ -272,9 +297,9 @@ static unsigned long mb2_cache_shrink(struct mb2_cache *cache, ...@@ -272,9 +297,9 @@ static unsigned long mb2_cache_shrink(struct mb2_cache *cache,
if (mb2_cache_entry_put(cache, entry)) if (mb2_cache_entry_put(cache, entry))
shrunk++; shrunk++;
cond_resched(); cond_resched();
spin_lock(&cache->c_lru_list_lock); spin_lock(&cache->c_list_lock);
} }
spin_unlock(&cache->c_lru_list_lock); spin_unlock(&cache->c_list_lock);
return shrunk; return shrunk;
} }
...@@ -318,8 +343,8 @@ struct mb2_cache *mb2_cache_create(int bucket_bits) ...@@ -318,8 +343,8 @@ struct mb2_cache *mb2_cache_create(int bucket_bits)
goto err_out; goto err_out;
cache->c_bucket_bits = bucket_bits; cache->c_bucket_bits = bucket_bits;
cache->c_max_entries = bucket_count << 4; cache->c_max_entries = bucket_count << 4;
INIT_LIST_HEAD(&cache->c_lru_list); INIT_LIST_HEAD(&cache->c_list);
spin_lock_init(&cache->c_lru_list_lock); spin_lock_init(&cache->c_list_lock);
cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head), cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head),
GFP_KERNEL); GFP_KERNEL);
if (!cache->c_hash) { if (!cache->c_hash) {
...@@ -361,13 +386,13 @@ void mb2_cache_destroy(struct mb2_cache *cache) ...@@ -361,13 +386,13 @@ void mb2_cache_destroy(struct mb2_cache *cache)
* We don't bother with any locking. Cache must not be used at this * We don't bother with any locking. Cache must not be used at this
* point. * point.
*/ */
list_for_each_entry_safe(entry, next, &cache->c_lru_list, e_lru_list) { list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
if (!hlist_bl_unhashed(&entry->e_hash_list)) { if (!hlist_bl_unhashed(&entry->e_hash_list)) {
hlist_bl_del_init(&entry->e_hash_list); hlist_bl_del_init(&entry->e_hash_list);
atomic_dec(&entry->e_refcnt); atomic_dec(&entry->e_refcnt);
} else } else
WARN_ON(1); WARN_ON(1);
list_del(&entry->e_lru_list); list_del(&entry->e_list);
WARN_ON(atomic_read(&entry->e_refcnt) != 1); WARN_ON(atomic_read(&entry->e_refcnt) != 1);
mb2_cache_entry_put(cache, entry); mb2_cache_entry_put(cache, entry);
} }
......
...@@ -10,8 +10,8 @@ ...@@ -10,8 +10,8 @@
struct mb2_cache; struct mb2_cache;
struct mb2_cache_entry { struct mb2_cache_entry {
/* LRU list - protected by cache->c_lru_list_lock */ /* List of entries in cache - protected by cache->c_list_lock */
struct list_head e_lru_list; struct list_head e_list;
/* Hash table list - protected by bitlock in e_hash_list_head */ /* Hash table list - protected by bitlock in e_hash_list_head */
struct hlist_bl_node e_hash_list; struct hlist_bl_node e_hash_list;
atomic_t e_refcnt; atomic_t e_refcnt;
...@@ -19,8 +19,11 @@ struct mb2_cache_entry { ...@@ -19,8 +19,11 @@ struct mb2_cache_entry {
u32 e_key; u32 e_key;
/* Block number of hashed block - stable during lifetime of the entry */ /* Block number of hashed block - stable during lifetime of the entry */
sector_t e_block; sector_t e_block;
/* Head of hash list (for list bit lock) - stable */ /*
struct hlist_bl_head *e_hash_list_head; * Head of hash list (for list bit lock) - stable. Combined with
* referenced bit of entry
*/
unsigned long _e_hash_list_head;
}; };
struct mb2_cache *mb2_cache_create(int bucket_bits); struct mb2_cache *mb2_cache_create(int bucket_bits);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment