Commit b8cf32dc authored by Yosry Ahmed, committed by Andrew Morton

mm: zswap: multiple zpools support

Support using multiple zpools of the same type in zswap, for concurrency
purposes.  A fixed number of 32 zpools is suggested by this commit, which
was determined empirically.  It can be later changed or made into a config
option if needed.

On a setup with zswap and zsmalloc, comparing a single zpool to 32 zpools
shows improvements in the zsmalloc lock contention, especially on the swap
out path.

The following shows the perf analysis of the swapout path when 10
workloads are simultaneously reclaiming and refaulting tmpfs pages.  There
are some improvements on the swap in path as well, but less significant.

1 zpool:

 |--28.99%--zswap_frontswap_store
       |
       <snip>
       |
       |--8.98%--zpool_map_handle
       |     |
       |      --8.98%--zs_zpool_map
       |           |
       |            --8.95%--zs_map_object
       |                 |
       |                  --8.38%--_raw_spin_lock
       |                       |
       |                        --7.39%--queued_spin_lock_slowpath
       |
       |--8.82%--zpool_malloc
       |     |
       |      --8.82%--zs_zpool_malloc
       |           |
       |            --8.80%--zs_malloc
       |                 |
       |                 |--7.21%--_raw_spin_lock
       |                 |     |
       |                 |      --6.81%--queued_spin_lock_slowpath
       <snip>

32 zpools:

 |--16.73%--zswap_frontswap_store
       |
       <snip>
       |
       |--1.81%--zpool_malloc
       |     |
       |      --1.81%--zs_zpool_malloc
       |           |
       |            --1.79%--zs_malloc
       |                 |
       |                  --0.73%--obj_malloc
       |
       |--1.06%--zswap_update_total_size
       |
       |--0.59%--zpool_map_handle
       |     |
       |      --0.59%--zs_zpool_map
       |           |
       |            --0.57%--zs_map_object
       |                 |
       |                  --0.51%--_raw_spin_lock
       <snip>

Link: https://lkml.kernel.org/r/20230620194644.3142384-1-yosryahmed@google.com
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Suggested-by: Yu Zhao <yuzhao@google.com>
Acked-by: Chris Li (Google) <chrisl@kernel.org>
Reviewed-by: Nhat Pham <nphamcs@gmail.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 6be36015
...@@ -142,6 +142,9 @@ static bool zswap_exclusive_loads_enabled = IS_ENABLED( ...@@ -142,6 +142,9 @@ static bool zswap_exclusive_loads_enabled = IS_ENABLED(
CONFIG_ZSWAP_EXCLUSIVE_LOADS_DEFAULT_ON); CONFIG_ZSWAP_EXCLUSIVE_LOADS_DEFAULT_ON);
module_param_named(exclusive_loads, zswap_exclusive_loads_enabled, bool, 0644); module_param_named(exclusive_loads, zswap_exclusive_loads_enabled, bool, 0644);
/* Number of zpools in zswap_pool (empirically determined for scalability) */
#define ZSWAP_NR_ZPOOLS 32
/********************************* /*********************************
* data structures * data structures
**********************************/ **********************************/
...@@ -161,7 +164,7 @@ struct crypto_acomp_ctx { ...@@ -161,7 +164,7 @@ struct crypto_acomp_ctx {
* needs to be verified that it's still valid in the tree. * needs to be verified that it's still valid in the tree.
*/ */
struct zswap_pool { struct zswap_pool {
struct zpool *zpool; struct zpool *zpools[ZSWAP_NR_ZPOOLS];
struct crypto_acomp_ctx __percpu *acomp_ctx; struct crypto_acomp_ctx __percpu *acomp_ctx;
struct kref kref; struct kref kref;
struct list_head list; struct list_head list;
...@@ -248,7 +251,7 @@ static bool zswap_has_pool; ...@@ -248,7 +251,7 @@ static bool zswap_has_pool;
#define zswap_pool_debug(msg, p) \ #define zswap_pool_debug(msg, p) \
pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \ pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \
zpool_get_type((p)->zpool)) zpool_get_type((p)->zpools[0]))
static int zswap_writeback_entry(struct zswap_entry *entry, static int zswap_writeback_entry(struct zswap_entry *entry,
struct zswap_tree *tree); struct zswap_tree *tree);
...@@ -272,11 +275,13 @@ static void zswap_update_total_size(void) ...@@ -272,11 +275,13 @@ static void zswap_update_total_size(void)
{ {
struct zswap_pool *pool; struct zswap_pool *pool;
u64 total = 0; u64 total = 0;
int i;
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(pool, &zswap_pools, list) list_for_each_entry_rcu(pool, &zswap_pools, list)
total += zpool_get_total_size(pool->zpool); for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
total += zpool_get_total_size(pool->zpools[i]);
rcu_read_unlock(); rcu_read_unlock();
...@@ -365,6 +370,16 @@ static bool zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry) ...@@ -365,6 +370,16 @@ static bool zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
return false; return false;
} }
/*
 * Pick the zpool backing @entry's allocation.
 *
 * Each zswap_pool carries ZSWAP_NR_ZPOOLS zpools; the entry pointer is
 * hashed to spread allocations across them and reduce lock contention.
 * The same entry always hashes to the same index, so lookups after a
 * store find the handle in the zpool it was allocated from.
 */
static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
{
	/* With a single zpool there is nothing to choose between. */
	int idx = ZSWAP_NR_ZPOOLS > 1 ?
		  hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS)) : 0;

	return entry->pool->zpools[idx];
}
/* /*
* Carries out the common pattern of freeing and entry's zpool allocation, * Carries out the common pattern of freeing and entry's zpool allocation,
* freeing the entry itself, and decrementing the number of stored pages. * freeing the entry itself, and decrementing the number of stored pages.
...@@ -381,7 +396,7 @@ static void zswap_free_entry(struct zswap_entry *entry) ...@@ -381,7 +396,7 @@ static void zswap_free_entry(struct zswap_entry *entry)
spin_lock(&entry->pool->lru_lock); spin_lock(&entry->pool->lru_lock);
list_del(&entry->lru); list_del(&entry->lru);
spin_unlock(&entry->pool->lru_lock); spin_unlock(&entry->pool->lru_lock);
zpool_free(entry->pool->zpool, entry->handle); zpool_free(zswap_find_zpool(entry), entry->handle);
zswap_pool_put(entry->pool); zswap_pool_put(entry->pool);
} }
zswap_entry_cache_free(entry); zswap_entry_cache_free(entry);
...@@ -590,7 +605,8 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor) ...@@ -590,7 +605,8 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
list_for_each_entry_rcu(pool, &zswap_pools, list) { list_for_each_entry_rcu(pool, &zswap_pools, list) {
if (strcmp(pool->tfm_name, compressor)) if (strcmp(pool->tfm_name, compressor))
continue; continue;
if (strcmp(zpool_get_type(pool->zpool), type)) /* all zpools share the same type */
if (strcmp(zpool_get_type(pool->zpools[0]), type))
continue; continue;
/* if we can't get it, it's about to be destroyed */ /* if we can't get it, it's about to be destroyed */
if (!zswap_pool_get(pool)) if (!zswap_pool_get(pool))
...@@ -695,6 +711,7 @@ static void shrink_worker(struct work_struct *w) ...@@ -695,6 +711,7 @@ static void shrink_worker(struct work_struct *w)
static struct zswap_pool *zswap_pool_create(char *type, char *compressor) static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
{ {
int i;
struct zswap_pool *pool; struct zswap_pool *pool;
char name[38]; /* 'zswap' + 32 char (max) num + \0 */ char name[38]; /* 'zswap' + 32 char (max) num + \0 */
gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
...@@ -715,15 +732,18 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor) ...@@ -715,15 +732,18 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
if (!pool) if (!pool)
return NULL; return NULL;
for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) {
/* unique name for each pool specifically required by zsmalloc */ /* unique name for each pool specifically required by zsmalloc */
snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count)); snprintf(name, 38, "zswap%x",
atomic_inc_return(&zswap_pools_count));
pool->zpool = zpool_create_pool(type, name, gfp); pool->zpools[i] = zpool_create_pool(type, name, gfp);
if (!pool->zpool) { if (!pool->zpools[i]) {
pr_err("%s zpool not available\n", type); pr_err("%s zpool not available\n", type);
goto error; goto error;
} }
pr_debug("using %s zpool\n", zpool_get_type(pool->zpool)); }
pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0]));
strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name)); strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
...@@ -755,8 +775,8 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor) ...@@ -755,8 +775,8 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
error: error:
if (pool->acomp_ctx) if (pool->acomp_ctx)
free_percpu(pool->acomp_ctx); free_percpu(pool->acomp_ctx);
if (pool->zpool) while (i--)
zpool_destroy_pool(pool->zpool); zpool_destroy_pool(pool->zpools[i]);
kfree(pool); kfree(pool);
return NULL; return NULL;
} }
...@@ -805,11 +825,14 @@ static struct zswap_pool *__zswap_pool_create_fallback(void) ...@@ -805,11 +825,14 @@ static struct zswap_pool *__zswap_pool_create_fallback(void)
static void zswap_pool_destroy(struct zswap_pool *pool) static void zswap_pool_destroy(struct zswap_pool *pool)
{ {
int i;
zswap_pool_debug("destroying", pool); zswap_pool_debug("destroying", pool);
cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
free_percpu(pool->acomp_ctx); free_percpu(pool->acomp_ctx);
zpool_destroy_pool(pool->zpool); for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
zpool_destroy_pool(pool->zpools[i]);
kfree(pool); kfree(pool);
} }
...@@ -1073,7 +1096,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry, ...@@ -1073,7 +1096,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
struct page *page; struct page *page;
struct scatterlist input, output; struct scatterlist input, output;
struct crypto_acomp_ctx *acomp_ctx; struct crypto_acomp_ctx *acomp_ctx;
struct zpool *pool = entry->pool->zpool; struct zpool *pool = zswap_find_zpool(entry);
u8 *src, *tmp = NULL; u8 *src, *tmp = NULL;
unsigned int dlen; unsigned int dlen;
...@@ -1214,6 +1237,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, ...@@ -1214,6 +1237,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
struct crypto_acomp_ctx *acomp_ctx; struct crypto_acomp_ctx *acomp_ctx;
struct obj_cgroup *objcg = NULL; struct obj_cgroup *objcg = NULL;
struct zswap_pool *pool; struct zswap_pool *pool;
struct zpool *zpool;
int ret; int ret;
unsigned int dlen = PAGE_SIZE; unsigned int dlen = PAGE_SIZE;
unsigned long handle, value; unsigned long handle, value;
...@@ -1324,10 +1348,11 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, ...@@ -1324,10 +1348,11 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
} }
/* store */ /* store */
zpool = zswap_find_zpool(entry);
gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
if (zpool_malloc_support_movable(entry->pool->zpool)) if (zpool_malloc_support_movable(zpool))
gfp |= __GFP_HIGHMEM | __GFP_MOVABLE; gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
ret = zpool_malloc(entry->pool->zpool, dlen, gfp, &handle); ret = zpool_malloc(zpool, dlen, gfp, &handle);
if (ret == -ENOSPC) { if (ret == -ENOSPC) {
zswap_reject_compress_poor++; zswap_reject_compress_poor++;
goto put_dstmem; goto put_dstmem;
...@@ -1336,9 +1361,9 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, ...@@ -1336,9 +1361,9 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
zswap_reject_alloc_fail++; zswap_reject_alloc_fail++;
goto put_dstmem; goto put_dstmem;
} }
buf = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_WO); buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
memcpy(buf, dst, dlen); memcpy(buf, dst, dlen);
zpool_unmap_handle(entry->pool->zpool, handle); zpool_unmap_handle(zpool, handle);
mutex_unlock(acomp_ctx->mutex); mutex_unlock(acomp_ctx->mutex);
/* populate entry */ /* populate entry */
...@@ -1409,6 +1434,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, ...@@ -1409,6 +1434,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
struct scatterlist input, output; struct scatterlist input, output;
struct crypto_acomp_ctx *acomp_ctx; struct crypto_acomp_ctx *acomp_ctx;
u8 *src, *dst, *tmp; u8 *src, *dst, *tmp;
struct zpool *zpool;
unsigned int dlen; unsigned int dlen;
int ret; int ret;
...@@ -1430,7 +1456,8 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, ...@@ -1430,7 +1456,8 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
goto stats; goto stats;
} }
if (!zpool_can_sleep_mapped(entry->pool->zpool)) { zpool = zswap_find_zpool(entry);
if (!zpool_can_sleep_mapped(zpool)) {
tmp = kmalloc(entry->length, GFP_KERNEL); tmp = kmalloc(entry->length, GFP_KERNEL);
if (!tmp) { if (!tmp) {
ret = -ENOMEM; ret = -ENOMEM;
...@@ -1440,12 +1467,12 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, ...@@ -1440,12 +1467,12 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
/* decompress */ /* decompress */
dlen = PAGE_SIZE; dlen = PAGE_SIZE;
src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO); src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
if (!zpool_can_sleep_mapped(entry->pool->zpool)) { if (!zpool_can_sleep_mapped(zpool)) {
memcpy(tmp, src, entry->length); memcpy(tmp, src, entry->length);
src = tmp; src = tmp;
zpool_unmap_handle(entry->pool->zpool, entry->handle); zpool_unmap_handle(zpool, entry->handle);
} }
acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx); acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
...@@ -1457,8 +1484,8 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, ...@@ -1457,8 +1484,8 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait); ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
mutex_unlock(acomp_ctx->mutex); mutex_unlock(acomp_ctx->mutex);
if (zpool_can_sleep_mapped(entry->pool->zpool)) if (zpool_can_sleep_mapped(zpool))
zpool_unmap_handle(entry->pool->zpool, entry->handle); zpool_unmap_handle(zpool, entry->handle);
else else
kfree(tmp); kfree(tmp);
...@@ -1619,7 +1646,7 @@ static int zswap_setup(void) ...@@ -1619,7 +1646,7 @@ static int zswap_setup(void)
pool = __zswap_pool_create_fallback(); pool = __zswap_pool_create_fallback();
if (pool) { if (pool) {
pr_info("loaded using pool %s/%s\n", pool->tfm_name, pr_info("loaded using pool %s/%s\n", pool->tfm_name,
zpool_get_type(pool->zpool)); zpool_get_type(pool->zpools[0]));
list_add(&pool->list, &zswap_pools); list_add(&pool->list, &zswap_pools);
zswap_has_pool = true; zswap_has_pool = true;
} else { } else {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment