Commit 9ead3527 authored by David S. Miller

Merge branch 'rhashtable-next'

Thomas Graf says:

====================
rhashtable updates on top of Herbert's work

Patch 1 is a bugfix for an RCU splat I encountered while testing.
Patches 2 & 3 are pure cleanups. Patch 4 disables automatic shrinking
by default, as discussed in the previous thread. Patch 5 removes some
rhashtable internal knowledge from nft_hash and fixes another RCU
splat.

I've pushed various rhashtable tests (Netlink, nft) together with a
Makefile to a git tree [0] for easier stress testing.

[0] https://github.com/tgraf/rhashtable
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 27cd5452 6b6f302c
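
With this series, shrinking becomes opt-in: a table only shrinks when the deferred worker runs and automatic_shrinking was set in its parameters (see the netlink, tipc and nft_hash hunks below). A minimal sketch of how a table user might opt in; everything named "my_*" is an illustrative placeholder, not part of this series:

/* Hedged example: opting in to automatic shrinking. Only the
 * .automatic_shrinking field is new here; the rest is a made-up user.
 */
#include <linux/types.h>
#include <linux/rhashtable.h>

struct my_obj {
	u32 key;
	struct rhash_head node;		/* linkage used by the hash table */
};

static const struct rhashtable_params my_params = {
	.head_offset		= offsetof(struct my_obj, node),
	.key_offset		= offsetof(struct my_obj, key),
	.key_len		= sizeof(u32),
	.min_size		= 256,
	/* The new default is "never shrink"; set this to get the old
	 * shrink-below-30% behaviour back.
	 */
	.automatic_shrinking	= true,
};

static int my_table_init(struct rhashtable *ht)
{
	return rhashtable_init(ht, &my_params);
}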
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -2,7 +2,7 @@
  * Resizable, Scalable, Concurrent Hash Table
  *
  * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
- * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch>
+ * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
  * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
  *
  * Code partially derived from nft_hash
@@ -104,6 +104,7 @@ struct rhashtable;
  * @min_size: Minimum size while shrinking
  * @nulls_base: Base value to generate nulls marker
  * @insecure_elasticity: Set to true to disable chain length checks
+ * @automatic_shrinking: Enable automatic shrinking of tables
  * @locks_mul: Number of bucket locks to allocate per cpu (default: 128)
  * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
  * @obj_hashfn: Function to hash object
@@ -118,6 +119,7 @@ struct rhashtable_params {
 	unsigned int		min_size;
 	u32			nulls_base;
 	bool			insecure_elasticity;
+	bool			automatic_shrinking;
 	size_t			locks_mul;
 	rht_hashfn_t		hashfn;
 	rht_obj_hashfn_t	obj_hashfn;
@@ -134,12 +136,10 @@ struct rhashtable_params {
  * @run_work: Deferred worker to expand/shrink asynchronously
  * @mutex: Mutex to protect current/future table swapping
  * @lock: Spin lock to protect walker list
- * @being_destroyed: True if table is set up for destruction
  */
 struct rhashtable {
 	struct bucket_table __rcu	*tbl;
 	atomic_t			nelems;
-	bool				being_destroyed;
 	unsigned int			key_len;
 	unsigned int			elasticity;
 	struct rhashtable_params	p;
@@ -208,13 +208,13 @@ static inline unsigned int rht_key_hashfn(
 	struct rhashtable *ht, const struct bucket_table *tbl,
 	const void *key, const struct rhashtable_params params)
 {
-	unsigned hash;
+	unsigned int hash;
 
 	/* params must be equal to ht->p if it isn't constant. */
 	if (!__builtin_constant_p(params.key_len))
 		hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd);
 	else if (params.key_len) {
-		unsigned key_len = params.key_len;
+		unsigned int key_len = params.key_len;
 
 		if (params.hashfn)
 			hash = params.hashfn(key, key_len, tbl->hash_rnd);
@@ -224,7 +224,7 @@ static inline unsigned int rht_key_hashfn(
 			hash = jhash2(key, key_len / sizeof(u32),
 				      tbl->hash_rnd);
 	} else {
-		unsigned key_len = ht->p.key_len;
+		unsigned int key_len = ht->p.key_len;
 
 		if (params.hashfn)
 			hash = params.hashfn(key, key_len, tbl->hash_rnd);
@@ -332,6 +332,9 @@ int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU);
 void *rhashtable_walk_next(struct rhashtable_iter *iter);
 void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);
 
+void rhashtable_free_and_destroy(struct rhashtable *ht,
+				 void (*free_fn)(void *ptr, void *arg),
+				 void *arg);
 void rhashtable_destroy(struct rhashtable *ht);
 
 #define rht_dereference(p, ht) \
@@ -512,7 +515,7 @@ static inline void *rhashtable_lookup_fast(
 	};
 	const struct bucket_table *tbl;
 	struct rhash_head *he;
-	unsigned hash;
+	unsigned int hash;
 
 	rcu_read_lock();
@@ -539,6 +542,7 @@ static inline void *rhashtable_lookup_fast(
 	return NULL;
 }
 
+/* Internal function, please use rhashtable_insert_fast() instead */
 static inline int __rhashtable_insert_fast(
 	struct rhashtable *ht, const void *key, struct rhash_head *obj,
 	const struct rhashtable_params params)
@@ -550,8 +554,8 @@ static inline int __rhashtable_insert_fast(
 	struct bucket_table *tbl, *new_tbl;
 	struct rhash_head *head;
 	spinlock_t *lock;
-	unsigned elasticity;
-	unsigned hash;
+	unsigned int elasticity;
+	unsigned int hash;
 	int err;
 
 restart:
@@ -585,8 +589,8 @@ static inline int __rhashtable_insert_fast(
 	if (unlikely(rht_grow_above_100(ht, tbl))) {
 slow_path:
 		spin_unlock_bh(lock);
-		rcu_read_unlock();
 		err = rhashtable_insert_rehash(ht);
+		rcu_read_unlock();
 		if (err)
 			return err;
@@ -711,6 +715,7 @@ static inline int rhashtable_lookup_insert_key(
 	return __rhashtable_insert_fast(ht, key, obj, params);
 }
 
+/* Internal function, please use rhashtable_remove_fast() instead */
 static inline int __rhashtable_remove_fast(
 	struct rhashtable *ht, struct bucket_table *tbl,
 	struct rhash_head *obj, const struct rhashtable_params params)
@@ -718,7 +723,7 @@ static inline int __rhashtable_remove_fast(
 	struct rhash_head __rcu **pprev;
 	struct rhash_head *he;
 	spinlock_t *lock;
-	unsigned hash;
+	unsigned int hash;
 	int err = -ENOENT;
 
 	hash = rht_head_hashfn(ht, tbl, obj, params);
@@ -782,7 +787,8 @@ static inline int rhashtable_remove_fast(
 		goto out;
 
 	atomic_dec(&ht->nelems);
-	if (rht_shrink_below_30(ht, tbl))
+	if (unlikely(ht->p.automatic_shrinking &&
+		     rht_shrink_below_30(ht, tbl)))
 		schedule_work(&ht->run_work);
 
 out:
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -153,7 +153,7 @@ static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
 	return new_tbl;
 }
 
-static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
+static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	struct bucket_table *new_tbl = rhashtable_last_table(ht,
@@ -162,7 +162,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
 	int err = -ENOENT;
 	struct rhash_head *head, *next, *entry;
 	spinlock_t *new_bucket_lock;
-	unsigned new_hash;
+	unsigned int new_hash;
 
 	rht_for_each(entry, old_tbl, old_hash) {
 		err = 0;
@@ -199,7 +199,8 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
 	return err;
 }
 
-static void rhashtable_rehash_chain(struct rhashtable *ht, unsigned old_hash)
+static void rhashtable_rehash_chain(struct rhashtable *ht,
+				    unsigned int old_hash)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	spinlock_t *old_bucket_lock;
@@ -244,7 +245,7 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	struct bucket_table *new_tbl;
 	struct rhashtable_walker *walker;
-	unsigned old_hash;
+	unsigned int old_hash;
 
 	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
 	if (!new_tbl)
@@ -324,11 +325,12 @@ static int rhashtable_expand(struct rhashtable *ht)
 static int rhashtable_shrink(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
-	unsigned size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
+	unsigned int size;
 	int err;
 
 	ASSERT_RHT_MUTEX(ht);
 
+	size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
 	if (size < ht->p.min_size)
 		size = ht->p.min_size;
@@ -357,20 +359,17 @@ static void rht_deferred_worker(struct work_struct *work)
 	ht = container_of(work, struct rhashtable, run_work);
 	mutex_lock(&ht->mutex);
-	if (ht->being_destroyed)
-		goto unlock;
 
 	tbl = rht_dereference(ht->tbl, ht);
 	tbl = rhashtable_last_table(ht, tbl);
 
 	if (rht_grow_above_75(ht, tbl))
 		rhashtable_expand(ht);
-	else if (rht_shrink_below_30(ht, tbl))
+	else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
 		rhashtable_shrink(ht);
 
 	err = rhashtable_rehash_table(ht);
 
-unlock:
 	mutex_unlock(&ht->mutex);
 
 	if (err)
@@ -379,9 +378,9 @@ static void rht_deferred_worker(struct work_struct *work)
 static bool rhashtable_check_elasticity(struct rhashtable *ht,
 					struct bucket_table *tbl,
-					unsigned hash)
+					unsigned int hash)
 {
-	unsigned elasticity = ht->elasticity;
+	unsigned int elasticity = ht->elasticity;
 	struct rhash_head *head;
 
 	rht_for_each(head, tbl, hash)
@@ -431,7 +430,7 @@ int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
 			   struct bucket_table *tbl)
 {
 	struct rhash_head *head;
-	unsigned hash;
+	unsigned int hash;
 	int err;
 
 	tbl = rhashtable_last_table(ht, tbl);
@@ -781,21 +780,53 @@ int rhashtable_init(struct rhashtable *ht,
 EXPORT_SYMBOL_GPL(rhashtable_init);
 
 /**
- * rhashtable_destroy - destroy hash table
+ * rhashtable_free_and_destroy - free elements and destroy hash table
  * @ht: the hash table to destroy
+ * @free_fn: callback to release resources of element
+ * @arg: pointer passed to free_fn
+ *
+ * Stops an eventual async resize. If defined, invokes free_fn for each
+ * element to releasal resources. Please note that RCU protected
+ * readers may still be accessing the elements. Releasing of resources
+ * must occur in a compatible manner. Then frees the bucket array.
  *
- * Frees the bucket array. This function is not rcu safe, therefore the caller
- * has to make sure that no resizing may happen by unpublishing the hashtable
- * and waiting for the quiescent cycle before releasing the bucket array.
+ * This function will eventually sleep to wait for an async resize
+ * to complete. The caller is responsible that no further write operations
+ * occurs in parallel.
  */
-void rhashtable_destroy(struct rhashtable *ht)
+void rhashtable_free_and_destroy(struct rhashtable *ht,
+				 void (*free_fn)(void *ptr, void *arg),
+				 void *arg)
 {
-	ht->being_destroyed = true;
+	const struct bucket_table *tbl;
+	unsigned int i;
 
 	cancel_work_sync(&ht->run_work);
 
 	mutex_lock(&ht->mutex);
-	bucket_table_free(rht_dereference(ht->tbl, ht));
+	tbl = rht_dereference(ht->tbl, ht);
+	if (free_fn) {
+		for (i = 0; i < tbl->size; i++) {
+			struct rhash_head *pos, *next;
+
+			for (pos = rht_dereference(tbl->buckets[i], ht),
+			     next = !rht_is_a_nulls(pos) ?
+					rht_dereference(pos->next, ht) : NULL;
+			     !rht_is_a_nulls(pos);
+			     pos = next,
+			     next = !rht_is_a_nulls(pos) ?
+					rht_dereference(pos->next, ht) : NULL)
+				free_fn(rht_obj(ht, pos), arg);
+		}
+	}
+
+	bucket_table_free(tbl);
 	mutex_unlock(&ht->mutex);
 }
+EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
+
+void rhashtable_destroy(struct rhashtable *ht)
+{
+	return rhashtable_free_and_destroy(ht, NULL, NULL);
+}
 EXPORT_SYMBOL_GPL(rhashtable_destroy);
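
Patch 5 then replaces nft_hash's hand-rolled teardown with the new rhashtable_free_and_destroy() helper (next hunk). The general calling pattern, as a hedged sketch with hypothetical my_* names: pass a callback that releases one element, plus an opaque argument forwarded to every invocation.

/* Hedged usage sketch for rhashtable_free_and_destroy(); the my_* names
 * are placeholders, not part of this series.
 */
#include <linux/rhashtable.h>
#include <linux/slab.h>

static void my_free_element(void *ptr, void *arg)
{
	/* ptr is the object containing the rhash_head; arg is the opaque
	 * pointer given to rhashtable_free_and_destroy(). RCU readers may
	 * still see the element, so real code must free in an
	 * RCU-compatible way (or guarantee no readers remain).
	 */
	kfree(ptr);
}

static void my_table_destroy(struct rhashtable *ht)
{
	/* Cancels a pending async resize, walks every bucket chain and
	 * invokes the callback on each element, then frees the buckets.
	 * Passing a NULL callback is equivalent to rhashtable_destroy().
	 */
	rhashtable_free_and_destroy(ht, my_free_element, NULL);
}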
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -172,6 +172,7 @@ static const struct rhashtable_params nft_hash_params = {
 	.head_offset = offsetof(struct nft_hash_elem, node),
 	.key_offset = offsetof(struct nft_hash_elem, key),
 	.hashfn = jhash,
+	.automatic_shrinking = true,
 };
 
 static int nft_hash_init(const struct nft_set *set,
@@ -187,26 +188,15 @@ static int nft_hash_init(const struct nft_set *set,
 	return rhashtable_init(priv, &params);
 }
 
-static void nft_hash_destroy(const struct nft_set *set)
+static void nft_free_element(void *ptr, void *arg)
 {
-	struct rhashtable *priv = nft_set_priv(set);
-	const struct bucket_table *tbl;
-	struct nft_hash_elem *he;
-	struct rhash_head *pos, *next;
-	unsigned int i;
-
-	/* Stop an eventual async resizing */
-	priv->being_destroyed = true;
-	mutex_lock(&priv->mutex);
-
-	tbl = rht_dereference(priv->tbl, priv);
-	for (i = 0; i < tbl->size; i++) {
-		rht_for_each_entry_safe(he, pos, next, tbl, i, node)
-			nft_hash_elem_destroy(set, he);
-	}
-	mutex_unlock(&priv->mutex);
+	nft_hash_elem_destroy((const struct nft_set *)arg, ptr);
+}
 
-	rhashtable_destroy(priv);
+static void nft_hash_destroy(const struct nft_set *set)
+{
+	rhashtable_free_and_destroy(nft_set_priv(set), nft_free_element,
+				    (void *)set);
 }
 
 static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -3142,6 +3142,7 @@ static const struct rhashtable_params netlink_rhashtable_params = {
 	.obj_hashfn = netlink_hash,
 	.obj_cmpfn = netlink_compare,
 	.max_size = 65536,
+	.automatic_shrinking = true,
 };
 
 static int __init netlink_proto_init(void)
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2297,6 +2297,7 @@ static const struct rhashtable_params tsk_rht_params = {
 	.key_len = sizeof(u32), /* portid */
 	.max_size = 1048576,
 	.min_size = 256,
+	.automatic_shrinking = true,
 };
 
 int tipc_sk_rht_init(struct net *net)