Commit a685e089 authored by Al Viro

Delay struct net freeing while there's a sysfs instance referring to it

	* new refcount in struct net, controlling actual freeing of the memory
	* new method in kobj_ns_type_operations (->drop_ns())
	* ->current_ns() semantics change - it's supposed to be followed by
corresponding ->drop_ns().  For struct net in case of CONFIG_NET_NS it bumps
the new refcount; net_drop_ns() decrements it and calls net_free() if the
last reference has been dropped.  Method renamed to ->grab_current_ns().
	* old net_free() callers call net_drop_ns() instead.
	* sysfs_exit_ns() is gone, along with a large part of callchain
leading to it; now that the references stored in ->ns[...] stay valid we
do not need to hunt them down and replace them with NULL.  That fixes
problems in sysfs_lookup() and sysfs_readdir(), along with getting rid
of sb->s_instances abuse.

	Note that struct net *shutdown* logic has not changed - net_cleanup()
is called exactly when it used to be called.  The only thing postponed by
having a sysfs instance referring to that struct net is actual freeing of
memory occupied by struct net.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
parent dde194a6
...@@ -95,6 +95,14 @@ static int sysfs_set_super(struct super_block *sb, void *data) ...@@ -95,6 +95,14 @@ static int sysfs_set_super(struct super_block *sb, void *data)
return error; return error;
} }
/*
 * Tear down a sysfs_super_info: hand every ns[] slot to kobj_ns_drop()
 * so the reference taken for it is released, then free the structure.
 */
static void free_sysfs_super_info(struct sysfs_super_info *info)
{
	int idx = KOBJ_NS_TYPE_NONE;

	while (idx < KOBJ_NS_TYPES) {
		kobj_ns_drop(idx, info->ns[idx]);
		idx++;
	}
	kfree(info);
}
static struct dentry *sysfs_mount(struct file_system_type *fs_type, static struct dentry *sysfs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data) int flags, const char *dev_name, void *data)
{ {
...@@ -108,11 +116,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, ...@@ -108,11 +116,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
info->ns[type] = kobj_ns_current(type); info->ns[type] = kobj_ns_grab_current(type);
sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info); sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info);
if (IS_ERR(sb) || sb->s_fs_info != info) if (IS_ERR(sb) || sb->s_fs_info != info)
kfree(info); free_sysfs_super_info(info);
if (IS_ERR(sb)) if (IS_ERR(sb))
return ERR_CAST(sb); return ERR_CAST(sb);
if (!sb->s_root) { if (!sb->s_root) {
...@@ -131,12 +139,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, ...@@ -131,12 +139,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
static void sysfs_kill_sb(struct super_block *sb) static void sysfs_kill_sb(struct super_block *sb)
{ {
struct sysfs_super_info *info = sysfs_info(sb); struct sysfs_super_info *info = sysfs_info(sb);
/* Remove the superblock from fs_supers/s_instances /* Remove the superblock from fs_supers/s_instances
* so we can't find it, before freeing sysfs_super_info. * so we can't find it, before freeing sysfs_super_info.
*/ */
kill_anon_super(sb); kill_anon_super(sb);
kfree(info); free_sysfs_super_info(info);
} }
static struct file_system_type sysfs_fs_type = { static struct file_system_type sysfs_fs_type = {
...@@ -145,28 +152,6 @@ static struct file_system_type sysfs_fs_type = { ...@@ -145,28 +152,6 @@ static struct file_system_type sysfs_fs_type = {
.kill_sb = sysfs_kill_sb, .kill_sb = sysfs_kill_sb,
}; };
/*
 * Clear the namespace tag @ns of kind @type from every sysfs superblock
 * that still records it.
 *
 * The walk over sysfs_fs_type.fs_supers runs with sysfs_mutex and sb_lock
 * held.  A superblock that is still on the fs_supers/s_instances list has
 * not finished unmounting, so its s_fs_info points to a valid
 * struct sysfs_super_info.
 */
void sysfs_exit_ns(enum kobj_ns_type type, const void *ns)
{
	struct super_block *sb;

	mutex_lock(&sysfs_mutex);
	spin_lock(&sb_lock);
	list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
		struct sysfs_super_info *sbi = sysfs_info(sb);

		/* Only superblocks tagged with this ns are modified */
		if (sbi->ns[type] == ns)
			sbi->ns[type] = NULL;
	}
	spin_unlock(&sb_lock);
	mutex_unlock(&sysfs_mutex);
}
int __init sysfs_init(void) int __init sysfs_init(void)
{ {
int err = -ENOMEM; int err = -ENOMEM;
......
...@@ -136,7 +136,7 @@ struct sysfs_addrm_cxt { ...@@ -136,7 +136,7 @@ struct sysfs_addrm_cxt {
* instance). * instance).
*/ */
struct sysfs_super_info { struct sysfs_super_info {
const void *ns[KOBJ_NS_TYPES]; void *ns[KOBJ_NS_TYPES];
}; };
#define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info)) #define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info))
extern struct sysfs_dirent sysfs_root; extern struct sysfs_dirent sysfs_root;
......
...@@ -32,15 +32,17 @@ enum kobj_ns_type { ...@@ -32,15 +32,17 @@ enum kobj_ns_type {
/* /*
* Callbacks so sysfs can determine namespaces * Callbacks so sysfs can determine namespaces
* @current_ns: return calling task's namespace * @grab_current_ns: return a new reference to calling task's namespace
* @netlink_ns: return namespace to which a sock belongs (right?) * @netlink_ns: return namespace to which a sock belongs (right?)
* @initial_ns: return the initial namespace (i.e. init_net_ns) * @initial_ns: return the initial namespace (i.e. init_net_ns)
* @drop_ns: drops a reference to namespace
*/ */
struct kobj_ns_type_operations { struct kobj_ns_type_operations {
enum kobj_ns_type type; enum kobj_ns_type type;
const void *(*current_ns)(void); void *(*grab_current_ns)(void);
const void *(*netlink_ns)(struct sock *sk); const void *(*netlink_ns)(struct sock *sk);
const void *(*initial_ns)(void); const void *(*initial_ns)(void);
void (*drop_ns)(void *);
}; };
int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); int kobj_ns_type_register(const struct kobj_ns_type_operations *ops);
...@@ -48,9 +50,9 @@ int kobj_ns_type_registered(enum kobj_ns_type type); ...@@ -48,9 +50,9 @@ int kobj_ns_type_registered(enum kobj_ns_type type);
const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent);
const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj);
const void *kobj_ns_current(enum kobj_ns_type type); void *kobj_ns_grab_current(enum kobj_ns_type type);
const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk);
const void *kobj_ns_initial(enum kobj_ns_type type); const void *kobj_ns_initial(enum kobj_ns_type type);
void kobj_ns_exit(enum kobj_ns_type type, const void *ns); void kobj_ns_drop(enum kobj_ns_type type, void *ns);
#endif /* _LINUX_KOBJECT_NS_H */ #endif /* _LINUX_KOBJECT_NS_H */
...@@ -177,9 +177,6 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, ...@@ -177,9 +177,6 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd); struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd);
void sysfs_put(struct sysfs_dirent *sd); void sysfs_put(struct sysfs_dirent *sd);
/* Called to clear a ns tag when it is no longer valid */
void sysfs_exit_ns(enum kobj_ns_type type, const void *tag);
int __must_check sysfs_init(void); int __must_check sysfs_init(void);
#else /* CONFIG_SYSFS */ #else /* CONFIG_SYSFS */
...@@ -338,10 +335,6 @@ static inline void sysfs_put(struct sysfs_dirent *sd) ...@@ -338,10 +335,6 @@ static inline void sysfs_put(struct sysfs_dirent *sd)
{ {
} }
/*
 * Empty inline variant of sysfs_exit_ns(): does nothing with @type or @tag.
 * NOTE(review): presumably the stub used when CONFIG_SYSFS is disabled;
 * the enclosing #ifdef is not visible here - confirm.
 */
static inline void sysfs_exit_ns(int type, const void *tag)
{
}
static inline int __must_check sysfs_init(void) static inline int __must_check sysfs_init(void)
{ {
return 0; return 0;
......
...@@ -35,8 +35,11 @@ struct netns_ipvs; ...@@ -35,8 +35,11 @@ struct netns_ipvs;
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
struct net { struct net {
atomic_t passive; /* To decided when the network
* namespace should be freed.
*/
atomic_t count; /* To decided when the network atomic_t count; /* To decided when the network
* namespace should be freed. * namespace should be shut down.
*/ */
#ifdef NETNS_REFCNT_DEBUG #ifdef NETNS_REFCNT_DEBUG
atomic_t use_count; /* To track references we atomic_t use_count; /* To track references we
...@@ -154,6 +157,9 @@ int net_eq(const struct net *net1, const struct net *net2) ...@@ -154,6 +157,9 @@ int net_eq(const struct net *net1, const struct net *net2)
{ {
return net1 == net2; return net1 == net2;
} }
extern void net_drop_ns(void *);
#else #else
static inline struct net *get_net(struct net *net) static inline struct net *get_net(struct net *net)
...@@ -175,6 +181,8 @@ int net_eq(const struct net *net1, const struct net *net2) ...@@ -175,6 +181,8 @@ int net_eq(const struct net *net1, const struct net *net2)
{ {
return 1; return 1;
} }
#define net_drop_ns NULL
#endif #endif
......
...@@ -948,14 +948,14 @@ const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj) ...@@ -948,14 +948,14 @@ const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj)
} }
const void *kobj_ns_current(enum kobj_ns_type type) void *kobj_ns_grab_current(enum kobj_ns_type type)
{ {
const void *ns = NULL; void *ns = NULL;
spin_lock(&kobj_ns_type_lock); spin_lock(&kobj_ns_type_lock);
if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
kobj_ns_ops_tbl[type]) kobj_ns_ops_tbl[type])
ns = kobj_ns_ops_tbl[type]->current_ns(); ns = kobj_ns_ops_tbl[type]->grab_current_ns();
spin_unlock(&kobj_ns_type_lock); spin_unlock(&kobj_ns_type_lock);
return ns; return ns;
...@@ -987,23 +987,15 @@ const void *kobj_ns_initial(enum kobj_ns_type type) ...@@ -987,23 +987,15 @@ const void *kobj_ns_initial(enum kobj_ns_type type)
return ns; return ns;
} }
/* void kobj_ns_drop(enum kobj_ns_type type, void *ns)
* kobj_ns_exit - invalidate a namespace tag
*
* @type: the namespace type (i.e. KOBJ_NS_TYPE_NET)
* @ns: the actual namespace being invalidated
*
* This is called when a tag is no longer valid. For instance,
* when a network namespace exits, it uses this helper to
* make sure no sb's sysfs_info points to the now-invalidated
* netns.
*/
void kobj_ns_exit(enum kobj_ns_type type, const void *ns)
{ {
sysfs_exit_ns(type, ns); spin_lock(&kobj_ns_type_lock);
if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
kobj_ns_ops_tbl[type] && kobj_ns_ops_tbl[type]->drop_ns)
kobj_ns_ops_tbl[type]->drop_ns(ns);
spin_unlock(&kobj_ns_type_lock);
} }
EXPORT_SYMBOL(kobject_get); EXPORT_SYMBOL(kobject_get);
EXPORT_SYMBOL(kobject_put); EXPORT_SYMBOL(kobject_put);
EXPORT_SYMBOL(kobject_del); EXPORT_SYMBOL(kobject_del);
......
...@@ -1179,9 +1179,14 @@ static void remove_queue_kobjects(struct net_device *net) ...@@ -1179,9 +1179,14 @@ static void remove_queue_kobjects(struct net_device *net)
#endif #endif
} }
static const void *net_current_ns(void) static void *net_grab_current_ns(void)
{ {
return current->nsproxy->net_ns; struct net *ns = current->nsproxy->net_ns;
#ifdef CONFIG_NET_NS
if (ns)
atomic_inc(&ns->passive);
#endif
return ns;
} }
static const void *net_initial_ns(void) static const void *net_initial_ns(void)
...@@ -1196,22 +1201,13 @@ static const void *net_netlink_ns(struct sock *sk) ...@@ -1196,22 +1201,13 @@ static const void *net_netlink_ns(struct sock *sk)
struct kobj_ns_type_operations net_ns_type_operations = { struct kobj_ns_type_operations net_ns_type_operations = {
.type = KOBJ_NS_TYPE_NET, .type = KOBJ_NS_TYPE_NET,
.current_ns = net_current_ns, .grab_current_ns = net_grab_current_ns,
.netlink_ns = net_netlink_ns, .netlink_ns = net_netlink_ns,
.initial_ns = net_initial_ns, .initial_ns = net_initial_ns,
.drop_ns = net_drop_ns,
}; };
EXPORT_SYMBOL_GPL(net_ns_type_operations); EXPORT_SYMBOL_GPL(net_ns_type_operations);
/*
 * Forward @net to kobj_ns_exit() as a KOBJ_NS_TYPE_NET namespace tag.
 * Installed as the .exit callback of kobj_net_ops.
 */
static void net_kobj_ns_exit(struct net *net)
{
kobj_ns_exit(KOBJ_NS_TYPE_NET, net);
}
/* Per-net operations whose only hook is .exit, pointing at net_kobj_ns_exit(). */
static struct pernet_operations kobj_net_ops = {
.exit = net_kobj_ns_exit,
};
#ifdef CONFIG_HOTPLUG #ifdef CONFIG_HOTPLUG
static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
{ {
...@@ -1339,6 +1335,5 @@ EXPORT_SYMBOL(netdev_class_remove_file); ...@@ -1339,6 +1335,5 @@ EXPORT_SYMBOL(netdev_class_remove_file);
int netdev_kobject_init(void) int netdev_kobject_init(void)
{ {
kobj_ns_type_register(&net_ns_type_operations); kobj_ns_type_register(&net_ns_type_operations);
register_pernet_subsys(&kobj_net_ops);
return class_register(&net_class); return class_register(&net_class);
} }
...@@ -128,6 +128,7 @@ static __net_init int setup_net(struct net *net) ...@@ -128,6 +128,7 @@ static __net_init int setup_net(struct net *net)
LIST_HEAD(net_exit_list); LIST_HEAD(net_exit_list);
atomic_set(&net->count, 1); atomic_set(&net->count, 1);
atomic_set(&net->passive, 1);
#ifdef NETNS_REFCNT_DEBUG #ifdef NETNS_REFCNT_DEBUG
atomic_set(&net->use_count, 0); atomic_set(&net->use_count, 0);
...@@ -210,6 +211,13 @@ static void net_free(struct net *net) ...@@ -210,6 +211,13 @@ static void net_free(struct net *net)
kmem_cache_free(net_cachep, net); kmem_cache_free(net_cachep, net);
} }
/*
 * Drop one reference on the ->passive counter of the struct net passed
 * as @p; when atomic_dec_and_test() reports that this was the last one,
 * release the structure with net_free().  A NULL @p is ignored.
 */
void net_drop_ns(void *p)
{
	struct net *net = p;

	if (!net)
		return;
	if (!atomic_dec_and_test(&net->passive))
		return;
	net_free(net);
}
struct net *copy_net_ns(unsigned long flags, struct net *old_net) struct net *copy_net_ns(unsigned long flags, struct net *old_net)
{ {
struct net *net; struct net *net;
...@@ -230,7 +238,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) ...@@ -230,7 +238,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
} }
mutex_unlock(&net_mutex); mutex_unlock(&net_mutex);
if (rv < 0) { if (rv < 0) {
net_free(net); net_drop_ns(net);
return ERR_PTR(rv); return ERR_PTR(rv);
} }
return net; return net;
...@@ -286,7 +294,7 @@ static void cleanup_net(struct work_struct *work) ...@@ -286,7 +294,7 @@ static void cleanup_net(struct work_struct *work)
/* Finally it is safe to free my network namespace structure */ /* Finally it is safe to free my network namespace structure */
list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
list_del_init(&net->exit_list); list_del_init(&net->exit_list);
net_free(net); net_drop_ns(net);
} }
} }
static DECLARE_WORK(net_cleanup_work, cleanup_net); static DECLARE_WORK(net_cleanup_work, cleanup_net);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment