Commit 998e7a76 authored by Sven Wegener's avatar Sven Wegener

ipvs: Use kthread_run() instead of doing a double-fork via kernel_thread()

This also moves the setup code out of the daemons, so that we're able to
return proper error codes to user space. The current code will return success
to user space when the daemon is started with an invald mcast interface. With
these changes we get an appropriate "No such device" error.

We longer need our own completion to be sure the daemons are actually running,
because they no longer contain code that can fail and kthread_run() takes care
of the rest.
Signed-off-by: default avatarSven Wegener <sven.wegener@stealer.net>
Acked-by: default avatarSimon Horman <horms@verge.net.au>
parent e6dd731c
...@@ -28,10 +28,10 @@ ...@@ -28,10 +28,10 @@
#include <linux/igmp.h> /* for ip_mc_join_group */ #include <linux/igmp.h> /* for ip_mc_join_group */
#include <linux/udp.h> #include <linux/udp.h>
#include <linux/err.h> #include <linux/err.h>
#include <linux/kthread.h>
#include <net/ip.h> #include <net/ip.h>
#include <net/sock.h> #include <net/sock.h>
#include <asm/uaccess.h> /* for get_fs and set_fs */
#include <net/ip_vs.h> #include <net/ip_vs.h>
...@@ -67,8 +67,8 @@ struct ip_vs_sync_conn_options { ...@@ -67,8 +67,8 @@ struct ip_vs_sync_conn_options {
}; };
struct ip_vs_sync_thread_data { struct ip_vs_sync_thread_data {
struct completion *startup; struct socket *sock;
int state; char *buf;
}; };
#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) #define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn))
...@@ -139,6 +139,10 @@ volatile int ip_vs_backup_syncid = 0; ...@@ -139,6 +139,10 @@ volatile int ip_vs_backup_syncid = 0;
char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
/* sync daemon tasks */
static struct task_struct *sync_master_thread;
static struct task_struct *sync_backup_thread;
/* multicast addr */ /* multicast addr */
static struct sockaddr_in mcast_addr = { static struct sockaddr_in mcast_addr = {
.sin_family = AF_INET, .sin_family = AF_INET,
...@@ -147,14 +151,7 @@ static struct sockaddr_in mcast_addr = { ...@@ -147,14 +151,7 @@ static struct sockaddr_in mcast_addr = {
}; };
static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) static inline struct ip_vs_sync_buff *sb_dequeue(void)
{
spin_lock(&ip_vs_sync_lock);
list_add_tail(&sb->list, &ip_vs_sync_queue);
spin_unlock(&ip_vs_sync_lock);
}
static inline struct ip_vs_sync_buff * sb_dequeue(void)
{ {
struct ip_vs_sync_buff *sb; struct ip_vs_sync_buff *sb;
...@@ -198,6 +195,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb) ...@@ -198,6 +195,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
kfree(sb); kfree(sb);
} }
static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
{
spin_lock(&ip_vs_sync_lock);
if (ip_vs_sync_state & IP_VS_STATE_MASTER)
list_add_tail(&sb->list, &ip_vs_sync_queue);
else
ip_vs_sync_buff_release(sb);
spin_unlock(&ip_vs_sync_lock);
}
/* /*
* Get the current sync buffer if it has been created for more * Get the current sync buffer if it has been created for more
* than the specified time or the specified time is zero. * than the specified time or the specified time is zero.
...@@ -712,43 +719,28 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) ...@@ -712,43 +719,28 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
} }
static DECLARE_WAIT_QUEUE_HEAD(sync_wait); static int sync_thread_master(void *data)
static pid_t sync_master_pid = 0;
static pid_t sync_backup_pid = 0;
static DECLARE_WAIT_QUEUE_HEAD(stop_sync_wait);
static int stop_master_sync = 0;
static int stop_backup_sync = 0;
static void sync_master_loop(void)
{ {
struct socket *sock; struct ip_vs_sync_thread_data *tinfo = data;
struct ip_vs_sync_buff *sb; struct ip_vs_sync_buff *sb;
/* create the sending multicast socket */
sock = make_send_sock();
if (IS_ERR(sock))
return;
IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, " IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, "
"syncid = %d\n", "syncid = %d\n",
ip_vs_master_mcast_ifn, ip_vs_master_syncid); ip_vs_master_mcast_ifn, ip_vs_master_syncid);
for (;;) { while (!kthread_should_stop()) {
while ((sb=sb_dequeue())) { while ((sb = sb_dequeue())) {
ip_vs_send_sync_msg(sock, sb->mesg); ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
ip_vs_sync_buff_release(sb); ip_vs_sync_buff_release(sb);
} }
/* check if entries stay in curr_sb for 2 seconds */ /* check if entries stay in curr_sb for 2 seconds */
if ((sb = get_curr_sync_buff(2*HZ))) { sb = get_curr_sync_buff(2 * HZ);
ip_vs_send_sync_msg(sock, sb->mesg); if (sb) {
ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
ip_vs_sync_buff_release(sb); ip_vs_sync_buff_release(sb);
} }
if (stop_master_sync)
break;
msleep_interruptible(1000); msleep_interruptible(1000);
} }
...@@ -763,262 +755,173 @@ static void sync_master_loop(void) ...@@ -763,262 +755,173 @@ static void sync_master_loop(void)
} }
/* release the sending multicast socket */ /* release the sending multicast socket */
sock_release(sock); sock_release(tinfo->sock);
kfree(tinfo);
return 0;
} }
static void sync_backup_loop(void) static int sync_thread_backup(void *data)
{ {
struct socket *sock; struct ip_vs_sync_thread_data *tinfo = data;
char *buf;
int len; int len;
if (!(buf = kmalloc(sync_recv_mesg_maxlen, GFP_ATOMIC))) {
IP_VS_ERR("sync_backup_loop: kmalloc error\n");
return;
}
/* create the receiving multicast socket */
sock = make_receive_sock();
if (IS_ERR(sock))
goto out;
IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, " IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, "
"syncid = %d\n", "syncid = %d\n",
ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
for (;;) { while (!kthread_should_stop()) {
/* do you have data now? */ /* do we have data now? */
while (!skb_queue_empty(&(sock->sk->sk_receive_queue))) { while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
if ((len = len = ip_vs_receive(tinfo->sock, tinfo->buf,
ip_vs_receive(sock, buf, sync_recv_mesg_maxlen);
sync_recv_mesg_maxlen)) <= 0) { if (len <= 0) {
IP_VS_ERR("receiving message error\n"); IP_VS_ERR("receiving message error\n");
break; break;
} }
/* disable bottom half, because it accessed the data
/* disable bottom half, because it accesses the data
shared by softirq while getting/creating conns */ shared by softirq while getting/creating conns */
local_bh_disable(); local_bh_disable();
ip_vs_process_message(buf, len); ip_vs_process_message(tinfo->buf, len);
local_bh_enable(); local_bh_enable();
} }
if (stop_backup_sync)
break;
msleep_interruptible(1000); msleep_interruptible(1000);
} }
/* release the sending multicast socket */ /* release the sending multicast socket */
sock_release(sock); sock_release(tinfo->sock);
kfree(tinfo->buf);
kfree(tinfo);
out: return 0;
kfree(buf);
} }
static void set_sync_pid(int sync_state, pid_t sync_pid) int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
{
if (sync_state == IP_VS_STATE_MASTER)
sync_master_pid = sync_pid;
else if (sync_state == IP_VS_STATE_BACKUP)
sync_backup_pid = sync_pid;
}
static void set_stop_sync(int sync_state, int set)
{ {
if (sync_state == IP_VS_STATE_MASTER) struct ip_vs_sync_thread_data *tinfo;
stop_master_sync = set; struct task_struct **realtask, *task;
else if (sync_state == IP_VS_STATE_BACKUP) struct socket *sock;
stop_backup_sync = set; char *name, *buf = NULL;
else { int (*threadfn)(void *data);
stop_master_sync = set; int result = -ENOMEM;
stop_backup_sync = set;
}
}
static int sync_thread(void *startup) IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
{ IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
DECLARE_WAITQUEUE(wait, current); sizeof(struct ip_vs_sync_conn));
mm_segment_t oldmm;
int state;
const char *name;
struct ip_vs_sync_thread_data *tinfo = startup;
/* increase the module use count */ if (state == IP_VS_STATE_MASTER) {
ip_vs_use_count_inc(); if (sync_master_thread)
return -EEXIST;
if (ip_vs_sync_state & IP_VS_STATE_MASTER && !sync_master_pid) { strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
state = IP_VS_STATE_MASTER; sizeof(ip_vs_master_mcast_ifn));
ip_vs_master_syncid = syncid;
realtask = &sync_master_thread;
name = "ipvs_syncmaster"; name = "ipvs_syncmaster";
} else if (ip_vs_sync_state & IP_VS_STATE_BACKUP && !sync_backup_pid) { threadfn = sync_thread_master;
state = IP_VS_STATE_BACKUP; sock = make_send_sock();
} else if (state == IP_VS_STATE_BACKUP) {
if (sync_backup_thread)
return -EEXIST;
strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
sizeof(ip_vs_backup_mcast_ifn));
ip_vs_backup_syncid = syncid;
realtask = &sync_backup_thread;
name = "ipvs_syncbackup"; name = "ipvs_syncbackup";
threadfn = sync_thread_backup;
sock = make_receive_sock();
} else { } else {
IP_VS_BUG();
ip_vs_use_count_dec();
return -EINVAL; return -EINVAL;
} }
daemonize(name); if (IS_ERR(sock)) {
result = PTR_ERR(sock);
oldmm = get_fs(); goto out;
set_fs(KERNEL_DS); }
/* Block all signals */
spin_lock_irq(&current->sighand->siglock);
siginitsetinv(&current->blocked, 0);
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
/* set the maximum length of sync message */
set_sync_mesg_maxlen(state); set_sync_mesg_maxlen(state);
if (state == IP_VS_STATE_BACKUP) {
buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
if (!buf)
goto outsocket;
}
add_wait_queue(&sync_wait, &wait); tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
if (!tinfo)
set_sync_pid(state, task_pid_nr(current)); goto outbuf;
complete(tinfo->startup);
/*
* once we call the completion queue above, we should
* null out that reference, since its allocated on the
* stack of the creating kernel thread
*/
tinfo->startup = NULL;
/* processing master/backup loop here */
if (state == IP_VS_STATE_MASTER)
sync_master_loop();
else if (state == IP_VS_STATE_BACKUP)
sync_backup_loop();
else IP_VS_BUG();
remove_wait_queue(&sync_wait, &wait);
/* thread exits */
/* tinfo->sock = sock;
* If we weren't explicitly stopped, then we tinfo->buf = buf;
* exited in error, and should undo our state
*/
if ((!stop_master_sync) && (!stop_backup_sync))
ip_vs_sync_state -= tinfo->state;
set_sync_pid(state, 0); task = kthread_run(threadfn, tinfo, name);
IP_VS_INFO("sync thread stopped!\n"); if (IS_ERR(task)) {
result = PTR_ERR(task);
goto outtinfo;
}
set_fs(oldmm); /* mark as active */
*realtask = task;
ip_vs_sync_state |= state;
/* decrease the module use count */ /* increase the module use count */
ip_vs_use_count_dec(); ip_vs_use_count_inc();
set_stop_sync(state, 0); return 0;
wake_up(&stop_sync_wait);
/* outtinfo:
* we need to free the structure that was allocated
* for us in start_sync_thread
*/
kfree(tinfo); kfree(tinfo);
return 0; outbuf:
kfree(buf);
outsocket:
sock_release(sock);
out:
return result;
} }
static int fork_sync_thread(void *startup) int stop_sync_thread(int state)
{ {
pid_t pid; IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
/* fork the sync thread here, then the parent process of the
sync thread is the init process after this thread exits. */
repeat:
if ((pid = kernel_thread(sync_thread, startup, 0)) < 0) {
IP_VS_ERR("could not create sync_thread due to %d... "
"retrying.\n", pid);
msleep_interruptible(1000);
goto repeat;
}
return 0;
}
int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) if (state == IP_VS_STATE_MASTER) {
{ if (!sync_master_thread)
DECLARE_COMPLETION_ONSTACK(startup); return -ESRCH;
pid_t pid;
struct ip_vs_sync_thread_data *tinfo;
if ((state == IP_VS_STATE_MASTER && sync_master_pid) || IP_VS_INFO("stopping master sync thread %d ...\n",
(state == IP_VS_STATE_BACKUP && sync_backup_pid)) task_pid_nr(sync_master_thread));
return -EEXIST;
/* /*
* Note that tinfo will be freed in sync_thread on exit * The lock synchronizes with sb_queue_tail(), so that we don't
* add sync buffers to the queue, when we are already in
* progress of stopping the master sync daemon.
*/ */
tinfo = kmalloc(sizeof(struct ip_vs_sync_thread_data), GFP_KERNEL);
if (!tinfo)
return -ENOMEM;
IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current)); spin_lock(&ip_vs_sync_lock);
IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n", ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
sizeof(struct ip_vs_sync_conn)); spin_unlock(&ip_vs_sync_lock);
kthread_stop(sync_master_thread);
ip_vs_sync_state |= state; sync_master_thread = NULL;
if (state == IP_VS_STATE_MASTER) { } else if (state == IP_VS_STATE_BACKUP) {
strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, if (!sync_backup_thread)
sizeof(ip_vs_master_mcast_ifn)); return -ESRCH;
ip_vs_master_syncid = syncid;
} else {
strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
sizeof(ip_vs_backup_mcast_ifn));
ip_vs_backup_syncid = syncid;
}
tinfo->state = state; IP_VS_INFO("stopping backup sync thread %d ...\n",
tinfo->startup = &startup; task_pid_nr(sync_backup_thread));
repeat: ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
if ((pid = kernel_thread(fork_sync_thread, tinfo, 0)) < 0) { kthread_stop(sync_backup_thread);
IP_VS_ERR("could not create fork_sync_thread due to %d... " sync_backup_thread = NULL;
"retrying.\n", pid); } else {
msleep_interruptible(1000); return -EINVAL;
goto repeat;
} }
wait_for_completion(&startup); /* decrease the module use count */
ip_vs_use_count_dec();
return 0;
}
int stop_sync_thread(int state)
{
DECLARE_WAITQUEUE(wait, current);
if ((state == IP_VS_STATE_MASTER && !sync_master_pid) ||
(state == IP_VS_STATE_BACKUP && !sync_backup_pid))
return -ESRCH;
IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
IP_VS_INFO("stopping sync thread %d ...\n",
(state == IP_VS_STATE_MASTER) ?
sync_master_pid : sync_backup_pid);
__set_current_state(TASK_UNINTERRUPTIBLE);
add_wait_queue(&stop_sync_wait, &wait);
set_stop_sync(state, 1);
ip_vs_sync_state -= state;
wake_up(&sync_wait);
schedule();
__set_current_state(TASK_RUNNING);
remove_wait_queue(&stop_sync_wait, &wait);
/* Note: no need to reap the sync thread, because its parent
process is the init process */
if ((state == IP_VS_STATE_MASTER && stop_master_sync) ||
(state == IP_VS_STATE_BACKUP && stop_backup_sync))
IP_VS_BUG();
return 0; return 0;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment