Commit 6dfc8897 authored by Ingo Molnar, committed by Linus Torvalds

[PATCH] shared thread signals

Support POSIX-compliant thread signals at the kernel level, with usable
debugging (broadcast SIGSTOP, SIGCONT) and thread group management
(broadcast SIGKILL), and load-balance 'process' signals between threads
for better signal performance.

Changes:

- POSIX thread semantics for signals

There are 7 'types' of actions a signal can take: specific, load-balance,
kill-all, kill-all+core, stop-all, continue-all and ignore. Following the
POSIX specification, each signal is assigned one of these types for both
the 'handler defined' and the 'handler not defined (kernel default)' case.
Here is the table:

 ----------------------------------------------------------
 |                    |  userspace       |  kernel        |
 ----------------------------------------------------------
 |  SIGHUP            |  load-balance    |  kill-all      |
 |  SIGINT            |  load-balance    |  kill-all      |
 |  SIGQUIT           |  load-balance    |  kill-all+core |
 |  SIGILL            |  specific        |  kill-all+core |
 |  SIGTRAP           |  specific        |  kill-all+core |
 |  SIGABRT/SIGIOT    |  specific        |  kill-all+core |
 |  SIGBUS            |  specific        |  kill-all+core |
 |  SIGFPE            |  specific        |  kill-all+core |
 |  SIGKILL           |  n/a             |  kill-all      |
 |  SIGUSR1           |  load-balance    |  kill-all      |
 |  SIGSEGV           |  specific        |  kill-all+core |
 |  SIGUSR2           |  load-balance    |  kill-all      |
 |  SIGPIPE           |  specific        |  kill-all      |
 |  SIGALRM           |  load-balance    |  kill-all      |
 |  SIGTERM           |  load-balance    |  kill-all      |
 |  SIGCHLD           |  load-balance    |  ignore        |
 |  SIGCONT           |  load-balance    |  continue-all  |
 |  SIGSTOP           |  n/a             |  stop-all      |
 |  SIGTSTP           |  load-balance    |  stop-all      |
 |  SIGTTIN           |  load-balance    |  stop-all      |
 |  SIGTTOU           |  load-balance    |  stop-all      |
 |  SIGURG            |  load-balance    |  ignore        |
 |  SIGXCPU           |  specific        |  kill-all+core |
 |  SIGXFSZ           |  specific        |  kill-all+core |
 |  SIGVTALRM         |  load-balance    |  kill-all      |
 |  SIGPROF           |  specific        |  kill-all      |
 |  SIGPOLL/SIGIO     |  load-balance    |  kill-all      |
 |  SIGSYS/SIGUNUSED  |  specific        |  kill-all+core |
 |  SIGSTKFLT         |  specific        |  kill-all      |
 |  SIGWINCH          |  load-balance    |  ignore        |
 |  SIGPWR            |  load-balance    |  kill-all      |
 |  SIGRTMIN-SIGRTMAX |  load-balance    |  kill-all      |
 ----------------------------------------------------------

As you can see from the table, signals that have handlers defined never
get broadcast - they are either specific or load-balanced.

- CLONE_THREAD implies CLONE_SIGHAND

It does not make much sense to have a thread group that does not share
signal handlers. In fact in the patch I'm using the signal spinlock to
lock access to the thread group. I made the siglock IRQ-safe, so we can
load-balance signals from interrupt contexts as well. (We cannot take the
tasklist lock in write mode from IRQ handlers.)

This is not as clean as I'd like it to be, but it's the best I could come
up with so far.

- thread group list management reworked.

Threads are now removed from the group when the thread is unhashed from
the PID table. This makes the most sense. It also helps another feature
that relies on an intact thread group list: multithreaded coredumps.

- child reparenting reworked.

The O(N) algorithm in forget_original_parent() causes massive performance
problems if a large number of threads exit from the group. Performance
improves more than 10-fold if the following simple rules are followed
instead (a sketch follows the list):

 - reparent children to the *previous* thread [exiting or not]
 - if a thread is detached then reparent to init.
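
A condensed sketch of the resulting reaper choice, as implemented in the
forget_original_parent() hunk further down (prev_thread() and
child_reaper are real identifiers from this patch; wrapping the logic in
a pick_reaper() helper is only for illustration):

/* illustration of the new O(1) reaper selection */
static task_t *pick_reaper(task_t *father)
{
	task_t *reaper;

	if (father->exit_signal != -1)
		/* reparent children to the previous thread in the group */
		reaper = prev_thread(father);
	else
		/* detached thread: reparent straight to init */
		reaper = child_reaper;

	if (reaper == father)
		reaper = child_reaper;
	return reaper;
}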

- fast broadcasting of kernel-internal SIGSTOP, SIGCONT, SIGKILL, etc.

Kernel-internal broadcast signals are a potential DoS problem, since they
might generate massive amounts of GFP_ATOMIC allocations of siginfo
structures. The important thing to note is that the siginfo structure does
not actually have to be allocated and queued: the signal processing code
has all the information it needs, and none of these signals carries any
payload in the siginfo structure. This makes a broadcast SIGKILL a very
simple operation: all threads simply get the SIGKILL bit (signal 9) set in
their pending bitmask. The speedup due to this was significant - and the
robustness win is invaluable.
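
The fast path is visible in the send_signal() hunk further down:
kernel-internal broadcasts pass a magic info pointer of 2, which skips
the sigqueue allocation entirely and only sets the pending bit. A
condensed paraphrase of the patched function:

static int send_signal(int sig, struct siginfo *info,
		       struct sigpending *signals)
{
	/*
	 * fast-pathed signals for kernel-internal things like
	 * SIGSTOP or SIGKILL: no allocation, no queueing.
	 */
	if ((unsigned long)info == 2)
		goto out_set;

	/* ... normal path: try to allocate and queue a sigqueue ... */

out_set:
	sigaddset(&signals->signal, sig);
	return 0;
}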

- sys_execve() should not kill off 'all other' threads.

The 'exec() kills all other threads if the master thread does the exec()'
rule is a POSIX(-ish) thing that should not be hardcoded in the kernel in
this case.

To handle POSIX exec() semantics, glibc uses a special syscall that kills
'all but self' threads: sys_exit_allbutself().

The straightforward exec() implementation just calls sys_exit_allbutself()
and then sys_execve().

(This syscall is also used internally when the thread group leader
sys_exit()s or sys_exec()s, to ensure the integrity of the thread
group.)
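
As an illustration of the described ordering only (the exact prototype
and syscall number of sys_exit_allbutself() are not part of the hunks
below, and the posix_execve() wrapper name is made up, so treat this as
a hedged sketch rather than glibc's actual code):

/* pseudo-glibc: POSIX exec() on top of this patch */
int posix_execve(const char *path, char *const argv[], char *const envp[])
{
	sys_exit_allbutself();		 /* kill every other thread first   */
	return execve(path, argv, envp); /* then exec in the surviving one  */
}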
parent 36780249
......@@ -504,6 +504,8 @@ static inline int make_private_signals(void)
{
struct signal_struct * newsig;
remove_thread_group(current, current->sig);
if (atomic_read(&current->sig->count) <= 1)
return 0;
newsig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL);
......@@ -575,42 +577,10 @@ static inline void flush_old_files(struct files_struct * files)
*/
static void de_thread(struct task_struct *tsk)
{
struct task_struct *sub;
struct list_head *head, *ptr;
struct siginfo info;
int pause;
write_lock_irq(&tasklist_lock);
if (tsk->tgid != tsk->pid) {
/* subsidiary thread - just escapes the group */
list_del_init(&tsk->thread_group);
tsk->tgid = tsk->pid;
pause = 0;
}
else {
/* master thread - kill all subsidiary threads */
info.si_signo = SIGKILL;
info.si_errno = 0;
info.si_code = SI_DETHREAD;
info.si_pid = current->pid;
info.si_uid = current->uid;
head = tsk->thread_group.next;
list_del_init(&tsk->thread_group);
list_for_each(ptr,head) {
sub = list_entry(ptr,struct task_struct,thread_group);
send_sig_info(SIGKILL,&info,sub);
}
pause = 1;
}
write_unlock_irq(&tasklist_lock);
/* give the subsidiary threads a chance to clean themselves up */
if (pause) yield();
if (!list_empty(&tsk->thread_group))
BUG();
/* An exec() starts a new thread group: */
tsk->tgid = tsk->pid;
}
int flush_old_exec(struct linux_binprm * bprm)
......@@ -633,6 +603,8 @@ int flush_old_exec(struct linux_binprm * bprm)
if (retval) goto mmap_failed;
/* This is the point of no return */
de_thread(current);
release_old_signals(oldsig);
current->sas_ss_sp = current->sas_ss_size = 0;
......@@ -651,9 +623,6 @@ int flush_old_exec(struct linux_binprm * bprm)
flush_thread();
if (!list_empty(&current->thread_group))
de_thread(current);
if (bprm->e_uid != current->euid || bprm->e_gid != current->egid ||
permission(bprm->file->f_dentry->d_inode,MAY_READ))
current->mm->dumpable = 0;
......
......@@ -158,6 +158,8 @@ typedef struct {
#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
#define rwlock_is_locked(x) ((x)->lock != RW_LOCK_BIAS)
/*
* On x86, we implement read-write locks as a 32-bit counter
* with the high bit (sign) being the "contended" bit.
......
......@@ -211,6 +211,11 @@ struct signal_struct {
atomic_t count;
struct k_sigaction action[_NSIG];
spinlock_t siglock;
/* current thread group signal load-balancing target: */
task_t *curr_target;
struct sigpending shared_pending;
};
/*
......@@ -356,7 +361,7 @@ struct task_struct {
spinlock_t sigmask_lock; /* Protects signal and blocked */
struct signal_struct *sig;
sigset_t blocked;
sigset_t blocked, real_blocked, shared_unblocked;
struct sigpending pending;
unsigned long sas_ss_sp;
......@@ -431,6 +436,7 @@ extern void set_cpus_allowed(task_t *p, unsigned long new_mask);
extern void set_user_nice(task_t *p, long nice);
extern int task_prio(task_t *p);
extern int task_nice(task_t *p);
extern int task_curr(task_t *p);
extern int idle_cpu(int cpu);
void yield(void);
......@@ -535,7 +541,7 @@ extern void proc_caches_init(void);
extern void flush_signals(struct task_struct *);
extern void flush_signal_handlers(struct task_struct *);
extern void sig_exit(int, int, struct siginfo *);
extern int dequeue_signal(sigset_t *, siginfo_t *);
extern int dequeue_signal(struct sigpending *pending, sigset_t *mask, siginfo_t *info);
extern void block_all_signals(int (*notifier)(void *priv), void *priv,
sigset_t *mask);
extern void unblock_all_signals(void);
......@@ -654,6 +660,7 @@ extern void exit_thread(void);
extern void exit_mm(struct task_struct *);
extern void exit_files(struct task_struct *);
extern void exit_sighand(struct task_struct *);
extern void remove_thread_group(struct task_struct *tsk, struct signal_struct *sig);
extern void reparent_to_init(void);
extern void daemonize(void);
......@@ -786,8 +793,29 @@ static inline struct task_struct *younger_sibling(struct task_struct *p)
#define for_each_thread(task) \
for (task = next_thread(current) ; task != current ; task = next_thread(task))
#define next_thread(p) \
list_entry((p)->thread_group.next, struct task_struct, thread_group)
static inline task_t *next_thread(task_t *p)
{
if (!p->sig)
BUG();
#if CONFIG_SMP
if (!spin_is_locked(&p->sig->siglock) &&
!rwlock_is_locked(&tasklist_lock))
BUG();
#endif
return list_entry((p)->thread_group.next, task_t, thread_group);
}
static inline task_t *prev_thread(task_t *p)
{
if (!p->sig)
BUG();
#if CONFIG_SMP
if (!spin_is_locked(&p->sig->siglock) &&
!rwlock_is_locked(&tasklist_lock))
BUG();
#endif
return list_entry((p)->thread_group.prev, task_t, thread_group);
}
#define thread_group_leader(p) (p->pid == p->tgid)
......@@ -903,21 +931,8 @@ static inline void cond_resched(void)
This is required every time the blocked sigset_t changes.
All callers should have t->sigmask_lock.  */
static inline void recalc_sigpending_tsk(struct task_struct *t)
{
if (has_pending_signals(&t->pending.signal, &t->blocked))
set_tsk_thread_flag(t, TIF_SIGPENDING);
else
clear_tsk_thread_flag(t, TIF_SIGPENDING);
}
static inline void recalc_sigpending(void)
{
if (has_pending_signals(&current->pending.signal, &current->blocked))
set_thread_flag(TIF_SIGPENDING);
else
clear_thread_flag(TIF_SIGPENDING);
}
extern FASTCALL(void recalc_sigpending_tsk(struct task_struct *t));
extern void recalc_sigpending(void);
/*
* Wrappers for p->thread_info->cpu access. No-op on UP.
......
......@@ -36,7 +36,6 @@ static inline void __unhash_process(struct task_struct *p)
nr_threads--;
unhash_pid(p);
REMOVE_LINKS(p);
list_del(&p->thread_group);
p->pid = 0;
proc_dentry = p->proc_dentry;
if (unlikely(proc_dentry != NULL)) {
......@@ -73,6 +72,7 @@ static void release_task(struct task_struct * p)
}
BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
unhash_process(p);
exit_sighand(p);
release_thread(p);
if (p != current) {
......@@ -244,7 +244,8 @@ void daemonize(void)
static void reparent_thread(task_t *p, task_t *reaper, task_t *child_reaper)
{
/* We dont want people slaying init */
p->exit_signal = SIGCHLD;
if (p->exit_signal != -1)
p->exit_signal = SIGCHLD;
p->self_exec_id++;
/* Make sure we're not reparenting to ourselves */
......@@ -412,18 +413,15 @@ void exit_mm(struct task_struct *tsk)
*/
static inline void forget_original_parent(struct task_struct * father)
{
struct task_struct *p, *reaper;
struct task_struct *p, *reaper = father;
struct list_head *_p;
read_lock(&tasklist_lock);
write_lock_irq(&tasklist_lock);
/* Next in our thread group, if they're not already exiting */
reaper = father;
do {
reaper = next_thread(reaper);
if (!(reaper->flags & PF_EXITING))
break;
} while (reaper != father);
if (father->exit_signal != -1)
reaper = prev_thread(reaper);
else
reaper = child_reaper;
if (reaper == father)
reaper = child_reaper;
......@@ -444,7 +442,7 @@ static inline void forget_original_parent(struct task_struct * father)
p = list_entry(_p,struct task_struct,ptrace_list);
reparent_thread(p, reaper, child_reaper);
}
read_unlock(&tasklist_lock);
write_unlock_irq(&tasklist_lock);
}
static inline void zap_thread(task_t *p, task_t *father, int traced)
......@@ -604,7 +602,6 @@ NORET_TYPE void do_exit(long code)
__exit_files(tsk);
__exit_fs(tsk);
exit_namespace(tsk);
exit_sighand(tsk);
exit_thread();
if (current->leader)
......@@ -763,6 +760,8 @@ asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struc
if (options & __WNOTHREAD)
break;
tsk = next_thread(tsk);
if (tsk->sig != current->sig)
BUG();
} while (tsk != current);
read_unlock(&tasklist_lock);
if (flag) {
......
......@@ -630,6 +630,9 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t
spin_lock_init(&sig->siglock);
atomic_set(&sig->count, 1);
memcpy(tsk->sig->action, current->sig->action, sizeof(tsk->sig->action));
sig->curr_target = NULL;
init_sigpending(&sig->shared_pending);
return 0;
}
......@@ -664,6 +667,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
return ERR_PTR(-EINVAL);
/*
* Thread groups must share signals as well:
*/
if (clone_flags & CLONE_THREAD)
clone_flags |= CLONE_SIGHAND;
retval = security_ops->task_create(clone_flags);
if (retval)
goto fork_out;
......@@ -843,8 +852,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->parent = p->real_parent;
if (clone_flags & CLONE_THREAD) {
spin_lock(&current->sig->siglock);
p->tgid = current->tgid;
list_add(&p->thread_group, &current->thread_group);
spin_unlock(&current->sig->siglock);
}
SET_LINKS(p);
......
......@@ -1335,6 +1335,15 @@ int task_nice(task_t *p)
return TASK_NICE(p);
}
/**
* task_curr - is this task currently executing on a CPU?
* @p: the task in question.
*/
int task_curr(task_t *p)
{
return cpu_curr(task_cpu(p)) == p;
}
/**
* idle_cpu - is a given cpu idle currently?
* @cpu: the processor in question.
......
......@@ -6,6 +6,8 @@
* 1997-11-02 Modified for POSIX.1b signals by Richard Henderson
*/
#define __KERNEL_SYSCALLS__
#include <linux/config.h>
#include <linux/slab.h>
#include <linux/module.h>
......@@ -16,7 +18,7 @@
#include <linux/fs.h>
#include <linux/tty.h>
#include <linux/binfmts.h>
#include <asm/param.h>
#include <asm/uaccess.h>
#include <asm/siginfo.h>
......@@ -24,40 +26,146 @@
* SLAB caches for signal bits.
*/
#define DEBUG_SIG 0
#if DEBUG_SIG
#define SIG_SLAB_DEBUG (SLAB_RED_ZONE /* | SLAB_POISON */)
#else
#define SIG_SLAB_DEBUG 0
#endif
static kmem_cache_t *sigqueue_cachep;
atomic_t nr_queued_signals;
int max_queued_signals = 1024;
/*********************************************************
POSIX thread group signal behavior:
----------------------------------------------------------
| | userspace | kernel |
----------------------------------------------------------
| SIGHUP | load-balance | kill-all |
| SIGINT | load-balance | kill-all |
| SIGQUIT | load-balance | kill-all+core |
| SIGILL | specific | kill-all+core |
| SIGTRAP | specific | kill-all+core |
| SIGABRT/SIGIOT | specific | kill-all+core |
| SIGBUS | specific | kill-all+core |
| SIGFPE | specific | kill-all+core |
| SIGKILL | n/a | kill-all |
| SIGUSR1 | load-balance | kill-all |
| SIGSEGV | specific | kill-all+core |
| SIGUSR2 | load-balance | kill-all |
| SIGPIPE | specific | kill-all |
| SIGALRM | load-balance | kill-all |
| SIGTERM | load-balance | kill-all |
| SIGCHLD | load-balance | ignore |
| SIGCONT | specific | continue-all |
| SIGSTOP | n/a | stop-all |
| SIGTSTP | load-balance | stop-all |
| SIGTTIN | load-balance | stop-all |
| SIGTTOU | load-balance | stop-all |
| SIGURG | load-balance | ignore |
| SIGXCPU | specific | kill-all+core |
| SIGXFSZ | specific | kill-all+core |
| SIGVTALRM | load-balance | kill-all |
| SIGPROF | specific | kill-all |
| SIGPOLL/SIGIO | load-balance | kill-all |
| SIGSYS/SIGUNUSED | specific | kill-all+core |
| SIGSTKFLT | specific | kill-all |
| SIGWINCH | load-balance | ignore |
| SIGPWR | load-balance | kill-all |
| SIGRTMIN-SIGRTMAX | load-balance | kill-all |
----------------------------------------------------------
*/
#define M(sig) (1UL << (sig))
#define SIG_USER_SPECIFIC_MASK (\
M(SIGILL) | M(SIGTRAP) | M(SIGABRT) | M(SIGBUS) | \
M(SIGFPE) | M(SIGSEGV) | M(SIGPIPE) | M(SIGXFSZ) | \
M(SIGPROF) | M(SIGSYS) | M(SIGSTKFLT) | M(SIGCONT) )
#define SIG_USER_LOAD_BALANCE_MASK (\
M(SIGHUP) | M(SIGINT) | M(SIGQUIT) | M(SIGUSR1) | \
M(SIGUSR2) | M(SIGALRM) | M(SIGTERM) | M(SIGCHLD) | \
M(SIGURG) | M(SIGVTALRM) | M(SIGPOLL) | M(SIGWINCH) | \
M(SIGPWR) | M(SIGTSTP) | M(SIGTTIN) | M(SIGTTOU) )
#define SIG_KERNEL_SPECIFIC_MASK (\
M(SIGCHLD) | M(SIGURG) | M(SIGWINCH) )
#define SIG_KERNEL_BROADCAST_MASK (\
M(SIGHUP) | M(SIGINT) | M(SIGQUIT) | M(SIGILL) | \
M(SIGTRAP) | M(SIGABRT) | M(SIGBUS) | M(SIGFPE) | \
M(SIGKILL) | M(SIGUSR1) | M(SIGSEGV) | M(SIGUSR2) | \
M(SIGPIPE) | M(SIGALRM) | M(SIGTERM) | M(SIGXCPU) | \
M(SIGXFSZ) | M(SIGVTALRM) | M(SIGPROF) | M(SIGPOLL) | \
M(SIGSYS) | M(SIGSTKFLT) | M(SIGPWR) | M(SIGCONT) | \
M(SIGSTOP) | M(SIGTSTP) | M(SIGTTIN) | M(SIGTTOU) )
#define SIG_KERNEL_ONLY_MASK (\
M(SIGKILL) | M(SIGSTOP) )
#define SIG_KERNEL_COREDUMP_MASK (\
M(SIGQUIT) | M(SIGILL) | M(SIGTRAP) | M(SIGABRT) | \
M(SIGFPE) | M(SIGSEGV) | M(SIGBUS) | M(SIGSYS) | \
M(SIGXCPU) | M(SIGXFSZ) )
#define T(sig, mask) \
((1UL << (sig)) & mask)
#define sig_user_specific(sig) T(sig, SIG_USER_SPECIFIC_MASK)
#define sig_user_load_balance(sig) \
(T(sig, SIG_USER_LOAD_BALANCE_MASK) || ((sig) >= SIGRTMIN))
#define sig_kernel_specific(sig) T(sig, SIG_KERNEL_SPECIFIC_MASK)
#define sig_kernel_broadcast(sig) \
(T(sig, SIG_KERNEL_BROADCAST_MASK) || ((sig) >= SIGRTMIN))
#define sig_kernel_only(sig) T(sig, SIG_KERNEL_ONLY_MASK)
#define sig_kernel_coredump(sig) T(sig, SIG_KERNEL_COREDUMP_MASK)
#define sig_user_defined(t, sig) \
(((t)->sig->action[(sig)-1].sa.sa_handler != SIG_DFL) && \
((t)->sig->action[(sig)-1].sa.sa_handler != SIG_IGN))
#define sig_ignored(t, sig) \
(((sig) != SIGCHLD) && \
((t)->sig->action[(sig)-1].sa.sa_handler == SIG_IGN))
void __init signals_init(void)
{
sigqueue_cachep =
kmem_cache_create("sigqueue",
sizeof(struct sigqueue),
__alignof__(struct sigqueue),
SIG_SLAB_DEBUG, NULL, NULL);
0, NULL, NULL);
if (!sigqueue_cachep)
panic("signals_init(): cannot create sigqueue SLAB cache");
}
#define PENDING(p,b) has_pending_signals(&(p)->signal, (b))
void recalc_sigpending_tsk(struct task_struct *t)
{
if (PENDING(&t->pending, &t->blocked) ||
PENDING(&t->sig->shared_pending, &t->blocked))
set_tsk_thread_flag(t, TIF_SIGPENDING);
else
clear_tsk_thread_flag(t, TIF_SIGPENDING);
}
void recalc_sigpending(void)
{
if (PENDING(&current->pending, &current->blocked) ||
PENDING(&current->sig->shared_pending, &current->blocked))
set_thread_flag(TIF_SIGPENDING);
else
clear_thread_flag(TIF_SIGPENDING);
}
/* Given the mask, find the first available signal that should be serviced. */
static int
next_signal(struct task_struct *tsk, sigset_t *mask)
next_signal(struct sigpending *pending, sigset_t *mask)
{
unsigned long i, *s, *m, x;
int sig = 0;
s = tsk->pending.signal.sig;
s = pending->signal.sig;
m = mask->sig;
switch (_NSIG_WORDS) {
default:
......@@ -113,15 +221,36 @@ flush_signals(struct task_struct *t)
flush_sigqueue(&t->pending);
}
void remove_thread_group(struct task_struct *tsk, struct signal_struct *sig)
{
write_lock_irq(&tasklist_lock);
spin_lock(&tsk->sig->siglock);
if (tsk == sig->curr_target)
sig->curr_target = next_thread(tsk);
list_del_init(&tsk->thread_group);
spin_unlock(&tsk->sig->siglock);
write_unlock_irq(&tasklist_lock);
}
void exit_sighand(struct task_struct *tsk)
{
struct signal_struct * sig = tsk->sig;
if (!sig)
BUG();
if (!atomic_read(&sig->count))
BUG();
remove_thread_group(tsk, sig);
spin_lock_irq(&tsk->sigmask_lock);
if (sig) {
tsk->sig = NULL;
if (atomic_dec_and_test(&sig->count))
if (atomic_dec_and_test(&sig->count)) {
flush_sigqueue(&sig->shared_pending);
kmem_cache_free(sigact_cachep, sig);
}
}
clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
flush_sigqueue(&tsk->pending);
......@@ -153,24 +282,10 @@ flush_signal_handlers(struct task_struct *t)
void
sig_exit(int sig, int exit_code, struct siginfo *info)
{
struct task_struct *t;
sigaddset(&current->pending.signal, sig);
recalc_sigpending();
current->flags |= PF_SIGNALED;
/* Propagate the signal to all the tasks in
* our thread group
*/
if (info && (unsigned long)info != 1
&& info->si_code != SI_TKILL) {
read_lock(&tasklist_lock);
for_each_thread(t) {
force_sig_info(sig, info, t);
}
read_unlock(&tasklist_lock);
}
do_exit(exit_code);
/* NOTREACHED */
}
......@@ -209,7 +324,7 @@ unblock_all_signals(void)
spin_unlock_irqrestore(&current->sigmask_lock, flags);
}
static int collect_signal(int sig, struct sigpending *list, siginfo_t *info)
static inline int collect_signal(int sig, struct sigpending *list, siginfo_t *info)
{
if (sigismember(&list->signal, sig)) {
/* Collect the siginfo appropriate to this signal. */
......@@ -221,9 +336,10 @@ static int collect_signal(int sig, struct sigpending *list, siginfo_t *info)
pp = &q->next;
}
/* Ok, it wasn't in the queue. We must have
been out of queue space. So zero out the
info. */
/* Ok, it wasn't in the queue. This must be
a fast-pathed signal or we must have been
out of queue space. So zero out the info.
*/
sigdelset(&list->signal, sig);
info->si_signo = sig;
info->si_errno = 0;
......@@ -261,20 +377,14 @@ static int collect_signal(int sig, struct sigpending *list, siginfo_t *info)
* Dequeue a signal and return the element to the caller, which is
* expected to free it.
*
* All callers must be holding current->sigmask_lock.
* All callers have to hold the siglock and the sigmask_lock.
*/
int
dequeue_signal(sigset_t *mask, siginfo_t *info)
int dequeue_signal(struct sigpending *pending, sigset_t *mask, siginfo_t *info)
{
int sig = 0;
#if DEBUG_SIG
printk(KERN_DEBUG "SIG dequeue (%s:%d): %d ", current->comm, current->pid,
signal_pending(current));
#endif
sig = next_signal(current, mask);
sig = next_signal(pending, mask);
if (sig) {
if (current->notifier) {
if (sigismember(current->notifier_mask, sig)) {
......@@ -285,7 +395,7 @@ printk(KERN_DEBUG "SIG dequeue (%s:%d): %d ", current->comm, current->pid,
}
}
if (!collect_signal(sig, &current->pending, info))
if (!collect_signal(sig, pending, info))
sig = 0;
/* XXX: Once POSIX.1b timers are in, if si_code == SI_TIMER,
......@@ -293,10 +403,6 @@ printk(KERN_DEBUG "SIG dequeue (%s:%d): %d ", current->comm, current->pid,
}
recalc_sigpending();
#if DEBUG_SIG
printk(KERN_DEBUG " %d -> %d\n", signal_pending(current), sig);
#endif
return sig;
}
......@@ -338,9 +444,10 @@ static int rm_sig_from_queue(int sig, struct task_struct *t)
/*
* Bad permissions for sending the signal
*/
int bad_signal(int sig, struct siginfo *info, struct task_struct *t)
static inline int bad_signal(int sig, struct siginfo *info, struct task_struct *t)
{
return (!info || ((unsigned long)info != 1 && SI_FROMUSER(info)))
return (!info || ((unsigned long)info != 1 &&
(unsigned long)info != 2 && SI_FROMUSER(info)))
&& ((sig != SIGCONT) || (current->session != t->session))
&& (current->euid ^ t->suid) && (current->euid ^ t->uid)
&& (current->uid ^ t->suid) && (current->uid ^ t->uid)
......@@ -436,6 +543,13 @@ static int send_signal(int sig, struct siginfo *info, struct sigpending *signals
{
struct sigqueue * q = NULL;
/*
* fast-pathed signals for kernel-internal things like SIGSTOP
* or SIGKILL.
*/
if ((unsigned long)info == 2)
goto out_set;
/* Real-time signals must be queued if sent by sigqueue, or
some other real-time mechanism. It is implementation
defined whether kill() does so. We attempt to do so, on
......@@ -444,9 +558,8 @@ static int send_signal(int sig, struct siginfo *info, struct sigpending *signals
make sure at least one signal gets delivered and don't
pass on the info struct. */
if (atomic_read(&nr_queued_signals) < max_queued_signals) {
if (atomic_read(&nr_queued_signals) < max_queued_signals)
q = kmem_cache_alloc(sigqueue_cachep, GFP_ATOMIC);
}
if (q) {
atomic_inc(&nr_queued_signals);
......@@ -473,14 +586,14 @@ static int send_signal(int sig, struct siginfo *info, struct sigpending *signals
break;
}
} else if (sig >= SIGRTMIN && info && (unsigned long)info != 1
&& info->si_code != SI_USER) {
&& info->si_code != SI_USER)
/*
* Queue overflow, abort. We may abort if the signal was rt
* and sent by user using something other than kill().
*/
return -EAGAIN;
}
out_set:
sigaddset(&signals->signal, sig);
return 0;
}
......@@ -528,17 +641,17 @@ static int deliver_signal(int sig, struct siginfo *info, struct task_struct *t)
return retval;
}
int
send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
static int
__send_sig_info(int sig, struct siginfo *info, struct task_struct *t, int shared)
{
unsigned long flags;
int ret;
#if DEBUG_SIG
printk(KERN_DEBUG "SIG queue (%s:%d): %d ", t->comm, t->pid, sig);
if (!irqs_disabled())
BUG();
#if CONFIG_SMP
if (!spin_is_locked(&t->sig->siglock))
BUG();
#endif
ret = -EINVAL;
if (sig < 0 || sig > _NSIG)
goto out_nolock;
......@@ -556,7 +669,7 @@ printk(KERN_DEBUG "SIG queue (%s:%d): %d ", t->comm, t->pid, sig);
if (!sig || !t->sig)
goto out_nolock;
spin_lock_irqsave(&t->sigmask_lock, flags);
spin_lock(&t->sigmask_lock);
handle_stop_signal(sig, t);
/* Optimize away the signal, if it's a signal that can be
......@@ -566,20 +679,25 @@ printk(KERN_DEBUG "SIG queue (%s:%d): %d ", t->comm, t->pid, sig);
if (ignored_signal(sig, t))
goto out;
/* Support queueing exactly one non-rt signal, so that we
can get more detailed information about the cause of
the signal. */
if (sig < SIGRTMIN && sigismember(&t->pending.signal, sig))
goto out;
#define LEGACY_QUEUE(sigptr, sig) \
(((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig)))
if (!shared) {
/* Support queueing exactly one non-rt signal, so that we
can get more detailed information about the cause of
the signal. */
if (LEGACY_QUEUE(&t->pending, sig))
goto out;
ret = deliver_signal(sig, info, t);
ret = deliver_signal(sig, info, t);
} else {
if (LEGACY_QUEUE(&t->sig->shared_pending, sig))
goto out;
ret = send_signal(sig, info, &t->sig->shared_pending);
}
out:
spin_unlock_irqrestore(&t->sigmask_lock, flags);
spin_unlock(&t->sigmask_lock);
out_nolock:
#if DEBUG_SIG
printk(KERN_DEBUG " %d -> %d\n", signal_pending(t), ret);
#endif
return ret;
}
......@@ -605,7 +723,157 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
recalc_sigpending_tsk(t);
spin_unlock_irqrestore(&t->sigmask_lock, flags);
return send_sig_info(sig, info, t);
return send_sig_info(sig, (void *)1, t);
}
static int
__force_sig_info(int sig, struct task_struct *t)
{
unsigned long int flags;
spin_lock_irqsave(&t->sigmask_lock, flags);
if (t->sig == NULL) {
spin_unlock_irqrestore(&t->sigmask_lock, flags);
return -ESRCH;
}
if (t->sig->action[sig-1].sa.sa_handler == SIG_IGN)
t->sig->action[sig-1].sa.sa_handler = SIG_DFL;
sigdelset(&t->blocked, sig);
recalc_sigpending_tsk(t);
spin_unlock_irqrestore(&t->sigmask_lock, flags);
return __send_sig_info(sig, (void *)2, t, 0);
}
#define can_take_signal(p, sig) \
(((unsigned long) p->sig->action[sig-1].sa.sa_handler > 1) && \
!sigismember(&p->blocked, sig) && (task_curr(p) || !signal_pending(p)))
static inline
int load_balance_thread_group(struct task_struct *p, int sig,
struct siginfo *info)
{
struct task_struct *tmp;
int ret;
/*
* if the specified thread is not blocking this signal
* then deliver it.
*/
if (can_take_signal(p, sig))
return __send_sig_info(sig, info, p, 0);
/*
* Otherwise try to find a suitable thread.
* If no such thread is found then deliver to
* the original thread.
*/
tmp = p->sig->curr_target;
if (!tmp || tmp->tgid != p->tgid)
/* restart balancing at this thread */
p->sig->curr_target = p;
else for (;;) {
if (list_empty(&p->thread_group))
BUG();
if (!tmp || tmp->tgid != p->tgid)
BUG();
/*
* Do not send signals that are ignored or blocked,
* or to not-running threads that are overworked:
*/
if (!can_take_signal(tmp, sig)) {
tmp = next_thread(tmp);
p->sig->curr_target = tmp;
if (tmp == p)
break;
continue;
}
ret = __send_sig_info(sig, info, tmp, 0);
return ret;
}
/*
* No suitable thread was found - put the signal
* into the shared-pending queue.
*/
return __send_sig_info(sig, info, p, 1);
}
int __broadcast_thread_group(struct task_struct *p, int sig)
{
struct task_struct *tmp;
struct list_head *entry;
int err = 0;
/* send a signal to the head of the list */
err = __force_sig_info(sig, p);
/* send a signal to all members of the list */
list_for_each(entry, &p->thread_group) {
tmp = list_entry(entry, task_t, thread_group);
err = __force_sig_info(sig, tmp);
}
return err;
}
int
send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
{
unsigned long flags;
int ret = 0;
if (!p)
BUG();
if (!p->sig)
BUG();
spin_lock_irqsave(&p->sig->siglock, flags);
/* not a thread group - normal signal behavior */
if (list_empty(&p->thread_group) || !sig)
goto out_send;
if (sig_user_defined(p, sig)) {
if (sig_user_specific(sig))
goto out_send;
if (sig_user_load_balance(sig)) {
ret = load_balance_thread_group(p, sig, info);
goto out_unlock;
}
/* must not happen */
BUG();
}
/* optimize away ignored signals: */
if (sig_ignored(p, sig))
goto out_unlock;
/* blocked (or ptraced) signals get posted */
spin_lock(&p->sigmask_lock);
if ((p->ptrace & PT_PTRACED) || sigismember(&p->blocked, sig) ||
sigismember(&p->real_blocked, sig)) {
spin_unlock(&p->sigmask_lock);
goto out_send;
}
spin_unlock(&p->sigmask_lock);
if (sig_kernel_broadcast(sig) || sig_kernel_coredump(sig)) {
ret = __broadcast_thread_group(p, sig);
goto out_unlock;
}
if (sig_kernel_specific(sig))
goto out_send;
/* must not happen */
BUG();
out_send:
ret = __send_sig_info(sig, info, p, 0);
out_unlock:
spin_unlock_irqrestore(&p->sig->siglock, flags);
return ret;
}
/*
......@@ -670,15 +938,8 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid)
read_lock(&tasklist_lock);
p = find_task_by_pid(pid);
error = -ESRCH;
if (p) {
if (!thread_group_leader(p)) {
struct task_struct *tg;
tg = find_task_by_pid(p->tgid);
if (tg)
p = tg;
}
if (p)
error = send_sig_info(sig, info, p);
}
read_unlock(&tasklist_lock);
return error;
}
......@@ -755,25 +1016,36 @@ kill_proc(pid_t pid, int sig, int priv)
* Joy. Or not. Pthread wants us to wake up every thread
* in our parent group.
*/
static void wake_up_parent(struct task_struct *parent)
static inline void wake_up_parent(struct task_struct *p)
{
struct task_struct *tsk = parent;
struct task_struct *parent = p->parent, *tsk = parent;
/*
* Fortunately this is not necessary for thread groups:
*/
if (p->tgid == tsk->tgid) {
wake_up_interruptible(&tsk->wait_chldexit);
return;
}
spin_lock_irq(&parent->sig->siglock);
do {
wake_up_interruptible(&tsk->wait_chldexit);
tsk = next_thread(tsk);
if (tsk->sig != parent->sig)
BUG();
} while (tsk != parent);
spin_unlock_irq(&parent->sig->siglock);
}
/*
* Let a parent know about a status change of a child.
*/
void do_notify_parent(struct task_struct *tsk, int sig)
{
struct siginfo info;
int why, status;
/* is the thread detached? */
if (sig == -1)
BUG();
......@@ -812,7 +1084,7 @@ void do_notify_parent(struct task_struct *tsk, int sig)
info.si_status = status;
send_sig_info(sig, &info, tsk->parent);
wake_up_parent(tsk->parent);
wake_up_parent(tsk);
}
......@@ -837,13 +1109,24 @@ notify_parent(struct task_struct *tsk, int sig)
int get_signal_to_deliver(siginfo_t *info, struct pt_regs *regs)
{
sigset_t *mask = &current->blocked;
for (;;) {
unsigned long signr;
unsigned long signr = 0;
struct k_sigaction *ka;
spin_lock_irq(&current->sigmask_lock);
signr = dequeue_signal(&current->blocked, info);
spin_unlock_irq(&current->sigmask_lock);
local_irq_disable();
if (current->sig->shared_pending.head) {
spin_lock(&current->sig->siglock);
signr = dequeue_signal(&current->sig->shared_pending, mask, info);
spin_unlock(&current->sig->siglock);
}
if (!signr) {
spin_lock(&current->sigmask_lock);
signr = dequeue_signal(&current->pending, mask, info);
spin_unlock(&current->sigmask_lock);
}
local_irq_enable();
if (!signr)
break;
......@@ -1137,8 +1420,11 @@ sys_rt_sigtimedwait(const sigset_t *uthese, siginfo_t *uinfo,
return -EINVAL;
}
spin_lock_irq(&current->sigmask_lock);
sig = dequeue_signal(&these, &info);
spin_lock_irq(&current->sig->siglock);
spin_lock(&current->sigmask_lock);
sig = dequeue_signal(&current->sig->shared_pending, &these, &info);
if (!sig)
sig = dequeue_signal(&current->pending, &these, &info);
if (!sig) {
timeout = MAX_SCHEDULE_TIMEOUT;
if (uts)
......@@ -1149,21 +1435,27 @@ sys_rt_sigtimedwait(const sigset_t *uthese, siginfo_t *uinfo,
/* None ready -- temporarily unblock those we're
* interested while we are sleeping in so that we'll
* be awakened when they arrive. */
sigset_t oldblocked = current->blocked;
current->real_blocked = current->blocked;
sigandsets(&current->blocked, &current->blocked, &these);
recalc_sigpending();
spin_unlock_irq(&current->sigmask_lock);
spin_unlock(&current->sigmask_lock);
spin_unlock_irq(&current->sig->siglock);
current->state = TASK_INTERRUPTIBLE;
timeout = schedule_timeout(timeout);
spin_lock_irq(&current->sigmask_lock);
sig = dequeue_signal(&these, &info);
current->blocked = oldblocked;
spin_lock_irq(&current->sig->siglock);
spin_lock(&current->sigmask_lock);
sig = dequeue_signal(&current->sig->shared_pending, &these, &info);
if (!sig)
sig = dequeue_signal(&current->pending, &these, &info);
current->blocked = current->real_blocked;
siginitset(&current->real_blocked, 0);
recalc_sigpending();
}
}
spin_unlock_irq(&current->sigmask_lock);
spin_unlock(&current->sigmask_lock);
spin_unlock_irq(&current->sig->siglock);
if (sig) {
ret = sig;
......@@ -1195,33 +1487,35 @@ sys_kill(int pid, int sig)
}
/*
* Kill only one task, even if it's a CLONE_THREAD task.
* Send a signal to only one task, even if it's a CLONE_THREAD task.
*/
asmlinkage long
sys_tkill(int pid, int sig)
{
struct siginfo info;
int error;
struct task_struct *p;
/* This is only valid for single tasks */
if (pid <= 0)
return -EINVAL;
info.si_signo = sig;
info.si_errno = 0;
info.si_code = SI_TKILL;
info.si_pid = current->pid;
info.si_uid = current->uid;
read_lock(&tasklist_lock);
p = find_task_by_pid(pid);
error = -ESRCH;
if (p) {
error = send_sig_info(sig, &info, p);
}
read_unlock(&tasklist_lock);
return error;
struct siginfo info;
int error;
struct task_struct *p;
/* This is only valid for single tasks */
if (pid <= 0)
return -EINVAL;
info.si_signo = sig;
info.si_errno = 0;
info.si_code = SI_TKILL;
info.si_pid = current->pid;
info.si_uid = current->uid;
read_lock(&tasklist_lock);
p = find_task_by_pid(pid);
error = -ESRCH;
if (p) {
spin_lock_irq(&p->sig->siglock);
error = __send_sig_info(sig, &info, p, 0);
spin_unlock_irq(&p->sig->siglock);
}
read_unlock(&tasklist_lock);
return error;
}
asmlinkage long
......@@ -1247,13 +1541,12 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
{
struct k_sigaction *k;
if (sig < 1 || sig > _NSIG ||
(act && (sig == SIGKILL || sig == SIGSTOP)))
if (sig < 1 || sig > _NSIG || (act && sig_kernel_only(sig)))
return -EINVAL;
k = &current->sig->action[sig-1];
spin_lock(&current->sig->siglock);
spin_lock_irq(&current->sig->siglock);
if (oact)
*oact = *k;
......@@ -1292,7 +1585,7 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
}
}
spin_unlock(&current->sig->siglock);
spin_unlock_irq(&current->sig->siglock);
return 0;
}
......