/*
 *  linux/kernel/fork.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 *  'fork.c' contains the help-routines for the 'fork' system call
 * (see also entry.S and others).
 * Fork is rather simple, once you get the hang of it, but the memory
 * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
 */

#include <linux/config.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/unistd.h>
#include <linux/smp_lock.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/completion.h>
#include <linux/namespace.h>
#include <linux/personality.h>
#include <linux/file.h>
#include <linux/binfmts.h>
#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/security.h>
#include <linux/jiffies.h>
#include <linux/futex.h>
#include <linux/ptrace.h>
#include <linux/mount.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

/* SysV semaphore undo-list hooks; defined outside this file. */
extern int copy_semundo(unsigned long clone_flags, struct task_struct *tsk);
extern void exit_sem(struct task_struct *tsk);
44 45 46
/* The idle threads do not count..
 * Protected by write_lock_irq(&tasklist_lock)
 */
Linus Torvalds's avatar
Linus Torvalds committed
47 48 49 50
int nr_threads;

int max_threads;
unsigned long total_forks;	/* Handle normal Linux uptimes. */
Ingo Molnar's avatar
Ingo Molnar committed
51

52 53
DEFINE_PER_CPU(unsigned long, process_counts) = 0;

Linus Torvalds's avatar
Linus Torvalds committed
54 55
rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;  /* outer */

56 57 58 59 60 61 62
/*
 * A per-CPU task cache - this relies on the fact that
 * the very last portion of sys_exit() is executed with
 * preemption turned off.
 */
static task_t *task_cache[NR_CPUS] __cacheline_aligned;

63 64 65 66 67 68 69 70 71 72 73 74
int nr_processes(void)
{
	int cpu;
	int total = 0;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (cpu_online(cpu))
			total += per_cpu(process_counts, cpu);
	}
	return total;
}

75 76 77 78 79 80 81
#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
# define alloc_task_struct()	kmem_cache_alloc(task_struct_cachep, GFP_KERNEL)
# define free_task_struct(tsk)	kmem_cache_free(task_struct_cachep, (tsk))
static kmem_cache_t *task_struct_cachep;
#endif

static void free_task(struct task_struct *tsk)
82
{
83 84 85 86 87 88
	/*
	 * The task cache is effectively disabled right now.
	 * Do we want it? The slab cache already has per-cpu
	 * stuff, but the thread info (usually a order-1 page
	 * allocation) doesn't.
	 */
89 90
	if (tsk != current) {
		free_thread_info(tsk->thread_info);
91
		free_task_struct(tsk);
92
	} else {
Ingo Molnar's avatar
Ingo Molnar committed
93
		int cpu = get_cpu();
94

Ingo Molnar's avatar
Ingo Molnar committed
95
		tsk = task_cache[cpu];
96 97
		if (tsk) {
			free_thread_info(tsk->thread_info);
98
			free_task_struct(tsk);
99
		}
Ingo Molnar's avatar
Ingo Molnar committed
100 101
		task_cache[cpu] = current;
		put_cpu();
102 103 104
	}
}

105 106 107 108 109 110 111 112
void __put_task_struct(struct task_struct *tsk)
{
	WARN_ON(!(tsk->state & (TASK_DEAD | TASK_ZOMBIE)));
	WARN_ON(atomic_read(&tsk->usage));
	WARN_ON(tsk == current);

	security_task_free(tsk);
	free_uid(tsk->user);
113
	free_task(tsk);
114 115
}

Linus Torvalds's avatar
Linus Torvalds committed
116 117 118 119
void add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
{
	unsigned long flags;

Linus Torvalds's avatar
Linus Torvalds committed
120
	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
Robert Love's avatar
Robert Love committed
121
	spin_lock_irqsave(&q->lock, flags);
Linus Torvalds's avatar
Linus Torvalds committed
122
	__add_wait_queue(q, wait);
Robert Love's avatar
Robert Love committed
123
	spin_unlock_irqrestore(&q->lock, flags);
Linus Torvalds's avatar
Linus Torvalds committed
124 125 126 127 128 129
}

/*
 * Add @wait to @q as an exclusive waiter (queued at the tail),
 * under the queue lock.
 */
void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)
{
	unsigned long flags;

	wait->flags |= WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&q->lock, flags);
	__add_wait_queue_tail(q, wait);
	spin_unlock_irqrestore(&q->lock, flags);
}

/*
 * Remove @wait from @q, under the queue lock.
 */
void remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
{
	unsigned long flags;

	spin_lock_irqsave(&q->lock, flags);
	__remove_wait_queue(q, wait);
	spin_unlock_irqrestore(&q->lock, flags);
}

145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
	unsigned long flags;

	__set_current_state(state);
	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&q->lock, flags);
	if (list_empty(&wait->task_list))
		__add_wait_queue(q, wait);
	spin_unlock_irqrestore(&q->lock, flags);
}

/*
 * Set the caller's task state and queue @wait as an exclusive waiter
 * on @q (exclusive waiters go to the tail of the queue).
 */
void
prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
	unsigned long flags;

	__set_current_state(state);
	wait->flags |= WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&q->lock, flags);
	/* only queue once, even across repeated prepare calls */
	if (list_empty(&wait->task_list))
		__add_wait_queue_tail(q, wait);
	spin_unlock_irqrestore(&q->lock, flags);
}

/*
 * Restore TASK_RUNNING and unlink @wait from @q after a wait is done.
 * The unlocked list_empty() check is a fast path; the actual removal
 * happens under the queue lock.
 */
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
{
	unsigned long flags;

	__set_current_state(TASK_RUNNING);
	if (!list_empty(&wait->task_list)) {
		spin_lock_irqsave(&q->lock, flags);
		list_del_init(&wait->task_list);
		spin_unlock_irqrestore(&q->lock, flags);
	}
}

/*
 * Wake-function that removes the waiter from the queue once it has
 * actually been woken, so the waiter need not call finish_wait().
 * Returns the result of default_wake_function().
 */
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync)
{
	int woken = default_wake_function(wait, mode, sync);

	if (!woken)
		return 0;

	list_del_init(&wait->task_list);
	return woken;
}

Linus Torvalds's avatar
Linus Torvalds committed
191 192
void __init fork_init(unsigned long mempages)
{
193
#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
194 195 196 197
	/* create a slab on which task_structs can be allocated */
	task_struct_cachep =
		kmem_cache_create("task_struct",
				  sizeof(struct task_struct),0,
198
				  SLAB_MUST_HWCACHE_ALIGN, NULL, NULL);
199 200
	if (!task_struct_cachep)
		panic("fork_init(): cannot create task_struct SLAB cache");
201
#endif
202

Linus Torvalds's avatar
Linus Torvalds committed
203 204 205 206 207
	/*
	 * The default maximum number of threads is set to a safe
	 * value: the thread structures can take up at most half
	 * of memory.
	 */
Linus Torvalds's avatar
Linus Torvalds committed
208
	max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 8;
Alan Cox's avatar
Alan Cox committed
209
	/*
210
	 * we need to allow at least 20 threads to boot a system
Alan Cox's avatar
Alan Cox committed
211
	 */
212 213 214 215 216
	if(max_threads < 20)
		max_threads = 20;

	init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
	init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
Linus Torvalds's avatar
Linus Torvalds committed
217 218
}

219
static struct task_struct *dup_task_struct(struct task_struct *orig)
220 221 222
{
	struct task_struct *tsk;
	struct thread_info *ti;
Ingo Molnar's avatar
Ingo Molnar committed
223
	int cpu = get_cpu();
224

225 226
	prepare_to_copy(orig);

Ingo Molnar's avatar
Ingo Molnar committed
227 228 229
	tsk = task_cache[cpu];
	task_cache[cpu] = NULL;
	put_cpu();
230
	if (!tsk) {
231 232
		tsk = alloc_task_struct();
		if (!tsk)
233 234
			return NULL;

235 236 237
		ti = alloc_thread_info(tsk);
		if (!ti) {
			free_task_struct(tsk);
238 239 240 241
			return NULL;
		}
	} else
		ti = tsk->thread_info;
242 243 244 245 246

	*ti = *orig->thread_info;
	*tsk = *orig;
	tsk->thread_info = ti;
	ti->task = tsk;
247 248 249

	/* One for us, one for whoever does the "release_task()" (usually parent) */
	atomic_set(&tsk->usage,2);
250 251 252
	return tsk;
}

#ifdef CONFIG_MMU
/*
 * Duplicate oldmm's vma list into @mm: clone each copyable vma,
 * account VM_ACCOUNT charges, link file-backed vmas into the inode's
 * share list, and copy the page table entries.  Returns 0 or -errno;
 * on failure any accounted charge is returned via vm_unacct_memory().
 */
static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
{
	struct vm_area_struct * mpnt, *tmp, **pprev;
	int retval;
	unsigned long charge = 0;

	down_write(&oldmm->mmap_sem);
	flush_cache_mm(current->mm);
	mm->locked_vm = 0;
	mm->mmap = NULL;
	mm->mmap_cache = NULL;
	mm->free_area_cache = TASK_UNMAPPED_BASE;
	mm->map_count = 0;
	mm->rss = 0;
	mm->cpu_vm_mask = 0;
	pprev = &mm->mmap;

	/*
	 * Add it to the mmlist after the parent.
	 * Doing it this way means that we can order the list,
	 * and fork() won't mess up the ordering significantly.
	 * Add it first so that swapoff can see any swap entries.
	 */
	spin_lock(&mmlist_lock);
	list_add(&mm->mmlist, &current->mm->mmlist);
	mmlist_nr++;
	spin_unlock(&mmlist_lock);

	for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
		struct file *file;

		if(mpnt->vm_flags & VM_DONTCOPY)
			continue;
		if (mpnt->vm_flags & VM_ACCOUNT) {
			unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
			if (security_vm_enough_memory(len))
				goto fail_nomem;
			charge += len;
		}
		tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
		if (!tmp)
			goto fail_nomem;
		*tmp = *mpnt;
		tmp->vm_flags &= ~VM_LOCKED;
		tmp->vm_mm = mm;
		tmp->vm_next = NULL;
		file = tmp->vm_file;
		INIT_LIST_HEAD(&tmp->shared);
		if (file) {
			struct inode *inode = file->f_dentry->d_inode;
			get_file(file);
			if (tmp->vm_flags & VM_DENYWRITE)
				atomic_dec(&inode->i_writecount);

			/* insert tmp into the share list, just after mpnt */
			down(&inode->i_mapping->i_shared_sem);
			list_add_tail(&tmp->shared, &mpnt->shared);
			up(&inode->i_mapping->i_shared_sem);
		}

		/*
		 * Link in the new vma and copy the page table entries:
		 * link in first so that swapoff can see swap entries.
		 */
		spin_lock(&mm->page_table_lock);
		*pprev = tmp;
		pprev = &tmp->vm_next;
		mm->map_count++;
		retval = copy_page_range(mm, current->mm, tmp);
		spin_unlock(&mm->page_table_lock);

		if (tmp->vm_ops && tmp->vm_ops->open)
			tmp->vm_ops->open(tmp);

		if (retval)
			goto fail;
	}
	retval = 0;
	build_mmap_rb(mm);

out:
	flush_tlb_mm(current->mm);
	up_write(&oldmm->mmap_sem);
	return retval;
fail_nomem:
	retval = -ENOMEM;
fail:
	vm_unacct_memory(charge);
	goto out;
}
344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
static inline int mm_alloc_pgd(struct mm_struct * mm)
{
	mm->pgd = pgd_alloc(mm);
	if (unlikely(!mm->pgd))
		return -ENOMEM;
	return 0;
}

static inline void mm_free_pgd(struct mm_struct * mm)
{
	pgd_free(mm->pgd);
}
#else
#define dup_mmap(mm, oldmm)	(0)
#define mm_alloc_pgd(mm)	(0)
#define mm_free_pgd(mm)
#endif /* CONFIG_MMU */
Linus Torvalds's avatar
Linus Torvalds committed
361

Linus Torvalds's avatar
Linus Torvalds committed
362
spinlock_t mmlist_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
Linus Torvalds's avatar
Linus Torvalds committed
363
int mmlist_nr;
Linus Torvalds's avatar
Linus Torvalds committed
364 365 366 367

#define allocate_mm()	(kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
#define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))

368 369
#include <linux/init_task.h>

Linus Torvalds's avatar
Linus Torvalds committed
370 371 372 373
static struct mm_struct * mm_init(struct mm_struct * mm)
{
	atomic_set(&mm->mm_users, 1);
	atomic_set(&mm->mm_count, 1);
Linus Torvalds's avatar
Linus Torvalds committed
374
	init_rwsem(&mm->mmap_sem);
Ingo Molnar's avatar
Ingo Molnar committed
375
	mm->core_waiters = 0;
Linus Torvalds's avatar
Linus Torvalds committed
376
	mm->page_table_lock = SPIN_LOCK_UNLOCKED;
377 378
	mm->ioctx_list_lock = RW_LOCK_UNLOCKED;
	mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm);
379 380
	mm->free_area_cache = TASK_UNMAPPED_BASE;

381 382
	if (likely(!mm_alloc_pgd(mm))) {
		mm->def_flags = 0;
Linus Torvalds's avatar
Linus Torvalds committed
383
		return mm;
384
	}
Linus Torvalds's avatar
Linus Torvalds committed
385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410
	free_mm(mm);
	return NULL;
}

/*
 * Allocate and initialize a fresh, zeroed mm_struct.
 * Returns NULL if either the allocation or mm_init() fails.
 */
struct mm_struct * mm_alloc(void)
{
	struct mm_struct * mm = allocate_mm();

	if (!mm)
		return NULL;

	memset(mm, 0, sizeof(*mm));
	return mm_init(mm);
}

/*
 * Called when the last reference to the mm
 * is dropped: either by a lazy thread or by
 * mmput. Free the page directory and the mm.
 */
inline void __mmdrop(struct mm_struct *mm)
{
411 412
	BUG_ON(mm == &init_mm);
	mm_free_pgd(mm);
Linus Torvalds's avatar
Linus Torvalds committed
413 414 415 416 417 418 419 420 421 422 423
	destroy_context(mm);
	free_mm(mm);
}

/*
 * Decrement the use count and release all resources for an mm.
 */
void mmput(struct mm_struct *mm)
{
	if (atomic_dec_and_lock(&mm->mm_users, &mmlist_lock)) {
		list_del(&mm->mmlist);
Linus Torvalds's avatar
Linus Torvalds committed
424
		mmlist_nr--;
Linus Torvalds's avatar
Linus Torvalds committed
425
		spin_unlock(&mmlist_lock);
Andrew Morton's avatar
Andrew Morton committed
426
		exit_aio(mm);
Linus Torvalds's avatar
Linus Torvalds committed
427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444
		exit_mmap(mm);
		mmdrop(mm);
	}
}

/* Please note the differences between mmput and mm_release.
 * mmput is called whenever we stop holding onto a mm_struct,
 * error success whatever.
 *
 * mm_release is called after a mm_struct has been removed
 * from the current process.
 *
 * This difference is important for error handling, when we
 * only half set up a mm_struct for a new process and need to restore
 * the old one.  Because we mmput the new mm_struct before
 * restoring the old one. . .
 * Eric Biederman 10 January 1998
 */
445
void mm_release(struct task_struct *tsk, struct mm_struct *mm)
Linus Torvalds's avatar
Linus Torvalds committed
446
{
Linus Torvalds's avatar
Linus Torvalds committed
447
	struct completion *vfork_done = tsk->vfork_done;
Linus Torvalds's avatar
Linus Torvalds committed
448

449 450 451
	/* Get rid of any cached register state */
	deactivate_mm(tsk, mm);

Linus Torvalds's avatar
Linus Torvalds committed
452
	/* notify parent sleeping on vfork() */
Linus Torvalds's avatar
Linus Torvalds committed
453 454 455
	if (vfork_done) {
		tsk->vfork_done = NULL;
		complete(vfork_done);
Linus Torvalds's avatar
Linus Torvalds committed
456
	}
457
	if (tsk->clear_child_tid && atomic_read(&mm->mm_users) > 1) {
458
		u32 __user * tidptr = tsk->clear_child_tid;
459
		tsk->clear_child_tid = NULL;
460

461
		/*
462
		 * We don't check the error code - if userspace has
463 464
		 * not set up a proper pointer then tough luck.
		 */
465
		put_user(0, tidptr);
466
		sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL);
467
	}
Linus Torvalds's avatar
Linus Torvalds committed
468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493
}

/*
 * Set up the child's mm: share the parent's mm for CLONE_VM (or a
 * kernel thread with no mm), otherwise allocate a new mm_struct and
 * duplicate the parent's address space into it.  Returns 0 or -errno.
 */
static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
{
	struct mm_struct * mm, *oldmm;
	int retval;

	tsk->min_flt = tsk->maj_flt = 0;
	tsk->cmin_flt = tsk->cmaj_flt = 0;
	tsk->nswap = tsk->cnswap = 0;

	tsk->mm = NULL;
	tsk->active_mm = NULL;

	/*
	 * Are we cloning a kernel thread?
	 *
	 * We need to steal a active VM for that..
	 */
	oldmm = current->mm;
	if (!oldmm)
		return 0;

	if (clone_flags & CLONE_VM) {
		atomic_inc(&oldmm->mm_users);
		mm = oldmm;
		/*
		 * There are cases where the PTL is held to ensure no
		 * new threads start up in user mode using an mm, which
		 * allows optimizing out ipis; the tlb_gather_mmu code
		 * is an example.
		 */
		spin_unlock_wait(&oldmm->page_table_lock);
		goto good_mm;
	}

	retval = -ENOMEM;
	mm = allocate_mm();
	if (!mm)
		goto fail_nomem;

	/* Copy the current MM stuff.. */
	memcpy(mm, oldmm, sizeof(*mm));
	if (!mm_init(mm))
		goto fail_nomem;

	if (init_new_context(tsk,mm))
		goto free_pt;

	retval = dup_mmap(mm, oldmm);
	if (retval)
		goto free_pt;

good_mm:
	tsk->mm = mm;
	tsk->active_mm = mm;
	return 0;

free_pt:
	mmput(mm);
fail_nomem:
	return retval;
}

/*
 * Allocate a new fs_struct initialized from @old: copy the umask and
 * take references on root, pwd and altroot (and their mounts) under
 * @old's lock.  Returns NULL on allocation failure.
 */
static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
{
	struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
	/* We don't need to lock fs - think why ;-) */
	if (fs) {
		atomic_set(&fs->count, 1);
		fs->lock = RW_LOCK_UNLOCKED;
		fs->umask = old->umask;
		read_lock(&old->lock);
		fs->rootmnt = mntget(old->rootmnt);
		fs->root = dget(old->root);
		fs->pwdmnt = mntget(old->pwdmnt);
		fs->pwd = dget(old->pwd);
		if (old->altroot) {
			fs->altrootmnt = mntget(old->altrootmnt);
			fs->altroot = dget(old->altroot);
		} else {
			fs->altrootmnt = NULL;
			fs->altroot = NULL;
		}
		read_unlock(&old->lock);
	}
	return fs;
}

/* Public wrapper around __copy_fs_struct(). */
struct fs_struct *copy_fs_struct(struct fs_struct *old)
{
	return __copy_fs_struct(old);
}

static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
{
	if (clone_flags & CLONE_FS) {
		atomic_inc(&current->fs->count);
		return 0;
	}
	tsk->fs = __copy_fs_struct(current->fs);
	if (!tsk->fs)
570
		return -ENOMEM;
Linus Torvalds's avatar
Linus Torvalds committed
571 572 573 574 575 576
	return 0;
}

static int count_open_files(struct files_struct *files, int size)
{
	int i;
577

Linus Torvalds's avatar
Linus Torvalds committed
578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612
	/* Find the last open fd */
	for (i = size/(8*sizeof(long)); i > 0; ) {
		if (files->open_fds->fds_bits[--i])
			break;
	}
	i = (i+1) * 8 * sizeof(long);
	return i;
}

static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
{
	struct files_struct *oldf, *newf;
	struct file **old_fds, **new_fds;
	int open_files, nfds, size, i, error = 0;

	/*
	 * A background process may not have any files ...
	 */
	oldf = current->files;
	if (!oldf)
		goto out;

	if (clone_flags & CLONE_FILES) {
		atomic_inc(&oldf->count);
		goto out;
	}

	tsk->files = NULL;
	error = -ENOMEM;
	newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
	if (!newf) 
		goto out;

	atomic_set(&newf->count, 1);

613
	newf->file_lock	    = SPIN_LOCK_UNLOCKED;
Linus Torvalds's avatar
Linus Torvalds committed
614 615 616 617 618 619 620 621 622 623 624 625
	newf->next_fd	    = 0;
	newf->max_fds	    = NR_OPEN_DEFAULT;
	newf->max_fdset	    = __FD_SETSIZE;
	newf->close_on_exec = &newf->close_on_exec_init;
	newf->open_fds	    = &newf->open_fds_init;
	newf->fd	    = &newf->fd_array[0];

	/* We don't yet have the oldf readlock, but even if the old
           fdset gets grown now, we'll only copy up to "size" fds */
	size = oldf->max_fdset;
	if (size > __FD_SETSIZE) {
		newf->max_fdset = 0;
626
		spin_lock(&newf->file_lock);
Linus Torvalds's avatar
Linus Torvalds committed
627
		error = expand_fdset(newf, size-1);
628
		spin_unlock(&newf->file_lock);
Linus Torvalds's avatar
Linus Torvalds committed
629 630 631
		if (error)
			goto out_release;
	}
632
	spin_lock(&oldf->file_lock);
Linus Torvalds's avatar
Linus Torvalds committed
633 634 635 636 637 638 639 640 641 642

	open_files = count_open_files(oldf, size);

	/*
	 * Check whether we need to allocate a larger fd array.
	 * Note: we're not a clone task, so the open count won't
	 * change.
	 */
	nfds = NR_OPEN_DEFAULT;
	if (open_files > nfds) {
643
		spin_unlock(&oldf->file_lock);
Linus Torvalds's avatar
Linus Torvalds committed
644
		newf->max_fds = 0;
645
		spin_lock(&newf->file_lock);
Linus Torvalds's avatar
Linus Torvalds committed
646
		error = expand_fd_array(newf, open_files-1);
647
		spin_unlock(&newf->file_lock);
Linus Torvalds's avatar
Linus Torvalds committed
648 649 650
		if (error) 
			goto out_release;
		nfds = newf->max_fds;
651
		spin_lock(&oldf->file_lock);
Linus Torvalds's avatar
Linus Torvalds committed
652 653 654 655 656 657 658 659 660 661 662 663 664 665
	}

	old_fds = oldf->fd;
	new_fds = newf->fd;

	memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8);
	memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8);

	for (i = open_files; i != 0; i--) {
		struct file *f = *old_fds++;
		if (f)
			get_file(f);
		*new_fds++ = f;
	}
666
	spin_unlock(&oldf->file_lock);
Linus Torvalds's avatar
Linus Torvalds committed
667 668 669 670 671 672 673 674 675 676

	/* compute the remainder to be cleared */
	size = (newf->max_fds - open_files) * sizeof(struct file *);

	/* This is long word aligned thus could use a optimized version */ 
	memset(new_fds, 0, size); 

	if (newf->max_fdset > open_files) {
		int left = (newf->max_fdset-open_files)/8;
		int start = open_files / (8 * sizeof(unsigned long));
677

Linus Torvalds's avatar
Linus Torvalds committed
678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695
		memset(&newf->open_fds->fds_bits[start], 0, left);
		memset(&newf->close_on_exec->fds_bits[start], 0, left);
	}

	tsk->files = newf;
	error = 0;
out:
	return error;

out_release:
	free_fdset (newf->close_on_exec, newf->max_fdset);
	free_fdset (newf->open_fds, newf->max_fdset);
	kmem_cache_free(files_cachep, newf);
	goto out;
}

static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
{
696
	struct sighand_struct *sig;
Linus Torvalds's avatar
Linus Torvalds committed
697

698 699
	if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) {
		atomic_inc(&current->sighand->count);
Linus Torvalds's avatar
Linus Torvalds committed
700 701
		return 0;
	}
702 703
	sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
	tsk->sighand = sig;
Linus Torvalds's avatar
Linus Torvalds committed
704
	if (!sig)
705
		return -ENOMEM;
Linus Torvalds's avatar
Linus Torvalds committed
706 707
	spin_lock_init(&sig->siglock);
	atomic_set(&sig->count, 1);
708 709 710 711 712 713 714 715 716 717 718 719 720 721 722
	memcpy(sig->action, current->sighand->action, sizeof(sig->action));
	return 0;
}

static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
{
	struct signal_struct *sig;

	if (clone_flags & CLONE_THREAD) {
		atomic_inc(&current->signal->count);
		return 0;
	}
	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
	tsk->signal = sig;
	if (!sig)
723
		return -ENOMEM;
724
	atomic_set(&sig->count, 1);
725 726
	sig->group_exit = 0;
	sig->group_exit_code = 0;
727
	sig->group_exit_task = NULL;
Ingo Molnar's avatar
Ingo Molnar committed
728
	sig->group_stop_count = 0;
Ingo Molnar's avatar
Ingo Molnar committed
729 730 731
	sig->curr_target = NULL;
	init_sigpending(&sig->shared_pending);

Linus Torvalds's avatar
Linus Torvalds committed
732 733 734 735 736 737 738
	return 0;
}

static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
{
	unsigned long new_flags = p->flags;

739
	new_flags &= ~PF_SUPERPRIV;
Linus Torvalds's avatar
Linus Torvalds committed
740 741 742 743 744 745
	new_flags |= PF_FORKNOEXEC;
	if (!(clone_flags & CLONE_PTRACE))
		p->ptrace = 0;
	p->flags = new_flags;
}

746
asmlinkage long sys_set_tid_address(int __user *tidptr)
747
{
748
	current->clear_child_tid = tidptr;
749 750 751 752

	return current->pid;
}

Linus Torvalds's avatar
Linus Torvalds committed
753
/*
754 755 756 757 758 759
 * This creates a new process as a copy of the old one,
 * but does not actually start it yet.
 *
 * It copies the registers, and all the appropriate
 * parts of the process environment (as per the clone
 * flags). The actual kick-off is left to the caller.
Linus Torvalds's avatar
Linus Torvalds committed
760
 */
761 762 763 764 765 766
struct task_struct *copy_process(unsigned long clone_flags,
				 unsigned long stack_start,
				 struct pt_regs *regs,
				 unsigned long stack_size,
				 int __user *parent_tidptr,
				 int __user *child_tidptr)
Linus Torvalds's avatar
Linus Torvalds committed
767
{
Linus Torvalds's avatar
Linus Torvalds committed
768
	int retval;
Rusty Russell's avatar
Rusty Russell committed
769
	struct task_struct *p = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
770

Linus Torvalds's avatar
Linus Torvalds committed
771
	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
Rusty Russell's avatar
Rusty Russell committed
772
		return ERR_PTR(-EINVAL);
Linus Torvalds's avatar
Linus Torvalds committed
773

Ingo Molnar's avatar
Ingo Molnar committed
774
	/*
775 776
	 * Thread groups must share signals as well, and detached threads
	 * can only be started up within the thread group.
Ingo Molnar's avatar
Ingo Molnar committed
777
	 */
778 779 780 781
	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
		return ERR_PTR(-EINVAL);
	if ((clone_flags & CLONE_DETACHED) && !(clone_flags & CLONE_THREAD))
		return ERR_PTR(-EINVAL);
Ingo Molnar's avatar
Ingo Molnar committed
782

783 784
	retval = security_task_create(clone_flags);
	if (retval)
785 786
		goto fork_out;

Linus Torvalds's avatar
Linus Torvalds committed
787
	retval = -ENOMEM;
788
	p = dup_task_struct(current);
Linus Torvalds's avatar
Linus Torvalds committed
789 790 791 792
	if (!p)
		goto fork_out;

	retval = -EAGAIN;
Linus Torvalds's avatar
Linus Torvalds committed
793 794 795 796
	if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur) {
		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE))
			goto bad_fork_free;
	}
Linus Torvalds's avatar
Linus Torvalds committed
797

Linus Torvalds's avatar
Linus Torvalds committed
798 799 800 801
	atomic_inc(&p->user->__count);
	atomic_inc(&p->user->processes);

	/*
802 803 804
	 * If multiple threads are within copy_process(), then this check
	 * triggers too late. This doesn't hurt, the check is only there
	 * to stop root fork bombs.
Linus Torvalds's avatar
Linus Torvalds committed
805 806 807
	 */
	if (nr_threads >= max_threads)
		goto bad_fork_cleanup_count;
808

809 810
	if (!try_module_get(p->thread_info->exec_domain->module))
		goto bad_fork_cleanup_count;
Linus Torvalds's avatar
Linus Torvalds committed
811

812 813
	if (p->binfmt && !try_module_get(p->binfmt->module))
		goto bad_fork_cleanup_put_domain;
Linus Torvalds's avatar
Linus Torvalds committed
814

815 816 817 818 819 820 821
#ifdef CONFIG_PREEMPT
	/*
	 * schedule_tail drops this_rq()->lock so we compensate with a count
	 * of 1.  Also, we want to start with kernel preemption disabled.
	 */
	p->thread_info->preempt_count = 1;
#endif
Linus Torvalds's avatar
Linus Torvalds committed
822 823 824 825
	p->did_exec = 0;
	p->state = TASK_UNINTERRUPTIBLE;

	copy_flags(clone_flags, p);
826 827 828 829 830 831 832
	if (clone_flags & CLONE_IDLETASK)
		p->pid = 0;
	else {
		p->pid = alloc_pidmap();
		if (p->pid == -1)
			goto bad_fork_cleanup;
	}
833 834 835 836 837
	retval = -EFAULT;
	if (clone_flags & CLONE_PARENT_SETTID)
		if (put_user(p->pid, parent_tidptr))
			goto bad_fork_cleanup;

838
	p->proc_dentry = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
839

Linus Torvalds's avatar
Linus Torvalds committed
840
	INIT_LIST_HEAD(&p->run_list);
Linus Torvalds's avatar
Linus Torvalds committed
841

842 843
	INIT_LIST_HEAD(&p->children);
	INIT_LIST_HEAD(&p->sibling);
844
	INIT_LIST_HEAD(&p->posix_timers);
Linus Torvalds's avatar
Linus Torvalds committed
845
	init_waitqueue_head(&p->wait_chldexit);
Linus Torvalds's avatar
Linus Torvalds committed
846
	p->vfork_done = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
847
	spin_lock_init(&p->alloc_lock);
Ingo Molnar's avatar
Ingo Molnar committed
848
	spin_lock_init(&p->switch_lock);
849
	spin_lock_init(&p->proc_lock);
Linus Torvalds's avatar
Linus Torvalds committed
850

Ingo Molnar's avatar
Ingo Molnar committed
851
	clear_tsk_thread_flag(p, TIF_SIGPENDING);
Linus Torvalds's avatar
Linus Torvalds committed
852 853 854 855 856 857 858 859 860
	init_sigpending(&p->pending);

	p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
	p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
	init_timer(&p->real_timer);
	p->real_timer.data = (unsigned long) p;

	p->leader = 0;		/* session leadership doesn't inherit */
	p->tty_old_pgrp = 0;
861 862
	p->utime = p->stime = 0;
	p->cutime = p->cstime = 0;
Linus Torvalds's avatar
Linus Torvalds committed
863
	p->array = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
864
	p->lock_depth = -1;		/* -1 = no lock */
865
	p->start_time = get_jiffies_64();
866
	p->security = NULL;
867
	p->as_io_context = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
868 869

	retval = -ENOMEM;
870
	if ((retval = security_task_alloc(p)))
871
		goto bad_fork_cleanup;
Linus Torvalds's avatar
Linus Torvalds committed
872
	/* copy all the process information */
873
	if ((retval = copy_semundo(clone_flags, p)))
874
		goto bad_fork_cleanup_security;
875
	if ((retval = copy_files(clone_flags, p)))
Dave Olien's avatar
Dave Olien committed
876
		goto bad_fork_cleanup_semundo;
877
	if ((retval = copy_fs(clone_flags, p)))
Linus Torvalds's avatar
Linus Torvalds committed
878
		goto bad_fork_cleanup_files;
879
	if ((retval = copy_sighand(clone_flags, p)))
Linus Torvalds's avatar
Linus Torvalds committed
880
		goto bad_fork_cleanup_fs;
881
	if ((retval = copy_signal(clone_flags, p)))
Linus Torvalds's avatar
Linus Torvalds committed
882
		goto bad_fork_cleanup_sighand;
883
	if ((retval = copy_mm(clone_flags, p)))
884
		goto bad_fork_cleanup_signal;
885
	if ((retval = copy_namespace(clone_flags, p)))
Linus Torvalds's avatar
Linus Torvalds committed
886
		goto bad_fork_cleanup_mm;
Linus Torvalds's avatar
Linus Torvalds committed
887 888
	retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
	if (retval)
Linus Torvalds's avatar
Linus Torvalds committed
889
		goto bad_fork_cleanup_namespace;
890

891
	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
892
	/*
893
	 * Clear TID on mm_release()?
894
	 */
895
	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
896

897 898 899 900 901 902
	/*
	 * Syscall tracing should be turned off in the child regardless
	 * of CLONE_PTRACE.
	 */
	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);

Linus Torvalds's avatar
Linus Torvalds committed
903 904 905 906 907 908
	/* Our parent execution domain becomes current domain
	   These must match for thread signalling to apply */
	   
	p->parent_exec_id = p->self_exec_id;

	/* ok, now we should be set up.. */
909 910 911 912
	if (clone_flags & CLONE_DETACHED)
		p->exit_signal = -1;
	else
		p->exit_signal = clone_flags & CSIGNAL;
Linus Torvalds's avatar
Linus Torvalds committed
913 914 915
	p->pdeath_signal = 0;

	/*
Linus Torvalds's avatar
Linus Torvalds committed
916
	 * Share the timeslice between parent and child, thus the
917
	 * total amount of pending timeslices in the system doesn't change,
Linus Torvalds's avatar
Linus Torvalds committed
918
	 * resulting in more scheduling fairness.
Linus Torvalds's avatar
Linus Torvalds committed
919
	 */
Ingo Molnar's avatar
Ingo Molnar committed
920 921 922 923 924 925 926
	local_irq_disable();
        p->time_slice = (current->time_slice + 1) >> 1;
	/*
	 * The remainder of the first timeslice might be recovered by
	 * the parent if the child exits early enough.
	 */
	p->first_time_slice = 1;
Linus Torvalds's avatar
Linus Torvalds committed
927
	current->time_slice >>= 1;
928
	p->last_run = jiffies;
Linus Torvalds's avatar
Linus Torvalds committed
929 930 931 932 933 934 935
	if (!current->time_slice) {
		/*
	 	 * This case is rare, it happens when the parent has only
	 	 * a single jiffy left from its timeslice. Taking the
		 * runqueue lock is not a problem.
		 */
		current->time_slice = 1;
936
		preempt_disable();
Ingo Molnar's avatar
Ingo Molnar committed
937
		scheduler_tick(0, 0);
Ingo Molnar's avatar
Ingo Molnar committed
938
		local_irq_enable();
939 940
		preempt_enable();
	} else
Ingo Molnar's avatar
Ingo Molnar committed
941
		local_irq_enable();
Linus Torvalds's avatar
Linus Torvalds committed
942 943 944 945 946 947
	/*
	 * Ok, add it to the run-queues and make it
	 * visible to the rest of the system.
	 *
	 * Let it rip!
	 */
Rusty Russell's avatar
Rusty Russell committed
948
	p->tgid = p->pid;
949
	p->group_leader = p;
950 951
	INIT_LIST_HEAD(&p->ptrace_children);
	INIT_LIST_HEAD(&p->ptrace_list);
Linus Torvalds's avatar
Linus Torvalds committed
952 953

	/* Need tasklist lock for parent etc handling! */
Linus Torvalds's avatar
Linus Torvalds committed
954
	write_lock_irq(&tasklist_lock);
955 956 957 958 959 960
	/*
	 * Check for pending SIGKILL! The new thread should not be allowed
	 * to slip out of an OOM kill. (or normal SIGKILL.)
	 */
	if (sigismember(&current->pending.signal, SIGKILL)) {
		write_unlock_irq(&tasklist_lock);
Ingo Molnar's avatar
Ingo Molnar committed
961
		retval = -EINTR;
962 963
		goto bad_fork_cleanup_namespace;
	}
Linus Torvalds's avatar
Linus Torvalds committed
964

965
	/* CLONE_PARENT re-uses the old parent */
966 967 968
	if (clone_flags & CLONE_PARENT)
		p->real_parent = current->real_parent;
	else
969
		p->real_parent = current;
970
	p->parent = p->real_parent;
Linus Torvalds's avatar
Linus Torvalds committed
971

Linus Torvalds's avatar
Linus Torvalds committed
972
	if (clone_flags & CLONE_THREAD) {
973
		spin_lock(&current->sighand->siglock);
974 975 976 977 978
		/*
		 * Important: if an exit-all has been started then
		 * do not create this new thread - the whole thread
		 * group is supposed to exit anyway.
		 */
979 980
		if (current->signal->group_exit) {
			spin_unlock(&current->sighand->siglock);
981 982 983
			write_unlock_irq(&tasklist_lock);
			goto bad_fork_cleanup_namespace;
		}
Linus Torvalds's avatar
Linus Torvalds committed
984
		p->tgid = current->tgid;
985
		p->group_leader = current->group_leader;
Ingo Molnar's avatar
Ingo Molnar committed
986

987
		if (current->signal->group_stop_count > 0) {
Ingo Molnar's avatar
Ingo Molnar committed
988 989 990 991 992
			/*
			 * There is an all-stop in progress for the group.
			 * We ourselves will stop as soon as we check signals.
			 * Make the new thread part of that group stop too.
			 */
993
			current->signal->group_stop_count++;
Ingo Molnar's avatar
Ingo Molnar committed
994 995 996
			set_tsk_thread_flag(p, TIF_SIGPENDING);
		}

997
		spin_unlock(&current->sighand->siglock);
Linus Torvalds's avatar
Linus Torvalds committed
998
	}
Linus Torvalds's avatar
Linus Torvalds committed
999

Linus Torvalds's avatar
Linus Torvalds committed
1000
	SET_LINKS(p);
1001 1002
	if (p->ptrace & PT_PTRACED)
		__ptrace_link(p, current->parent);
1003 1004 1005

	attach_pid(p, PIDTYPE_PID, p->pid);
	if (thread_group_leader(p)) {
1006
		attach_pid(p, PIDTYPE_TGID, p->tgid);
1007 1008
		attach_pid(p, PIDTYPE_PGID, p->pgrp);
		attach_pid(p, PIDTYPE_SID, p->session);
1009
		if (p->pid)
1010
			__get_cpu_var(process_counts)++;
1011 1012
	} else
		link_pid(p, p->pids + PIDTYPE_TGID, &p->group_leader->pids[PIDTYPE_TGID].pid);
1013

Linus Torvalds's avatar
Linus Torvalds committed
1014 1015
	nr_threads++;
	write_unlock_irq(&tasklist_lock);
Rusty Russell's avatar
Rusty Russell committed
1016
	retval = 0;
Linus Torvalds's avatar
Linus Torvalds committed
1017 1018

fork_out:
Rusty Russell's avatar
Rusty Russell committed
1019 1020 1021
	if (retval)
		return ERR_PTR(retval);
	return p;
Linus Torvalds's avatar
Linus Torvalds committed
1022

Linus Torvalds's avatar
Linus Torvalds committed
1023 1024
bad_fork_cleanup_namespace:
	exit_namespace(p);
Linus Torvalds's avatar
Linus Torvalds committed
1025 1026
bad_fork_cleanup_mm:
	exit_mm(p);
1027 1028
bad_fork_cleanup_signal:
	exit_signal(p);
Linus Torvalds's avatar
Linus Torvalds committed
1029 1030 1031 1032 1033 1034
bad_fork_cleanup_sighand:
	exit_sighand(p);
bad_fork_cleanup_fs:
	exit_fs(p); /* blocking */
bad_fork_cleanup_files:
	exit_files(p); /* blocking */
Dave Olien's avatar
Dave Olien committed
1035
bad_fork_cleanup_semundo:
Andrew Morton's avatar
Andrew Morton committed
1036
	exit_sem(p);
1037
bad_fork_cleanup_security:
1038
	security_task_free(p);
Linus Torvalds's avatar
Linus Torvalds committed
1039
bad_fork_cleanup:
1040 1041
	if (p->pid > 0)
		free_pidmap(p->pid);
1042 1043 1044 1045
	if (p->binfmt)
		module_put(p->binfmt->module);
bad_fork_cleanup_put_domain:
	module_put(p->thread_info->exec_domain->module);
Linus Torvalds's avatar
Linus Torvalds committed
1046 1047 1048 1049
bad_fork_cleanup_count:
	atomic_dec(&p->user->processes);
	free_uid(p->user);
bad_fork_free:
1050
	free_task(p);
Linus Torvalds's avatar
Linus Torvalds committed
1051 1052 1053
	goto fork_out;
}

1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069
/*
 * Decide which ptrace event (if any) the tracer should receive for
 * this clone, based on the clone flags and the tracer's PT_TRACE_*
 * settings.  Returns 0 when no event is to be reported.
 */
static inline int fork_traceflag(unsigned clone_flags)
{
	/* Untraced clones and the idle task never generate events. */
	if (clone_flags & (CLONE_UNTRACED | CLONE_IDLETASK))
		return 0;

	if (clone_flags & CLONE_VFORK)
		return (current->ptrace & PT_TRACE_VFORK) ?
			PTRACE_EVENT_VFORK : 0;

	/* A non-SIGCHLD exit signal marks this as a clone(), not a fork(). */
	if ((clone_flags & CSIGNAL) != SIGCHLD)
		return (current->ptrace & PT_TRACE_CLONE) ?
			PTRACE_EVENT_CLONE : 0;

	return (current->ptrace & PT_TRACE_FORK) ? PTRACE_EVENT_FORK : 0;
}

1070 1071 1072 1073 1074 1075
/*
 *  Ok, this is the main fork-routine.
 *
 * It copies the process, and if successful kick-starts
 * it and waits for it to finish using the VM if required.
 */
1076 1077 1078 1079 1080 1081
long do_fork(unsigned long clone_flags,
	      unsigned long stack_start,
	      struct pt_regs *regs,
	      unsigned long stack_size,
	      int __user *parent_tidptr,
	      int __user *child_tidptr)
1082 1083
{
	struct task_struct *p;
1084
	int trace = 0;
1085
	long pid;
1086 1087 1088 1089 1090 1091

	if (unlikely(current->ptrace)) {
		trace = fork_traceflag (clone_flags);
		if (trace)
			clone_flags |= CLONE_PTRACE;
	}
1092

1093
	p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr);
1094 1095 1096 1097 1098 1099
	/*
	 * Do this prior waking up the new thread - the thread pointer
	 * might get invalid after that point, if the thread exits quickly.
	 */
	pid = IS_ERR(p) ? PTR_ERR(p) : p->pid;

1100 1101 1102 1103 1104 1105 1106 1107
	if (!IS_ERR(p)) {
		struct completion vfork;

		if (clone_flags & CLONE_VFORK) {
			p->vfork_done = &vfork;
			init_completion(&vfork);
		}

Ingo Molnar's avatar
Ingo Molnar committed
1108 1109 1110 1111 1112 1113 1114
		if (p->ptrace & PT_PTRACED) {
			/*
			 * We'll start up with an immediate SIGSTOP.
			 */
			sigaddset(&p->pending.signal, SIGSTOP);
			set_tsk_thread_flag(p, TIF_SIGPENDING);
		}
1115 1116 1117

		wake_up_forked_process(p);		/* do this last */
		++total_forks;
1118 1119

		if (unlikely (trace)) {
1120
			current->ptrace_message = pid;
1121 1122 1123
			ptrace_notify ((trace << 8) | SIGTRAP);
		}

1124
		if (clone_flags & CLONE_VFORK) {
1125
			wait_for_completion(&vfork);
1126 1127 1128
			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
				ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
		} else
1129 1130 1131 1132 1133 1134
			/*
			 * Let the child process run first, to avoid most of the
			 * COW overhead when the child exec()s afterwards.
			 */
			set_need_resched();
	}
1135
	return pid;
1136 1137
}

1138 1139 1140 1141 1142
/* SLAB cache for signal_struct structures (tsk->signal) */
kmem_cache_t *signal_cachep;

/* SLAB cache for sighand_struct structures (tsk->sighand) */
kmem_cache_t *sighand_cachep;

/* SLAB cache for files_struct structures (tsk->files) */
kmem_cache_t *files_cachep;

/* SLAB cache for fs_struct structures (tsk->fs) */
kmem_cache_t *fs_cachep;

/* SLAB cache for vm_area_struct structures */
kmem_cache_t *vm_area_cachep;

/* SLAB cache for mm_struct structures (tsk->mm) */
kmem_cache_t *mm_cachep;

/*
 * Create the slab caches for the per-process structures that fork
 * hands out.  A failure here is fatal: without these caches no new
 * process can ever be created.
 */
void __init proc_caches_init(void)
{
	if (!(sighand_cachep = kmem_cache_create("sighand_cache",
				sizeof(struct sighand_struct), 0,
				SLAB_HWCACHE_ALIGN, NULL, NULL)))
		panic("Cannot create sighand SLAB cache");

	if (!(signal_cachep = kmem_cache_create("signal_cache",
				sizeof(struct signal_struct), 0,
				SLAB_HWCACHE_ALIGN, NULL, NULL)))
		panic("Cannot create signal SLAB cache");

	if (!(files_cachep = kmem_cache_create("files_cache",
				sizeof(struct files_struct), 0,
				SLAB_HWCACHE_ALIGN, NULL, NULL)))
		panic("Cannot create files SLAB cache");

	if (!(fs_cachep = kmem_cache_create("fs_cache",
				sizeof(struct fs_struct), 0,
				SLAB_HWCACHE_ALIGN, NULL, NULL)))
		panic("Cannot create fs_struct SLAB cache");

	/* NOTE: unlike the others, vm_area_struct is cached without
	 * SLAB_HWCACHE_ALIGN. */
	if (!(vm_area_cachep = kmem_cache_create("vm_area_struct",
				sizeof(struct vm_area_struct), 0,
				0, NULL, NULL)))
		panic("vma_init: Cannot alloc vm_area_struct SLAB cache");

	if (!(mm_cachep = kmem_cache_create("mm_struct",
				sizeof(struct mm_struct), 0,
				SLAB_HWCACHE_ALIGN, NULL, NULL)))
		panic("vma_init: Cannot alloc mm_struct SLAB cache");
}