/*
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Licensed under the GPL
 */

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/hardirq.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <asm/current.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <arch.h>
#include <as-layout.h>
#include <kern_util.h>
#include <os.h>
#include <skas.h>

/*
 * Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by
 * segv().
 */
Jeff Dike's avatar
Jeff Dike committed
24
int handle_page_fault(unsigned long address, unsigned long ip,
Linus Torvalds's avatar
Linus Torvalds committed
25 26 27 28 29 30 31 32 33
		      int is_write, int is_user, int *code_out)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	int err = -EFAULT;
34
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
Linus Torvalds's avatar
Linus Torvalds committed
35 36

	*code_out = SEGV_MAPERR;
37

Jeff Dike's avatar
Jeff Dike committed
38
	/*
39
	 * If the fault was with pagefaults disabled, don't take the fault, just
Jeff Dike's avatar
Jeff Dike committed
40 41
	 * fail.
	 */
42
	if (faulthandler_disabled())
43 44
		goto out_nosemaphore;

45 46
	if (is_user)
		flags |= FAULT_FLAG_USER;
47
retry:
Linus Torvalds's avatar
Linus Torvalds committed
48 49
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
Jeff Dike's avatar
Jeff Dike committed
50
	if (!vma)
Linus Torvalds's avatar
Linus Torvalds committed
51
		goto out;
Jeff Dike's avatar
Jeff Dike committed
52
	else if (vma->vm_start <= address)
Linus Torvalds's avatar
Linus Torvalds committed
53
		goto good_area;
Jeff Dike's avatar
Jeff Dike committed
54
	else if (!(vma->vm_flags & VM_GROWSDOWN))
Linus Torvalds's avatar
Linus Torvalds committed
55
		goto out;
Jeff Dike's avatar
Jeff Dike committed
56
	else if (is_user && !ARCH_IS_STACKGROW(address))
Linus Torvalds's avatar
Linus Torvalds committed
57
		goto out;
Jeff Dike's avatar
Jeff Dike committed
58
	else if (expand_stack(vma, address))
Linus Torvalds's avatar
Linus Torvalds committed
59 60
		goto out;

61
good_area:
Linus Torvalds's avatar
Linus Torvalds committed
62
	*code_out = SEGV_ACCERR;
63 64 65 66 67 68 69 70 71
	if (is_write) {
		if (!(vma->vm_flags & VM_WRITE))
			goto out;
		flags |= FAULT_FLAG_WRITE;
	} else {
		/* Don't require VM_READ|VM_EXEC for write faults! */
		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
			goto out;
	}
Jeff Dike's avatar
Jeff Dike committed
72

Linus Torvalds's avatar
Linus Torvalds committed
73
	do {
Nick Piggin's avatar
Nick Piggin committed
74
		int fault;
75

76 77 78 79 80
		fault = handle_mm_fault(mm, vma, address, flags);

		if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
			goto out_nosemaphore;

Nick Piggin's avatar
Nick Piggin committed
81 82 83
		if (unlikely(fault & VM_FAULT_ERROR)) {
			if (fault & VM_FAULT_OOM) {
				goto out_of_memory;
84 85
			} else if (fault & VM_FAULT_SIGSEGV) {
				goto out;
Nick Piggin's avatar
Nick Piggin committed
86 87 88 89
			} else if (fault & VM_FAULT_SIGBUS) {
				err = -EACCES;
				goto out;
			}
Linus Torvalds's avatar
Linus Torvalds committed
90 91
			BUG();
		}
92 93 94 95 96 97 98
		if (flags & FAULT_FLAG_ALLOW_RETRY) {
			if (fault & VM_FAULT_MAJOR)
				current->maj_flt++;
			else
				current->min_flt++;
			if (fault & VM_FAULT_RETRY) {
				flags &= ~FAULT_FLAG_ALLOW_RETRY;
99
				flags |= FAULT_FLAG_TRIED;
100 101 102 103

				goto retry;
			}
		}
Nick Piggin's avatar
Nick Piggin committed
104

105 106 107 108
		pgd = pgd_offset(mm, address);
		pud = pud_offset(pgd, address);
		pmd = pmd_offset(pud, address);
		pte = pte_offset_kernel(pmd, address);
Jeff Dike's avatar
Jeff Dike committed
109
	} while (!pte_present(*pte));
Linus Torvalds's avatar
Linus Torvalds committed
110
	err = 0;
Jeff Dike's avatar
Jeff Dike committed
111 112
	/*
	 * The below warning was added in place of
113 114 115 116 117 118 119
	 *	pte_mkyoung(); if (is_write) pte_mkdirty();
	 * If it's triggered, we'd see normally a hang here (a clean pte is
	 * marked read-only to emulate the dirty bit).
	 * However, the generic code can mark a PTE writable but clean on a
	 * concurrent read fault, triggering this harmlessly. So comment it out.
	 */
#if 0
120
	WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
121
#endif
122 123
	flush_tlb_page(vma, address);
out:
Linus Torvalds's avatar
Linus Torvalds committed
124
	up_read(&mm->mmap_sem);
125
out_nosemaphore:
Jeff Dike's avatar
Jeff Dike committed
126
	return err;
Linus Torvalds's avatar
Linus Torvalds committed
127 128

out_of_memory:
129 130 131 132 133
	/*
	 * We ran out of memory, call the OOM killer, and return the userspace
	 * (which will retry the fault, or kill us if we got oom-killed).
	 */
	up_read(&mm->mmap_sem);
134 135
	if (!is_user)
		goto out_nosemaphore;
136 137
	pagefault_out_of_memory();
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
138
}
139
EXPORT_SYMBOL(handle_page_fault);
static void show_segv_info(struct uml_pt_regs *regs)
{
	struct task_struct *tsk = current;
	struct faultinfo *fi = UPT_FAULTINFO(regs);

	if (!unhandled_signal(tsk, SIGSEGV))
		return;

	if (!printk_ratelimit())
		return;

	printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x",
		task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
		tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
		(void *)UPT_IP(regs), (void *)UPT_SP(regs),
		fi->error_code);

	print_vma_addr(KERN_CONT " in ", UPT_IP(regs));
	printk(KERN_CONT "\n");
}

/*
 * Force a SIGSEGV with si_code SEGV_ACCERR onto the current task.
 *
 * @fi: copy of the fault information; its address becomes si_addr and the
 *      whole structure is stored in current->thread.arch.faultinfo
 * @ip: instruction pointer of the fault (not used here)
 */
static void bad_segv(struct faultinfo fi, unsigned long ip)
{
	struct siginfo si;

	/*
	 * Zero the whole siginfo first: only a few fields are filled in
	 * below, and the structure is handed on for delivery to the task,
	 * so stale stack contents must not remain in si_errno or in the
	 * unused parts of the structure.
	 */
	memset(&si, 0, sizeof(si));
	si.si_signo = SIGSEGV;
	si.si_code = SEGV_ACCERR;
	si.si_addr = (void __user *) FAULT_ADDRESS(fi);
	current->thread.arch.faultinfo = fi;
	force_sig_info(SIGSEGV, &si, current);
}

/*
 * Kill the current task with SIGSEGV: the signal is forced with
 * force_sigsegv() and then processed straight away through do_signal().
 */
void fatal_sigsegv(void)
{
	force_sigsegv(SIGSEGV, current);
	do_signal(&current->thread.regs);

	/*
	 * do_signal() can return in general, but is not expected to here
	 * because a fatal SIGSEGV has just been queued.  The call below is
	 * what tells gcc that this function does not return.
	 */
	os_dump_core();
}

void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
186 187 188
{
	struct faultinfo * fi = UPT_FAULTINFO(regs);

Jeff Dike's avatar
Jeff Dike committed
189
	if (UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)) {
190
		show_segv_info(regs);
191 192 193 194 195 196
		bad_segv(*fi, UPT_IP(regs));
		return;
	}
	segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs);
}

/*
 * We give a *copy* of the faultinfo in the regs to segv.
 * This must be done, since nesting SEGVs could overwrite
 * the info in the regs. A pointer to the info then would
 * give us bad data!
 */
Jeff Dike's avatar
Jeff Dike committed
203
unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
204
		   struct uml_pt_regs *regs)
Linus Torvalds's avatar
Linus Torvalds committed
205 206
{
	struct siginfo si;
Jeff Dike's avatar
Jeff Dike committed
207
	jmp_buf *catcher;
Linus Torvalds's avatar
Linus Torvalds committed
208
	int err;
Jeff Dike's avatar
Jeff Dike committed
209 210
	int is_write = FAULT_WRITE(fi);
	unsigned long address = FAULT_ADDRESS(fi);
Linus Torvalds's avatar
Linus Torvalds committed
211

212
	if (!is_user && regs)
213 214
		current->thread.segv_regs = container_of(regs, struct pt_regs, regs);

Jeff Dike's avatar
Jeff Dike committed
215
	if (!is_user && (address >= start_vm) && (address < end_vm)) {
Jeff Dike's avatar
Jeff Dike committed
216
		flush_tlb_kernel_vm();
217
		goto out;
Jeff Dike's avatar
Jeff Dike committed
218
	}
Jeff Dike's avatar
Jeff Dike committed
219
	else if (current->mm == NULL) {
220
		show_regs(container_of(regs, struct pt_regs, regs));
Jeff Dike's avatar
Jeff Dike committed
221
		panic("Segfault with no mm");
222
	}
223
	else if (!is_user && address > PAGE_SIZE && address < TASK_SIZE) {
224 225 226 227
		show_regs(container_of(regs, struct pt_regs, regs));
		panic("Kernel tried to access user memory at addr 0x%lx, ip 0x%lx",
		       address, ip);
	}
228

229
	if (SEGV_IS_FIXABLE(&fi))
Jeff Dike's avatar
Jeff Dike committed
230 231
		err = handle_page_fault(address, ip, is_write, is_user,
					&si.si_code);
232 233
	else {
		err = -EFAULT;
Jeff Dike's avatar
Jeff Dike committed
234 235 236 237 238
		/*
		 * A thread accessed NULL, we get a fault, but CR2 is invalid.
		 * This code is used in __do_copy_from_user() of TT mode.
		 * XXX tt mode is gone, so maybe this isn't needed any more
		 */
239 240
		address = 0;
	}
Linus Torvalds's avatar
Linus Torvalds committed
241 242

	catcher = current->thread.fault_catcher;
Jeff Dike's avatar
Jeff Dike committed
243
	if (!err)
244
		goto out;
Jeff Dike's avatar
Jeff Dike committed
245
	else if (catcher != NULL) {
Linus Torvalds's avatar
Linus Torvalds committed
246
		current->thread.fault_addr = (void *) address;
Jeff Dike's avatar
Jeff Dike committed
247
		UML_LONGJMP(catcher, 1);
Jeff Dike's avatar
Jeff Dike committed
248
	}
Jeff Dike's avatar
Jeff Dike committed
249
	else if (current->thread.fault_addr != NULL)
Linus Torvalds's avatar
Linus Torvalds committed
250
		panic("fault_addr set but no fault catcher");
Jeff Dike's avatar
Jeff Dike committed
251
	else if (!is_user && arch_fixup(ip, regs))
252
		goto out;
Linus Torvalds's avatar
Linus Torvalds committed
253

Jeff Dike's avatar
Jeff Dike committed
254
	if (!is_user) {
255
		show_regs(container_of(regs, struct pt_regs, regs));
Jeff Dike's avatar
Jeff Dike committed
256
		panic("Kernel mode fault at addr 0x%lx, ip 0x%lx",
Linus Torvalds's avatar
Linus Torvalds committed
257
		      address, ip);
258
	}
Linus Torvalds's avatar
Linus Torvalds committed
259

260 261
	show_segv_info(regs);

262
	if (err == -EACCES) {
Linus Torvalds's avatar
Linus Torvalds committed
263 264 265
		si.si_signo = SIGBUS;
		si.si_errno = 0;
		si.si_code = BUS_ADRERR;
Al Viro's avatar
Al Viro committed
266
		si.si_addr = (void __user *)address;
Jeff Dike's avatar
Jeff Dike committed
267
		current->thread.arch.faultinfo = fi;
Linus Torvalds's avatar
Linus Torvalds committed
268
		force_sig_info(SIGBUS, &si, current);
269 270
	} else {
		BUG_ON(err != -EFAULT);
Linus Torvalds's avatar
Linus Torvalds committed
271
		si.si_signo = SIGSEGV;
Al Viro's avatar
Al Viro committed
272
		si.si_addr = (void __user *) address;
Jeff Dike's avatar
Jeff Dike committed
273
		current->thread.arch.faultinfo = fi;
Linus Torvalds's avatar
Linus Torvalds committed
274 275
		force_sig_info(SIGSEGV, &si, current);
	}
276 277 278 279 280

out:
	if (regs)
		current->thread.segv_regs = NULL;

Jeff Dike's avatar
Jeff Dike committed
281
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
282 283
}

void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs)
Linus Torvalds's avatar
Linus Torvalds committed
285
{
286 287 288
	struct faultinfo *fi;
	struct siginfo clean_si;

Jeff Dike's avatar
Jeff Dike committed
289 290 291 292
	if (!UPT_IS_USER(regs)) {
		if (sig == SIGBUS)
			printk(KERN_ERR "Bus error - the host /dev/shm or /tmp "
			       "mount likely just ran out of space\n");
Linus Torvalds's avatar
Linus Torvalds committed
293
		panic("Kernel mode signal %d", sig);
294 295
	}

Jeff Dike's avatar
Jeff Dike committed
296 297
	arch_examine_signal(sig, regs);

298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320
	memset(&clean_si, 0, sizeof(clean_si));
	clean_si.si_signo = si->si_signo;
	clean_si.si_errno = si->si_errno;
	clean_si.si_code = si->si_code;
	switch (sig) {
	case SIGILL:
	case SIGFPE:
	case SIGSEGV:
	case SIGBUS:
	case SIGTRAP:
		fi = UPT_FAULTINFO(regs);
		clean_si.si_addr = (void __user *) FAULT_ADDRESS(*fi);
		current->thread.arch.faultinfo = *fi;
#ifdef __ARCH_SI_TRAPNO
		clean_si.si_trapno = si->si_trapno;
#endif
		break;
	default:
		printk(KERN_ERR "Attempted to relay unknown signal %d (si_code = %d)\n",
			sig, si->si_code);
	}

	force_sig_info(sig, &clean_si, current);
Linus Torvalds's avatar
Linus Torvalds committed
321 322
}

void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs)
Linus Torvalds's avatar
Linus Torvalds committed
324
{
Jeff Dike's avatar
Jeff Dike committed
325
	if (current->thread.fault_catcher != NULL)
Jeff Dike's avatar
Jeff Dike committed
326
		UML_LONGJMP(current->thread.fault_catcher, 1);
327 328
	else
		relay_signal(sig, si, regs);
Linus Torvalds's avatar
Linus Torvalds committed
329 330
}

void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
Linus Torvalds's avatar
Linus Torvalds committed
332 333 334 335 336 337 338
{
	do_IRQ(WINCH_IRQ, regs);
}

/*
 * Trap initialisation hook.  Deliberately empty: this file performs no
 * trap set-up here.  (Presumably the symbol is required by generic init
 * code - confirm against the caller.)
 */
void trap_init(void)
{
}