/*
 *  linux/mm/vmalloc.c
 *
 *  Copyright (C) 1993  Linus Torvalds
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 *  SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
 *  Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>

#include <linux/vmalloc.h>

#include <asm/uaccess.h>
#include <asm/tlbflush.h>

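/*
 * Every allocated kernel virtual area is kept on this singly linked
 * list, sorted by address and protected by vmlist_lock.
 */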
rwlock_t vmlist_lock = RW_LOCK_UNLOCKED;
struct vm_struct *vmlist;

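/*
 * Clear the kernel PTEs for [address, address + size) within one
 * pmd.  The pages themselves are not freed here; for vfree() that
 * happens later in __vunmap().
 */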
static void unmap_area_pte(pmd_t *pmd, unsigned long address,
				  unsigned long size)
{
	unsigned long end;
	pte_t *pte;

	if (pmd_none(*pmd))
		return;
	if (pmd_bad(*pmd)) {
		pmd_ERROR(*pmd);
		pmd_clear(pmd);
		return;
	}

	pte = pte_offset_kernel(pmd, address);
	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;

	do {
		pte_t page;
		page = ptep_get_and_clear(pte);
		address += PAGE_SIZE;
		pte++;
		if (pte_none(page))
			continue;
		if (pte_present(page))
			continue;
		printk(KERN_CRIT "Whee.. Swapped out page in kernel page table\n");
	} while (address < end);
}

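/*
 * Walk the pmd entries under one pgd slot and clear each covered
 * pte range via unmap_area_pte().
 */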
static void unmap_area_pmd(pgd_t *dir, unsigned long address,
				  unsigned long size)
{
	unsigned long end;
	pmd_t *pmd;

	if (pgd_none(*dir))
		return;
	if (pgd_bad(*dir)) {
		pgd_ERROR(*dir);
		pgd_clear(dir);
		return;
	}

	pmd = pmd_offset(dir, address);
	address &= ~PGDIR_MASK;
	end = address + size;
	if (end > PGDIR_SIZE)
		end = PGDIR_SIZE;

	do {
		unmap_area_pte(pmd, address, end - address);
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address < end);
}

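/*
 * Install PTEs for [address, address + size) within one pmd, taking
 * successive pages from *pages and advancing it as they are mapped.
 */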
static int map_area_pte(pte_t *pte, unsigned long address,
			       unsigned long size, pgprot_t prot,
			       struct page ***pages)
{
	unsigned long end;

	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;

	do {
		struct page *page = **pages;

		WARN_ON(!pte_none(*pte));
		if (!page)
			return -ENOMEM;

		set_pte(pte, mk_pte(page, prot));
		address += PAGE_SIZE;
		pte++;
		(*pages)++;
	} while (address < end);
	return 0;
}

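/*
 * Allocate kernel page tables as needed and hand each pmd-sized
 * chunk of the range to map_area_pte().
 */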
static int map_area_pmd(pmd_t *pmd, unsigned long address,
			       unsigned long size, pgprot_t prot,
			       struct page ***pages)
{
	unsigned long base, end;

	base = address & PGDIR_MASK;
	address &= ~PGDIR_MASK;
	end = address + size;
	if (end > PGDIR_SIZE)
		end = PGDIR_SIZE;

	do {
		pte_t *pte = pte_alloc_kernel(&init_mm, pmd, base + address);
		if (!pte)
			return -ENOMEM;
		if (map_area_pte(pte, address, end - address, prot, pages))
			return -ENOMEM;
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address < end);

	return 0;
}

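/*
 * Remove the page table entries spanning @area (its guard page
 * included) and flush the cache and TLB for the range.
 */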
void unmap_vm_area(struct vm_struct *area)
{
	unsigned long address = (unsigned long) area->addr;
	unsigned long end = (address + area->size);
	pgd_t *dir;

	dir = pgd_offset_k(address);
	flush_cache_vunmap(address, end);
	do {
		unmap_area_pmd(dir, address, end - address);
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	} while (address && (address < end));
	flush_tlb_kernel_range((unsigned long) area->addr, end);
}

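/*
 * Create the kernel page table entries for @area, leaving its final
 * (guard) page unmapped.  *pages is advanced as pages are consumed,
 * and init_mm.page_table_lock guards the table walk.
 */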
int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
{
	unsigned long address = (unsigned long) area->addr;
	unsigned long end = address + (area->size - PAGE_SIZE);
	pgd_t *dir;
	int err = 0;

	dir = pgd_offset_k(address);
	spin_lock(&init_mm.page_table_lock);
	do {
		pmd_t *pmd = pmd_alloc(&init_mm, dir, address);
		if (!pmd) {
			err = -ENOMEM;
			break;
		}
		if (map_area_pmd(pmd, address, end - address, prot, pages)) {
			err = -ENOMEM;
			break;
		}

		address = (address + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	} while (address && (address < end));

	spin_unlock(&init_mm.page_table_lock);
	flush_cache_vmap((unsigned long) area->addr, end);
	return err;
}

#define IOREMAP_MAX_ORDER	(7 + PAGE_SHIFT)	/* 128 pages */

struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
				unsigned long start, unsigned long end)
{
	struct vm_struct **p, *tmp, *area;
	unsigned long align = 1;
	unsigned long addr;

	if (flags & VM_IOREMAP) {
		int bit = fls(size);

		if (bit > IOREMAP_MAX_ORDER)
			bit = IOREMAP_MAX_ORDER;
		else if (bit < PAGE_SHIFT)
			bit = PAGE_SHIFT;

		align = 1ul << bit;
	}
	addr = ALIGN(start, align);

	area = kmalloc(sizeof(*area), GFP_KERNEL);
	if (unlikely(!area))
		return NULL;

	/*
	 * We always allocate a guard page.
	 */
	size += PAGE_SIZE;
	if (unlikely(!size)) {
		kfree(area);
		return NULL;
	}

	write_lock(&vmlist_lock);
	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
		if ((unsigned long)tmp->addr < addr) {
			if ((unsigned long)tmp->addr + tmp->size >= addr)
				addr = ALIGN(tmp->size +
					     (unsigned long)tmp->addr, align);
			continue;
		}
		if ((size + addr) < addr)
			goto out;
		if (size + addr <= (unsigned long)tmp->addr)
			goto found;
		addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align);
		if (addr > end - size)
			goto out;
	}

found:
	area->next = *p;
	*p = area;

	area->flags = flags;
	area->addr = (void *)addr;
	area->size = size;
	area->pages = NULL;
	area->nr_pages = 0;
	area->phys_addr = 0;
	write_unlock(&vmlist_lock);

	return area;

out:
	write_unlock(&vmlist_lock);
	kfree(area);
	return NULL;
}

/**
 *	get_vm_area  -  reserve a contiguous kernel virtual area
 *
 *	@size:		size of the area
 *	@flags:		%VM_IOREMAP for I/O mappings or VM_ALLOC
 *
 *	Search for an area of @size in the kernel virtual mapping area,
 *	and reserve it for our purposes.  Returns the area descriptor
 *	on success or %NULL on failure.
 */
struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
{
	return __get_vm_area(size, flags, VMALLOC_START, VMALLOC_END);
}

/**
 *	remove_vm_area  -  find and remove a contiguous kernel virtual area
 *
 *	@addr:		base address
 *
 *	Search for the kernel VM area starting at @addr, and remove it.
 *	This function returns the found VM area, but using it is NOT safe
 *	on SMP machines.
 */
struct vm_struct *remove_vm_area(void *addr)
{
	struct vm_struct **p, *tmp;

	write_lock(&vmlist_lock);
	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
		if (tmp->addr == addr)
			goto found;
	}
	write_unlock(&vmlist_lock);
	return NULL;

found:
	unmap_vm_area(tmp);
	*p = tmp->next;
	write_unlock(&vmlist_lock);
	return tmp;
}
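
/*
 * Illustrative pairing of the two primitives above (a sketch, not
 * part of the original file), in the style of ioremap()/iounmap():
 * reserve a virtual area, install mappings by other means, and later
 * remove the area again.  remove_vm_area() only unmaps; the caller
 * still frees the descriptor.
 *
 *	struct vm_struct *area;
 *
 *	area = get_vm_area(size, VM_IOREMAP);
 *	if (!area)
 *		return NULL;
 *	... install the physical mapping over area->addr ...
 *	kfree(remove_vm_area(area->addr));
 */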

void __vunmap(void *addr, int deallocate_pages)
{
	struct vm_struct *area;

	if (!addr)
		return;

	if ((PAGE_SIZE-1) & (unsigned long)addr) {
		printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
		WARN_ON(1);
		return;
	}

	area = remove_vm_area(addr);
	if (unlikely(!area)) {
		printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
				addr);
		WARN_ON(1);
		return;
	}

	if (deallocate_pages) {
		int i;

		for (i = 0; i < area->nr_pages; i++) {
			if (unlikely(!area->pages[i]))
				BUG();
			__free_page(area->pages[i]);
		}

		kfree(area->pages);
	}

	kfree(area);
	return;
}

/**
 *	vfree  -  release memory allocated by vmalloc()
 *
 *	@addr:		memory base address
 *
 *	Free the virtually contiguous memory area starting at @addr, as
 *	obtained from vmalloc(), vmalloc_32() or __vmalloc().
 *
 *	May not be called in interrupt context.
 */
void vfree(void *addr)
{
	BUG_ON(in_interrupt());
	__vunmap(addr, 1);
}

EXPORT_SYMBOL(vfree);

/**
 *	vunmap  -  release virtual mapping obtained by vmap()
 *
 *	@addr:		memory base address
 *
 *	Free the virtually contiguous memory area starting at @addr,
 *	which was created from the page array passed to vmap().
 *
 *	May not be called in interrupt context.
 */
void vunmap(void *addr)
{
	BUG_ON(in_interrupt());
	__vunmap(addr, 0);
}

EXPORT_SYMBOL(vunmap);

/**
 *	vmap  -  map an array of pages into virtually contiguous space
 *
 *	@pages:		array of page pointers
 *	@count:		number of pages to map
 *	@flags:		vm_area->flags
 *	@prot:		page protection for the mapping
 *
 *	Maps @count pages from @pages into contiguous kernel virtual
 *	space.
 */
void *vmap(struct page **pages, unsigned int count,
		unsigned long flags, pgprot_t prot)
{
	struct vm_struct *area;

	if (count > num_physpages)
		return NULL;

	area = get_vm_area((count << PAGE_SHIFT), flags);
	if (!area)
		return NULL;
	if (map_vm_area(area, prot, &pages)) {
		vunmap(area->addr);
		return NULL;
	}

	return area->addr;
}

EXPORT_SYMBOL(vmap);
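
/*
 * Illustrative use of vmap()/vunmap() (a sketch, not part of the
 * original file): map four individually allocated pages into one
 * contiguous kernel virtual range, then drop the mapping.  vunmap()
 * releases only the virtual range; the pages themselves must still
 * be freed by the caller.
 *
 *	struct page *pages[4];
 *	void *virt;
 *	int i;
 *
 *	for (i = 0; i < 4; i++)
 *		pages[i] = alloc_page(GFP_KERNEL);
 *	virt = vmap(pages, 4, 0, PAGE_KERNEL);
 *	if (virt) {
 *		memset(virt, 0, 4 * PAGE_SIZE);
 *		vunmap(virt);
 *	}
 *	for (i = 0; i < 4; i++)
 *		if (pages[i])
 *			__free_page(pages[i]);
 */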

/**
 *	__vmalloc  -  allocate virtually contiguous memory
 *
 *	@size:		allocation size
 *	@gfp_mask:	flags for the page level allocator
 *	@prot:		protection mask for the allocated pages
 *
 *	Allocate enough pages to cover @size from the page level
 *	allocator with @gfp_mask flags.  Map them into contiguous
 *	kernel virtual space, using a pagetable protection of @prot.
 */
void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot)
{
	struct vm_struct *area;
	struct page **pages;
	unsigned int nr_pages, array_size, i;

	size = PAGE_ALIGN(size);
	if (!size || (size >> PAGE_SHIFT) > num_physpages)
		return NULL;

	area = get_vm_area(size, VM_ALLOC);
	if (!area)
		return NULL;

	nr_pages = size >> PAGE_SHIFT;
	array_size = (nr_pages * sizeof(struct page *));

	area->nr_pages = nr_pages;
	area->pages = pages = kmalloc(array_size, (gfp_mask & ~__GFP_HIGHMEM));
	if (!area->pages) {
		remove_vm_area(area->addr);
		kfree(area);
		return NULL;
	}
	memset(area->pages, 0, array_size);

	for (i = 0; i < area->nr_pages; i++) {
		area->pages[i] = alloc_page(gfp_mask);
		if (unlikely(!area->pages[i])) {
			/* Successfully allocated i pages, free them in __vunmap() */
			area->nr_pages = i;
			goto fail;
		}
	}

	if (map_vm_area(area, prot, &pages))
		goto fail;
	return area->addr;

fail:
	vfree(area->addr);
	return NULL;
}

EXPORT_SYMBOL(__vmalloc);

/**
 *	vmalloc  -  allocate virtually contiguous memory
 *
 *	@size:		allocation size
 *
 *	Allocate enough pages to cover @size from the page level
 *	allocator and map them into contiguous kernel virtual space.
 *
 *	For tight control over page level allocator and protection flags
 *	use __vmalloc() instead.
 */
void *vmalloc(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
}

EXPORT_SYMBOL(vmalloc);
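
/*
 * Illustrative use (a sketch, not part of the original file):
 * vmalloc() returns memory that is contiguous in kernel virtual
 * space but not necessarily physically contiguous, so it is not
 * suitable for DMA.  Both vmalloc() and vfree() may sleep and so
 * must not be called from interrupt context.
 *
 *	void *buf;
 *
 *	buf = vmalloc(64 * 1024);
 *	if (!buf)
 *		return -ENOMEM;
 *	memset(buf, 0, 64 * 1024);
 *	vfree(buf);
 */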

/**
 *	vmalloc_exec  -  allocate virtually contiguous, executable memory
 *
 *	@size:		allocation size
 *
 *	Kernel-internal function to allocate enough pages to cover @size
 *	from the page level allocator and map them into contiguous and
 *	executable kernel virtual space.
 *
 *	For tight control over page level allocator and protection flags
 *	use __vmalloc() instead.
 */

#ifndef PAGE_KERNEL_EXEC
# define PAGE_KERNEL_EXEC PAGE_KERNEL
#endif

void *vmalloc_exec(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
}

/**
 *	vmalloc_32  -  allocate virtually contiguous memory (32bit addressable)
 *
 *	@size:		allocation size
 *
 *	Allocate enough 32bit PA addressable pages to cover @size from the
 *	page level allocator and map them into contiguous kernel virtual space.
 */
void *vmalloc_32(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
}

EXPORT_SYMBOL(vmalloc_32);

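/*
 * Copy at most @count bytes of vmalloc()ed data starting at @addr
 * into @buf, zero-filling gaps between areas (used, for example, by
 * the /dev/kmem read path).  Returns the number of bytes stored in
 * @buf.
 */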
long vread(char *buf, char *addr, unsigned long count)
{
	struct vm_struct *tmp;
	char *vaddr, *buf_start = buf;
	unsigned long n;

	/* Don't allow overflow */
	if ((unsigned long) addr + count < count)
		count = -(unsigned long) addr;

	read_lock(&vmlist_lock);
	for (tmp = vmlist; tmp; tmp = tmp->next) {
		vaddr = (char *) tmp->addr;
		if (addr >= vaddr + tmp->size - PAGE_SIZE)
			continue;
		while (addr < vaddr) {
			if (count == 0)
				goto finished;
			*buf = '\0';
			buf++;
			addr++;
			count--;
		}
		n = vaddr + tmp->size - PAGE_SIZE - addr;
		do {
			if (count == 0)
				goto finished;
			*buf = *addr;
			buf++;
			addr++;
			count--;
		} while (--n > 0);
	}
finished:
	read_unlock(&vmlist_lock);
	return buf - buf_start;
}

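/*
 * Copy at most @count bytes from @buf into the vmalloc()ed area
 * starting at @addr, skipping over gaps between areas (the write
 * counterpart of vread(), used by the /dev/kmem write path).
 * Returns the number of bytes consumed from @buf.
 */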
long vwrite(char *buf, char *addr, unsigned long count)
{
	struct vm_struct *tmp;
	char *vaddr, *buf_start = buf;
	unsigned long n;

	/* Don't allow overflow */
	if ((unsigned long) addr + count < count)
		count = -(unsigned long) addr;

	read_lock(&vmlist_lock);
	for (tmp = vmlist; tmp; tmp = tmp->next) {
		vaddr = (char *) tmp->addr;
		if (addr >= vaddr + tmp->size - PAGE_SIZE)
			continue;
		while (addr < vaddr) {
			if (count == 0)
				goto finished;
			buf++;
			addr++;
			count--;
		}
		n = vaddr + tmp->size - PAGE_SIZE - addr;
		do {
			if (count == 0)
				goto finished;
			*addr = *buf;
			buf++;
			addr++;
			count--;
		} while (--n > 0);
	}
finished:
	read_unlock(&vmlist_lock);
	return buf - buf_start;
}