/*
 * PowerPC64 port by Mike Corrigan and Dave Engebretsen
 *   {mikejc|engebret}@us.ibm.com
 *
 *    Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com>
 *
 * SMP scalability work:
 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 * 
 *    Module name: htab.c
 *
 *    Description:
 *      PowerPC Hashed Page Table functions
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <linux/spinlock.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/sysctl.h>
#include <linux/ctype.h>
#include <linux/cache.h>

#include <asm/ppcdebug.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
#include <asm/types.h>
#include <asm/init.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/naca.h>
#include <asm/pmc.h>
#include <asm/machdep.h>
#include <asm/lmb.h>
#include <asm/abs_addr.h>
#include <asm/tlbflush.h>
#include <asm/eeh.h>

/*
 * Note:  pte   --> Linux PTE
 *        HPTE  --> PowerPC Hashed Page Table Entry
 */

HTAB htab_data = {NULL, 0, 0, 0, 0};

extern unsigned long _SDR1;
extern unsigned long klimit;

extern unsigned long reloc_offset(void);
#define PTRRELOC(x)	((typeof(x))((unsigned long)(x) - offset))
#define PTRUNRELOC(x)	((typeof(x))((unsigned long)(x) + offset))
#define RELOC(x)	(*PTRRELOC(&(x)))

#define KB (1024)
#define MB (1024*KB)
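/*
 * Bolt HPTEs for the kernel virtual range [start, end) into the hash
 * table, stepping 16MB at a time for large pages and 4KB otherwise.
 * htab_data is accessed through PTRRELOC since this may run before
 * the kernel has been relocated to its linked address.
 */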
static inline void
create_pte_mapping(unsigned long start, unsigned long end,
		   unsigned long mode, unsigned long mask, int large)
{
	unsigned long addr, offset = reloc_offset();
	HTAB *_htab_data = PTRRELOC(&htab_data);
	HPTE *htab = (HPTE *)__v2a(_htab_data->htab);
	unsigned int step;

	if (large)
		step = 16*MB;
	else
		step = 4*KB;

	for (addr = start; addr < end; addr += step) {
		unsigned long vsid = get_kernel_vsid(addr);
		unsigned long va = (vsid << 28) | (addr & 0xfffffff);
		if (naca->platform == PLATFORM_PSERIES_LPAR)
			pSeries_lpar_make_pte(htab, va,
				(unsigned long)__v2a(addr), mode, mask, large);
		else
			pSeries_make_pte(htab, va,
				(unsigned long)__v2a(addr), mode, mask, large);
	}
}

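/*
 * Allocate and initialise the hash page table at boot, then create
 * bolted mappings for all of physical memory.
 */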
void
htab_initialize(void)
{
	unsigned long table, htab_size_bytes;
	unsigned long pteg_count;
	unsigned long mode_rw, mask;
	unsigned long offset = reloc_offset();
	struct naca_struct *_naca = RELOC(naca);
	HTAB *_htab_data = PTRRELOC(&htab_data);

	/*
	 * Calculate the required size of the htab.  We want the number of
	 * PTEGs to equal one half the number of real pages.
	 */ 
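	/* pftSize is log2 of the hash table size in bytes; each PTEG is
	 * 8 HPTEs of 16 bytes (128 bytes), hence the shift by 7. */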
	htab_size_bytes = 1UL << _naca->pftSize;
	pteg_count = htab_size_bytes >> 7;

	/* For debug, make the HTAB 1/8 as big as it normally would be. */
	ifppcdebug(PPCDBG_HTABSIZE) {
		pteg_count >>= 3;
		htab_size_bytes = pteg_count << 7;
	}

	_htab_data->htab_num_ptegs = pteg_count;
	_htab_data->htab_hash_mask = pteg_count - 1;

	if (naca->platform == PLATFORM_PSERIES) {
		/* Find storage for the HPT.  Must be contiguous in
		 * the absolute address space.
		 */
		table = lmb_alloc(htab_size_bytes, htab_size_bytes);
		if ( !table )
			panic("ERROR, cannot find space for HPTE\n");
		_htab_data->htab = (HPTE *)__a2v(table);

		/* htab absolute addr + encoded htabsize */
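		/* The low-order bits of SDR1 encode the table size as
		 * __ilog2(pteg_count) - 11, relative to the architectural
		 * minimum of 2048 PTEGs (a 256KB table). */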
		RELOC(_SDR1) = table + __ilog2(pteg_count) - 11;

		/* Initialize the HPT with no entries */
		memset((void *)table, 0, htab_size_bytes);
	} else {
		_htab_data->htab = NULL;
		RELOC(_SDR1) = 0; 
	}

	mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX;
	mask = pteg_count-1;

	/* XXX we currently map kernel text rw, should fix this */
	if (__is_processor(PV_POWER4) && _naca->physicalMemorySize > 256*MB) {
		create_pte_mapping((unsigned long)KERNELBASE, 
				   KERNELBASE + 256*MB, mode_rw, mask, 0);
		create_pte_mapping((unsigned long)KERNELBASE + 256*MB, 
				   KERNELBASE + (_naca->physicalMemorySize), 
				   mode_rw, mask, 1);
	} else {
		create_pte_mapping((unsigned long)KERNELBASE, 
				   KERNELBASE+(_naca->physicalMemorySize), 
				   mode_rw, mask, 0);
	}
}
#undef KB
#undef MB

/*
 * find_linux_pte returns the address of a linux pte for a given 
 * effective address and directory.  If not found, it returns zero.
 */
pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
{
	pgd_t *pg;
	pmd_t *pm;
	pte_t *pt = NULL;
	pte_t pte;

	pg = pgdir + pgd_index(ea);
	if (!pgd_none(*pg)) {

		pm = pmd_offset(pg, ea);
		if (!pmd_none(*pm)) { 
			pt = pte_offset_kernel(pm, ea);
			pte = *pt;
			if (!pte_present(pte))
				pt = NULL;
		}
	}

	return pt;
}

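/*
 * Derive the HPTE PP (page protection) bits from a Linux PTE: PP = 0
 * for kernel pages (no user access), PP = 2 (user read/write) for user
 * pages that are writable and dirty, and PP = 3 (read-only) otherwise,
 * so the first store faults and the DIRTY bit can be tracked in
 * software.
 */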
static inline unsigned long computeHptePP(unsigned long pte)
{
	return (pte & _PAGE_USER) |
		(((pte & _PAGE_USER) >> 1) &
		 ((~((pte >> 2) &	/* _PAGE_RW */
		     (pte >> 7))) &	/* _PAGE_DIRTY */
		  1));
}

/*
 * Handle a fault by adding an HPTE. If the address can't be determined
 * to be valid via the Linux page tables, return 1. If handled, return 0.
 */
int __hash_page(unsigned long ea, unsigned long access, unsigned long vsid,
		pte_t *ptep)
{
	unsigned long va, vpn;
	unsigned long newpp, prpn;
	unsigned long hpteflags;
	long slot;
	pte_t old_pte, new_pte;

	/* Search the Linux page table for a match with va */
	va = (vsid << 28) | (ea & 0x0fffffff);
	vpn = va >> PAGE_SHIFT;

	/*
	 * If no pte found or not present, send the problem up to
	 * do_page_fault
	 */
	if (!ptep || !pte_present(*ptep))
		return 1;

	/* 
	 * Check the user's access rights to the page.  If access should be
	 * prevented then send the problem up to do_page_fault.
	 */
	access |= _PAGE_PRESENT;
	if (access & ~(pte_val(*ptep)))
		return 1;

	/*
	 * At this point, we have a pte (old_pte) which can be used to build
	 * or update an HPTE. There are 2 cases:
	 *
	 * 1. There is a valid (present) pte with no associated HPTE (this is 
	 *	the most common case)
	 * 2. There is a valid (present) pte with an associated HPTE. The
	 *	current values of the pp bits in the HPTE prevent access
	 *	because we are doing software DIRTY bit management and the
	 *	page is currently not DIRTY. 
	 */

	old_pte = *ptep;
	new_pte = old_pte;
	/* If the attempted access was a store */
	if (access & _PAGE_RW)
		pte_val(new_pte) |= _PAGE_ACCESSED | _PAGE_DIRTY;
	else
		pte_val(new_pte) |= _PAGE_ACCESSED;

	newpp = computeHptePP(pte_val(new_pte));

	/* Check if pte already has an hpte (case 2) */
	if (pte_val(old_pte) & _PAGE_HASHPTE) {
		/* There MIGHT be an HPTE for this pte */
		unsigned long hash, slot, secondary;

		/* XXX fix large pte flag */
		hash = hpt_hash(vpn, 0);
		secondary = (pte_val(old_pte) & _PAGE_SECONDARY) >> 15;
		if (secondary)
			hash = ~hash;
		slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
		slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;

		/* XXX fix large pte flag */
		if (ppc_md.hpte_updatepp(slot, newpp, va, 0) == -1)
			pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
		else
			if (!pte_same(old_pte, new_pte))
				*ptep = new_pte;
	}

	if (!(pte_val(old_pte) & _PAGE_HASHPTE)) {
		/* XXX fix large pte flag */
		unsigned long hash = hpt_hash(vpn, 0);
		unsigned long hpte_group;
		prpn = pte_val(old_pte) >> PTE_SHIFT;

repeat:
		hpte_group = ((hash & htab_data.htab_hash_mask) *
			      HPTES_PER_GROUP) & ~0x7UL;

		/* Update the linux pte with the HPTE slot */
		pte_val(new_pte) &= ~_PAGE_HPTEFLAGS;
		pte_val(new_pte) |= _PAGE_HASHPTE;

		/* copy appropriate flags from linux pte */
		hpteflags = (pte_val(new_pte) & 0x1f8) | newpp;

		/* XXX fix large pte flag */
		slot = ppc_md.insert_hpte(hpte_group, vpn, prpn, 0,
					  hpteflags, 0, 0);

		/* Primary is full, try the secondary */
		if (slot == -1) {
			pte_val(new_pte) |= 1 << 15;
			hpte_group = ((~hash & htab_data.htab_hash_mask) *
				      HPTES_PER_GROUP) & ~0x7UL; 
			/* XXX fix large pte flag */
			slot = ppc_md.insert_hpte(hpte_group, vpn, prpn,
						  1, hpteflags, 0, 0);
			if (slot == -1) {
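				/*
				 * Both the primary and secondary groups are
				 * full: evict an entry from one of the two
				 * (chosen by the low bit of the timebase)
				 * and retry the insert.
				 */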
				if (mftb() & 0x1)
					hpte_group = ((hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;

				ppc_md.remove_hpte(hpte_group);
				goto repeat;
			}
		}

		pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX;

		/* 
		 * No need to use ldarx/stdcx here because all who
		 * might be updating the pte will hold the
		 * page_table_lock or the hash_table_lock
		 * (we hold both)
		 */
		*ptep = new_pte;
	}

	return 0;
}

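/*
 * Hash-fault entry point: determine which region and mm the faulting
 * effective address belongs to, look up its Linux PTE and try to
 * insert or update the corresponding HPTE via __hash_page.
 */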
int hash_page(unsigned long ea, unsigned long access)
{
	void *pgdir;
	unsigned long vsid;
	struct mm_struct *mm;
	pte_t *ptep;
	int ret;

	/* Check for invalid addresses. */
	if (!IS_VALID_EA(ea))
		return 1;

 	switch (REGION_ID(ea)) {
	case USER_REGION_ID:
		mm = current->mm;
		if (mm == NULL)
			return 1;

		vsid = get_vsid(mm->context, ea);
		break;
	case IO_REGION_ID:
		mm = &ioremap_mm;
		vsid = get_kernel_vsid(ea);
		break;
	case VMALLOC_REGION_ID:
		mm = &init_mm;
		vsid = get_kernel_vsid(ea);
		break;
	case IO_UNMAPPED_REGION_ID:
		udbg_printf("EEH Error ea = 0x%lx\n", ea);
		PPCDBG_ENTER_DEBUGGER();
		panic("EEH Error ea = 0x%lx\n", ea);
		break;
	case KERNEL_REGION_ID:
		/*
		 * As htab_initialize is now, we shouldn't ever get here since
		 * we're bolting the entire 0xC0... region.
		 */
		udbg_printf("Little faulted on kernel address 0x%lx\n", ea);
		PPCDBG_ENTER_DEBUGGER();
		panic("Little faulted on kernel address 0x%lx\n", ea);
		break;
	default:
		/* Not a valid range, send the problem up to do_page_fault */
		return 1;
		break;
	}

	pgdir = mm->pgd;

	if (pgdir == NULL)
		return 1;

	/*
	 * Lock the Linux page table to prevent mmap and kswapd
	 * from modifying entries while we search and update
	 */
	spin_lock(&mm->page_table_lock);
	ptep = find_linux_pte(pgdir, ea);
	ret = __hash_page(ea, access, vsid, ptep);
	spin_unlock(&mm->page_table_lock);

	return ret;
}

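/*
 * Invalidate the HPTE backing a given Linux PTE, recomputing the hash
 * group and slot from the PTE's SECONDARY and GROUP_IX bits.
 */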
void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte,
		     int local)
{
	unsigned long vsid, vpn, va, hash, secondary, slot;

	/* XXX fix for large ptes */
	unsigned long large = 0;

	if ((ea >= USER_START) && (ea <= USER_END))
		vsid = get_vsid(context, ea);
	else
		vsid = get_kernel_vsid(ea);

	va = (vsid << 28) | (ea & 0x0fffffff);
	if (large)
		vpn = va >> LARGE_PAGE_SHIFT;
	else
		vpn = va >> PAGE_SHIFT;
	hash = hpt_hash(vpn, large);
	secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15;
	if (secondary)
		hash = ~hash;
	slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
	slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12;

	ppc_md.hpte_invalidate(slot, va, large, local);
}

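/*
 * Flush a batch of hash table entries collected in this CPU's
 * tlb_batch_array: use the platform's batched flush_hash_range hook
 * when available, otherwise flush one page at a time.
 */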
void flush_hash_range(unsigned long context, unsigned long number, int local)
{
	if (ppc_md.flush_hash_range) {
		ppc_md.flush_hash_range(context, number, local);
	} else {
		int i;
		struct tlb_batch_data *ptes =
			&tlb_batch_array[smp_processor_id()][0];

		for (i = 0; i < number; i++) {
			flush_hash_page(context, ptes->addr, ptes->pte, local);
			ptes++;
		}
	}
}