head.S 37.3 KB
Newer Older
1
/* SPDX-License-Identifier: GPL-2.0-only */
Linus Torvalds's avatar
Linus Torvalds committed
2 3 4 5
/*
 *  linux/arch/arm/boot/compressed/head.S
 *
 *  Copyright (C) 1996-2002 Russell King
6
 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
Linus Torvalds's avatar
Linus Torvalds committed
7 8
 */
#include <linux/linkage.h>
9
#include <asm/assembler.h>
10 11
#include <asm/v7m.h>

Roy Franz's avatar
Roy Franz committed
12 13
#include "efi-header.S"

14 15
 AR_CLASS(	.arch	armv7-a	)
 M_CLASS(	.arch	armv7-m	)
Linus Torvalds's avatar
Linus Torvalds committed
16 17 18 19 20 21 22 23 24

/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 */
#ifdef DEBUG
25 26

#if defined(CONFIG_DEBUG_ICEDCC)
27

28
#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
29
		.macro	loadsp, rb, tmp1, tmp2
30 31 32 33
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c0, c5, 0
		.endm
34
#elif defined(CONFIG_CPU_XSCALE)
35
		.macro	loadsp, rb, tmp1, tmp2
36 37 38 39
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c8, c0, 0
		.endm
40
#else
41
		.macro	loadsp, rb, tmp1, tmp2
Linus Torvalds's avatar
Linus Torvalds committed
42
		.endm
43
		.macro	writeb, ch, rb
44
		mcr	p14, 0, \ch, c1, c0, 0
Linus Torvalds's avatar
Linus Torvalds committed
45
		.endm
46 47
#endif

48
#else
49

50
#include CONFIG_DEBUG_LL_INCLUDE
51

52 53
		.macro	writeb,	ch, rb
		senduart \ch, \rb
Linus Torvalds's avatar
Linus Torvalds committed
54
		.endm
55

56
#if defined(CONFIG_ARCH_SA1100)
57
		.macro	loadsp, rb, tmp1, tmp2
Linus Torvalds's avatar
Linus Torvalds committed
58
		mov	\rb, #0x80000000	@ physical base address
59
#ifdef CONFIG_DEBUG_LL_SER3
Linus Torvalds's avatar
Linus Torvalds committed
60
		add	\rb, \rb, #0x00050000	@ Ser3
61
#else
Linus Torvalds's avatar
Linus Torvalds committed
62
		add	\rb, \rb, #0x00010000	@ Ser1
63
#endif
Linus Torvalds's avatar
Linus Torvalds committed
64 65
		.endm
#else
66 67
		.macro	loadsp,	rb, tmp1, tmp2
		addruart \rb, \tmp1, \tmp2
68
		.endm
Linus Torvalds's avatar
Linus Torvalds committed
69
#endif
70
#endif
Linus Torvalds's avatar
Linus Torvalds committed
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
#endif

		.macro	kputc,val
		@ Emit one character through the debug UART.
		@ \val may be a register or immediate (e.g. #'\n').
		@ Clobbers r0 and lr (call to putc).
		mov	r0, \val
		bl	putc
		.endm

		.macro	kphex,val,len
		@ Print \val as a \len-digit hexadecimal number on the
		@ debug UART.  Clobbers r0, r1 and lr (call to phex).
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm

		.macro	debug_reloc_start
#ifdef DEBUG
		@ Dump the relocation context before the kernel copy:
		@ processor id (r6), architecture id (r7), the CP15 control
		@ register when available, and the decompressed kernel
		@ start (r5) / end (r9) / execution (r4) addresses.
		@ Uses kputc/kphex, so r0, r1 and lr are clobbered.
		@ NOTE(review): the bare "NN" and "avatar/committed" lines
		@ below are blame-view scrape artifacts, not assembly --
		@ they must be stripped before this file can assemble.
		kputc	#'\n'
		kphex	r6, 8		/* processor id */
		kputc	#':'
		kphex	r7, 8		/* architecture id */
90
#ifdef CONFIG_CPU_CP15
Linus Torvalds's avatar
Linus Torvalds committed
91 92 93
		kputc	#':'
		mrc	p15, 0, r0, c1, c0	@ r0 = CP15 control register
		kphex	r0, 8		/* control reg */
94
#endif
Linus Torvalds's avatar
Linus Torvalds committed
95 96 97
		kputc	#'\n'
		kphex	r5, 8		/* decompressed kernel start */
		kputc	#'-'
98
		kphex	r9, 8		/* decompressed kernel end  */
Linus Torvalds's avatar
Linus Torvalds committed
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
		kputc	#'>'
		kphex	r4, 8		/* kernel execution address */
		kputc	#'\n'
#endif
		.endm

		.macro	debug_reloc_end
#ifdef DEBUG
		@ Print the end-of-kernel address (r5), then hex-dump the
		@ first bytes at the kernel execution address (r4) via
		@ memdump.  Clobbers r0, r1 and lr; no-op unless DEBUG.
		kphex	r5, 8		/* end of kernel */
		kputc	#'\n'
		mov	r0, r4
		bl	memdump		/* dump 256 bytes at start of kernel */
#endif
		.endm

114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
		/*
		 * Debug kernel copy by printing the memory addresses involved
		 *
		 * \begin,  \end  = start/end of the source (compressed) image
		 * \cbegin, \cend = start/end of the destination copy area
		 *
		 * Emits "C:0x<begin>-0x<end>->0x<cbegin>-0x<cend>" on the
		 * debug UART.  Uses kputc/kphex, so r0, r1 and lr are
		 * clobbered.  Compiles to nothing unless DEBUG is defined.
		 */
		.macro dbgkc, begin, end, cbegin, cend
#ifdef DEBUG
		kputc   #'\n'
		kputc   #'C'
		kputc   #':'
		kputc   #'0'
		kputc   #'x'
		kphex   \begin, 8	/* Start of compressed kernel */
		kputc	#'-'
		kputc	#'0'
		kputc	#'x'
		kphex	\end, 8		/* End of compressed kernel */
		kputc	#'-'
		kputc	#'>'
		kputc   #'0'
		kputc   #'x'
		kphex   \cbegin, 8	/* Start of kernel copy */
		kputc	#'-'
		kputc	#'0'
		kputc	#'x'
		kphex	\cend, 8	/* End of kernel copy */
		kputc	#'\n'
		kputc	#'\r'
#endif
		.endm

143 144 145 146 147 148 149 150 151 152 153
		/*
		 * Ensure the deprecated CP15 barrier instructions
		 * (CP15 c7 DSB/DMB/ISB encodings) are enabled by setting
		 * SCTLR.CP15BEN (bit 5).  Skips the write when the bit is
		 * already set.  \reg is used as scratch and is clobbered;
		 * flags are clobbered by the tst.
		 */
		.macro	enable_cp15_barriers, reg
		mrc	p15, 0, \reg, c1, c0, 0	@ read SCTLR
		tst	\reg, #(1 << 5)		@ CP15BEN bit set?
		bne	.L_\@
		orr	\reg, \reg, #(1 << 5)	@ CP15 barrier instructions
		mcr	p15, 0, \reg, c1, c0, 0	@ write SCTLR
 ARM(		.inst   0xf57ff06f		@ v7+ isb	)
 THUMB(		isb						)
.L_\@:
		.endm

154
		.section ".start", "ax"
Linus Torvalds's avatar
Linus Torvalds committed
155 156 157 158
/*
 * sort out different calling conventions
 */
		.align
159 160 161 162 163 164
		/*
		 * Always enter in ARM state for CPUs that support the ARM ISA.
		 * As of today (2014) that's exactly the members of the A and R
		 * classes.
		 */
 AR_CLASS(	.arm	)
Linus Torvalds's avatar
Linus Torvalds committed
165 166
start:
		.type	start,#function
167 168 169 170 171 172 173 174 175 176 177 178
		/*
		 * These 7 nops along with the 1 nop immediately below for
		 * !THUMB2 form 8 nops that make the compressed kernel bootable
		 * on legacy ARM systems that were assuming the kernel in a.out
		 * binary format. The boot loaders on these systems would
		 * jump 32 bytes into the image to skip the a.out header.
		 * with these 8 nops filling exactly 32 bytes, things still
		 * work as expected on these legacy systems. Thumb2 mode keeps
		 * 7 of the nops as it turns out that some boot loaders
		 * were patching the initial instructions of the kernel, i.e
		 * had started to exploit this "patch area".
		 */
179
		.rept	7
Roy Franz's avatar
Roy Franz committed
180
		__nop
Linus Torvalds's avatar
Linus Torvalds committed
181
		.endr
182
#ifndef CONFIG_THUMB2_KERNEL
183
		__nop
184 185 186 187 188 189
#else
 AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
  M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
		.thumb
#endif
		W(b)	1f
Linus Torvalds's avatar
Linus Torvalds committed
190

191 192 193
		.word	_magic_sig	@ Magic numbers to help the loader
		.word	_magic_start	@ absolute load/run zImage address
		.word	_magic_end	@ zImage end address
194
		.word	0x04030201	@ endianness flag
195 196
		.word	0x45454545	@ another magic number to indicate
		.word	_magic_table	@ additional data table
197

198 199
		__EFI_HEADER
1:
200 201
 ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
 AR_CLASS(	mrs	r9, cpsr	)
202 203 204 205
#ifdef CONFIG_ARM_VIRT_EXT
		bl	__hyp_stub_install	@ get into SVC mode, reversibly
#endif
		mov	r7, r1			@ save architecture ID
206
		mov	r8, r2			@ save atags pointer
Linus Torvalds's avatar
Linus Torvalds committed
207

208
#ifndef CONFIG_CPU_V7M
Linus Torvalds's avatar
Linus Torvalds committed
209 210 211 212 213 214 215 216 217
		/*
		 * Booting from Angel - need to enter SVC mode and disable
		 * FIQs/IRQs (numeric definitions from angel arm.h source).
		 * We only do this if we were in user mode on entry.
		 */
		mrs	r2, cpsr		@ get current mode
		tst	r2, #3			@ not user?
		bne	not_angel
		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
218 219
 ARM(		swi	0x123456	)	@ angel_SWI_ARM
 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
Linus Torvalds's avatar
Linus Torvalds committed
220
not_angel:
221 222 223
		safe_svcmode_maskall r0
		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
						@ SPSR
224
#endif
Linus Torvalds's avatar
Linus Torvalds committed
225 226 227 228 229 230 231
		/*
		 * Note that some cache flushing and other stuff may
		 * be needed here - is there an Angel SWI call for this?
		 */

		/*
		 * some architecture specific code can be inserted
232
		 * by the linker here, but it should preserve r7, r8, and r9.
Linus Torvalds's avatar
Linus Torvalds committed
233 234 235
		 */

		.text
236

237
#ifdef CONFIG_AUTO_ZRELADDR
238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254
		/*
		 * Find the start of physical memory.  As we are executing
		 * without the MMU on, we are in the physical address space.
		 * We just need to get rid of any offset by aligning the
		 * address.
		 *
		 * This alignment is a balance between the requirements of
		 * different platforms - we have chosen 128MB to allow
		 * platforms which align the start of their physical memory
		 * to 128MB to use this feature, while allowing the zImage
		 * to be placed within the first 128MB of memory on other
		 * platforms.  Increasing the alignment means we place
		 * stricter alignment requirements on the start of physical
		 * memory, but relaxing it means that we break people who
		 * are already placing their zImage in (eg) the top 64MB
		 * of this range.
		 */
255 256
		mov	r4, pc
		and	r4, r4, #0xf8000000
257
		/* Determine final kernel image address. */
258 259
		add	r4, r4, #TEXT_OFFSET
#else
260
		ldr	r4, =zreladdr
261
#endif
Linus Torvalds's avatar
Linus Torvalds committed
262

263 264
		/*
		 * Set up a page table only if it won't overwrite ourself.
265
		 * That means r4 < pc || r4 - 16k page directory > &_end.
266 267 268 269 270 271 272 273 274 275
		 * Given that r4 > &_end is most unfrequent, we add a rough
		 * additional 1MB of room for a possible appended DTB.
		 */
		mov	r0, pc
		cmp	r0, r4
		ldrcc	r0, LC0+32
		addcc	r0, r0, pc
		cmpcc	r4, r0
		orrcc	r4, r4, #1		@ remember we skipped cache_on
		blcs	cache_on
276 277

restart:	adr	r0, LC0
278
		ldmia	r0, {r1, r2, r3, r6, r10, r11, r12}
279
		ldr	sp, [r0, #28]
280 281 282 283 284 285 286

		/*
		 * We might be running at a different address.  We need
		 * to fix up various pointers.
		 */
		sub	r0, r0, r1		@ calculate the delta offset
		add	r6, r6, r0		@ _edata
287 288 289 290 291 292 293 294 295 296 297 298 299 300
		add	r10, r10, r0		@ inflated kernel size location

		/*
		 * The kernel build system appends the size of the
		 * decompressed kernel at the end of the compressed data
		 * in little-endian form.
		 */
		ldrb	r9, [r10, #0]
		ldrb	lr, [r10, #1]
		orr	r9, r9, lr, lsl #8
		ldrb	lr, [r10, #2]
		ldrb	r10, [r10, #3]
		orr	r9, r9, lr, lsl #16
		orr	r9, r9, r10, lsl #24
Linus Torvalds's avatar
Linus Torvalds committed
301

302 303 304 305 306
#ifndef CONFIG_ZBOOT_ROM
		/* malloc space is above the relocated stack (64k max) */
		add	sp, sp, r0
		add	r10, sp, #0x10000
#else
Linus Torvalds's avatar
Linus Torvalds committed
307
		/*
308 309 310
		 * With ZBOOT_ROM the bss/stack is non relocatable,
		 * but someone could still run this code from RAM,
		 * in which case our reference is _edata.
Linus Torvalds's avatar
Linus Torvalds committed
311
		 */
312 313 314
		mov	r10, r6
#endif

315 316 317 318 319 320
		mov	r5, #0			@ init dtb size to 0
#ifdef CONFIG_ARM_APPENDED_DTB
/*
 *   r0  = delta
 *   r2  = BSS start
 *   r3  = BSS end
321
 *   r4  = final kernel address (possibly with LSB set)
322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344
 *   r5  = appended dtb size (still unknown)
 *   r6  = _edata
 *   r7  = architecture ID
 *   r8  = atags/device tree pointer
 *   r9  = size of decompressed image
 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
 *   r11 = GOT start
 *   r12 = GOT end
 *   sp  = stack pointer
 *
 * if there are device trees (dtb) appended to zImage, advance r10 so that the
 * dtb data will get relocated along with the kernel if necessary.
 */

		ldr	lr, [r6, #0]
#ifndef __ARMEB__
		ldr	r1, =0xedfe0dd0		@ sig is 0xd00dfeed big endian
#else
		ldr	r1, =0xd00dfeed
#endif
		cmp	lr, r1
		bne	dtb_check_done		@ not found

345 346 347 348 349
#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
		/*
		 * OK... Let's do some funky business here.
		 * If we do have a DTB appended to zImage, and we do have
		 * an ATAG list around, we want the later to be translated
350 351 352
		 * and folded into the former here. No GOT fixup has occurred
		 * yet, but none of the code we're about to call uses any
		 * global variable.
353
		*/
354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376

		/* Get the initial DTB size */
		ldr	r5, [r6, #4]
#ifndef __ARMEB__
		/* convert to little endian */
		eor	r1, r5, r5, ror #16
		bic	r1, r1, #0x00ff0000
		mov	r5, r5, ror #8
		eor	r5, r5, r1, lsr #8
#endif
		/* 50% DTB growth should be good enough */
		add	r5, r5, r5, lsr #1
		/* preserve 64-bit alignment */
		add	r5, r5, #7
		bic	r5, r5, #7
		/* clamp to 32KB min and 1MB max */
		cmp	r5, #(1 << 15)
		movlo	r5, #(1 << 15)
		cmp	r5, #(1 << 20)
		movhi	r5, #(1 << 20)
		/* temporarily relocate the stack past the DTB work space */
		add	sp, sp, r5

377 378 379
		stmfd	sp!, {r0-r3, ip, lr}
		mov	r0, r8
		mov	r1, r6
380
		mov	r2, r5
381 382 383 384 385 386 387 388
		bl	atags_to_fdt

		/*
		 * If returned value is 1, there is no ATAG at the location
		 * pointed by r8.  Try the typical 0x100 offset from start
		 * of RAM and hope for the best.
		 */
		cmp	r0, #1
389
		sub	r0, r4, #TEXT_OFFSET
390
		bic	r0, r0, #1
391
		add	r0, r0, #0x100
392
		mov	r1, r6
393
		mov	r2, r5
394
		bleq	atags_to_fdt
395 396

		ldmfd	sp!, {r0-r3, ip, lr}
397
		sub	sp, sp, r5
398 399
#endif

400 401
		mov	r8, r6			@ use the appended device tree

402 403 404 405 406 407 408 409 410 411 412 413
		/*
		 * Make sure that the DTB doesn't end up in the final
		 * kernel's .bss area. To do so, we adjust the decompressed
		 * kernel size to compensate if that .bss size is larger
		 * than the relocated code.
		 */
		ldr	r5, =_kernel_bss_size
		adr	r1, wont_overwrite
		sub	r1, r6, r1
		subs	r1, r5, r1
		addhi	r9, r9, r1

414
		/* Get the current DTB size */
415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434
		ldr	r5, [r6, #4]
#ifndef __ARMEB__
		/* convert r5 (dtb size) to little endian */
		eor	r1, r5, r5, ror #16
		bic	r1, r1, #0x00ff0000
		mov	r5, r5, ror #8
		eor	r5, r5, r1, lsr #8
#endif

		/* preserve 64-bit alignment */
		add	r5, r5, #7
		bic	r5, r5, #7

		/* relocate some pointers past the appended dtb */
		add	r6, r6, r5
		add	r10, r10, r5
		add	sp, sp, r5
dtb_check_done:
#endif

435 436
/*
 * Check to see if we will overwrite ourselves.
437
 *   r4  = final kernel address (possibly with LSB set)
438 439 440
 *   r9  = size of decompressed image
 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
 * We basically want:
441
 *   r4 - 16k page directory >= r10 -> OK
442
 *   r4 + image length <= address of wont_overwrite -> OK
443
 * Note: the possible LSB in r4 is harmless here.
444
 */
445
		add	r10, r10, #16384
446 447 448
		cmp	r4, r10
		bhs	wont_overwrite
		add	r10, r4, r9
449 450
		adr	r9, wont_overwrite
		cmp	r10, r9
451 452 453 454 455 456 457 458 459
		bls	wont_overwrite

/*
 * Relocate ourselves past the end of the decompressed kernel.
 *   r6  = _edata
 *   r10 = end of the decompressed kernel
 * Because we always copy ahead, we need to do it from the end and go
 * backward in case the source and destination overlap.
 */
460 461 462 463 464 465
		/*
		 * Bump to the next 256-byte boundary with the size of
		 * the relocation code added. This avoids overwriting
		 * ourself when the offset is small.
		 */
		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
466 467
		bic	r10, r10, #255

468 469 470 471
		/* Get start of code we want to copy and align it down. */
		adr	r5, restart
		bic	r5, r5, #31

472 473 474 475 476 477 478
/* Relocate the hyp vector base if necessary */
#ifdef CONFIG_ARM_VIRT_EXT
		mrs	r0, spsr
		and	r0, r0, #MODE_MASK
		cmp	r0, #HYP_MODE
		bne	1f

479 480 481 482 483 484 485 486 487 488 489
		/*
		 * Compute the address of the hyp vectors after relocation.
		 * This requires some arithmetic since we cannot directly
		 * reference __hyp_stub_vectors in a PC-relative way.
		 * Call __hyp_set_vectors with the new address so that we
		 * can HVC again after the copy.
		 */
0:		adr	r0, 0b
		movw	r1, #:lower16:__hyp_stub_vectors - 0b
		movt	r1, #:upper16:__hyp_stub_vectors - 0b
		add	r0, r0, r1
490 491 492 493 494 495
		sub	r0, r0, r5
		add	r0, r0, r10
		bl	__hyp_set_vectors
1:
#endif

496 497 498 499 500 501
		sub	r9, r6, r5		@ size to copy
		add	r9, r9, #31		@ rounded up to a multiple
		bic	r9, r9, #31		@ ... of 32 bytes
		add	r6, r9, r5
		add	r9, r9, r10

502 503 504 505 506 507 508 509 510 511 512 513 514 515
#ifdef DEBUG
		sub     r10, r6, r5
		sub     r10, r9, r10
		/*
		 * We are about to copy the kernel to a new memory area.
		 * The boundaries of the new memory area can be found in
		 * r10 and r9, whilst r5 and r6 contain the boundaries
		 * of the memory we are going to copy.
		 * Calling dbgkc will help with the printing of this
		 * information.
		 */
		dbgkc	r5, r6, r10, r9
#endif

516 517 518 519 520 521 522 523
1:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
		cmp	r6, r5
		stmdb	r9!, {r0 - r3, r10 - r12, lr}
		bhi	1b

		/* Preserve offset to relocated code. */
		sub	r6, r9, r6

524 525 526 527 528
#ifndef CONFIG_ZBOOT_ROM
		/* cache_clean_flush may use the stack, so relocate it */
		add	sp, sp, r6
#endif

529
		bl	cache_clean_flush
530

531
		badr	r0, restart
532 533 534 535 536 537 538 539 540
		add	r0, r0, r6
		mov	pc, r0

wont_overwrite:
/*
 * If delta is zero, we are running at the address we were linked at.
 *   r0  = delta
 *   r2  = BSS start
 *   r3  = BSS end
541
 *   r4  = kernel execution address (possibly with LSB set)
542
 *   r5  = appended dtb size (0 if not present)
543 544 545 546 547 548
 *   r7  = architecture ID
 *   r8  = atags pointer
 *   r11 = GOT start
 *   r12 = GOT end
 *   sp  = stack pointer
 */
549
		orrs	r1, r0, r5
550
		beq	not_relocated
551

552
		add	r11, r11, r0
553
		add	r12, r12, r0
Linus Torvalds's avatar
Linus Torvalds committed
554 555 556 557 558

#ifndef CONFIG_ZBOOT_ROM
		/*
		 * If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
		 * we need to fix up pointers into the BSS region.
559
		 * Note that the stack pointer has already been fixed up.
Linus Torvalds's avatar
Linus Torvalds committed
560 561 562 563 564 565
		 */
		add	r2, r2, r0
		add	r3, r3, r0

		/*
		 * Relocate all entries in the GOT table.
566
		 * Bump bss entries to _edata + dtb size
Linus Torvalds's avatar
Linus Torvalds committed
567
		 */
568
1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
569 570 571 572 573
		add	r1, r1, r0		@ This fixes up C references
		cmp	r1, r2			@ if entry >= bss_start &&
		cmphs	r3, r1			@       bss_end > entry
		addhi	r1, r1, r5		@    entry += dtb size
		str	r1, [r11], #4		@ next entry
574
		cmp	r11, r12
Linus Torvalds's avatar
Linus Torvalds committed
575
		blo	1b
576 577 578 579 580

		/* bump our bss pointers too */
		add	r2, r2, r5
		add	r3, r3, r5

Linus Torvalds's avatar
Linus Torvalds committed
581 582 583 584 585 586
#else

		/*
		 * Relocate entries in the GOT table.  We only relocate
		 * the entries that are outside the (relocated) BSS region.
		 */
587
1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
Linus Torvalds's avatar
Linus Torvalds committed
588 589 590
		cmp	r1, r2			@ entry < bss_start ||
		cmphs	r3, r1			@ _end < entry
		addlo	r1, r1, r0		@ table.  This fixes up the
591
		str	r1, [r11], #4		@ C references.
592
		cmp	r11, r12
Linus Torvalds's avatar
Linus Torvalds committed
593 594 595 596 597 598 599 600 601 602 603
		blo	1b
#endif

not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b

604 605 606 607 608 609 610 611 612
		/*
		 * Did we skip the cache setup earlier?
		 * That is indicated by the LSB in r4.
		 * Do it now if so.
		 */
		tst	r4, #1
		bic	r4, r4, #1
		blne	cache_on

Linus Torvalds's avatar
Linus Torvalds committed
613
/*
614 615 616 617 618
 * The C runtime environment should now be setup sufficiently.
 * Set up some pointers, and start decompressing.
 *   r4  = kernel execution address
 *   r7  = architecture ID
 *   r8  = atags pointer
Linus Torvalds's avatar
Linus Torvalds committed
619
 */
620 621 622
		mov	r0, r4
		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max
Linus Torvalds's avatar
Linus Torvalds committed
623 624 625
		mov	r3, r7
		bl	decompress_kernel
		bl	cache_clean_flush
626
		bl	cache_off
627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647

#ifdef CONFIG_ARM_VIRT_EXT
		mrs	r0, spsr		@ Get saved CPU boot mode
		and	r0, r0, #MODE_MASK
		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
		bne	__enter_kernel		@ boot kernel directly

		adr	r12, .L__hyp_reentry_vectors_offset
		ldr	r0, [r12]
		add	r0, r0, r12

		bl	__hyp_set_vectors
		__HVC(0)			@ otherwise bounce to hyp mode

		b	.			@ should never be reached

		.align	2
.L__hyp_reentry_vectors_offset:	.long	__hyp_reentry_vectors - .
#else
		b	__enter_kernel
#endif
Linus Torvalds's avatar
Linus Torvalds committed
648

649
		.align	2
Linus Torvalds's avatar
Linus Torvalds committed
650 651 652 653
		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
654
		.word	_edata			@ r6
655
		.word	input_data_end - 4	@ r10 (inflated size location)
656
		.word	_got_start		@ r11
Linus Torvalds's avatar
Linus Torvalds committed
657
		.word	_got_end		@ ip
658
		.word	.L_user_stack_end	@ sp
659
		.word	_end - restart + 16384 + 1024*1024
Linus Torvalds's avatar
Linus Torvalds committed
660 661 662 663
		.size	LC0, . - LC0

#ifdef CONFIG_ARCH_RPC
		.globl	params
664
params:		ldr	r0, =0x10000100		@ params_phys for RPC
Linus Torvalds's avatar
Linus Torvalds committed
665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680
		mov	pc, lr
		.ltorg
		.align
#endif

/*
 * Turn on the cache.  We need to setup some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 * will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r7 = architecture number
681
 *  r8 = atags pointer
Linus Torvalds's avatar
Linus Torvalds committed
682
 * On exit,
683
 *  r0, r1, r2, r3, r9, r10, r12 corrupted
Linus Torvalds's avatar
Linus Torvalds committed
684
 * This routine must preserve:
685
 *  r4, r7, r8
Linus Torvalds's avatar
Linus Torvalds committed
686 687 688 689 690
 */
		.align	5
cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn

691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737
/*
 * Initialize the highest priority protection region, PR7
 * to cover all 32bit address and cacheable and bufferable.
 *
 * Cache-on handler for ARMv4 MPU (no-MMU) parts: maps the whole
 * 4GB space through protection region 7 with full access, enables
 * the I/D caches and write buffer in the control register, and
 * invalidates both caches around the switch.
 * Clobbers r0 and flags; returns via lr.
 */
__armv4_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
		mcr 	p15, 0, r0, c6, c7, 1

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ ...I .... ..D. WC.M
		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
		orr	r0, r0, #0x1000		@ ...1 .... .... ....

		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mov	pc, lr

__armv3_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 0	@ access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
738 739 740 741
		/*
		 * ?? ARMv3 MMU does not allow reading the control register,
		 * does this really work on ARMv3 MPU?
		 */
742 743 744
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ .... .... .... WC.M
		orr	r0, r0, #0x000d		@ .... .... .... 11.1
745
		/* ?? this overwrites the value constructed above? */
746 747 748
		mov	r0, #0
		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

749
		/* ?? invalidate for the second time? */
750 751 752
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

753 754 755 756 757 758
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
#define CB_BITS 0x08
#else
#define CB_BITS 0x0c
#endif

Linus Torvalds's avatar
Linus Torvalds committed
759 760 761 762 763 764 765 766
__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
		bic	r3, r3, #0x3f00
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
 */
		mov	r0, r3
767 768 769
		mov	r9, r0, lsr #18
		mov	r9, r9, lsl #18		@ start of RAM
		add	r10, r9, #0x10000000	@ a reasonable RAM size
770 771
		mov	r1, #0x12		@ XN|U + section mapping
		orr	r1, r1, #3 << 10	@ AP=11
Linus Torvalds's avatar
Linus Torvalds committed
772
		add	r2, r3, #16384
773
1:		cmp	r1, r9			@ if virt > start of RAM
774 775 776 777
		cmphs	r10, r1			@   && end of RAM > virt
		bic	r1, r1, #0x1c		@ clear XN|U + C + B
		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
		orrhs	r1, r1, r6		@ set RAM section settings
Linus Torvalds's avatar
Linus Torvalds committed
778 779 780 781 782 783 784 785 786 787
		str	r1, [r0], #4		@ 1:1 mapping
		add	r1, r1, #1048576
		teq	r0, r2
		bne	1b
/*
 * If ever we are running from Flash, then we surely want the cache
 * to be enabled also for our execution instance...  We map 2MB of it
 * so there is no map overlap problem for up to 1 MB compressed kernel.
 * If the execution is in RAM then we would only be duplicating the above.
 */
788
		orr	r1, r6, #0x04		@ ensure B is set for this
Linus Torvalds's avatar
Linus Torvalds committed
789
		orr	r1, r1, #3 << 10
790 791
		mov	r2, pc
		mov	r2, r2, lsr #20
Linus Torvalds's avatar
Linus Torvalds committed
792 793 794 795 796 797
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2
		str	r1, [r0], #4
		add	r1, r1, #1048576
		str	r1, [r0]
		mov	pc, lr
798
ENDPROC(__setup_mmu)
Linus Torvalds's avatar
Linus Torvalds committed
799

800 801 802 803 804 805 806 807 808
@ Enable unaligned access on v6, to allow better code generation
@ for the decompressor C code:
@ Clears SCTLR.A (alignment fault) and sets SCTLR.U (v6 unaligned
@ access model), then tail-calls the common __armv4_mmu_cache_on
@ path to do the actual MMU/cache enable.  Clobbers r0.
__armv6_mmu_cache_on:
		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
		bic	r0, r0, #2		@ A (no unaligned access fault)
		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
		b	__armv4_mmu_cache_on

809 810 811 812 813 814
@ ARM926EJ-S cache-on entry: optionally forces the D-cache into
@ write-through mode via the implementation-defined CP15 c15 test
@ register (clobbers r0), then falls through to the common
@ __armv4_mmu_cache_on sequence below.
__arm926ejs_mmu_cache_on:
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
		mov	r0, #4			@ put dcache in WT mode
		mcr	p15, 7, r0, c15, c0, 0
#endif

815
__armv4_mmu_cache_on:
Linus Torvalds's avatar
Linus Torvalds committed
816
		mov	r12, lr
817
#ifdef CONFIG_MMU
818
		mov	r6, #CB_BITS | 0x12	@ U
Linus Torvalds's avatar
Linus Torvalds committed
819 820 821 822 823 824 825
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x0030
826
 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
827
		bl	__common_mmu_cache_on
Linus Torvalds's avatar
Linus Torvalds committed
828 829
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
830
#endif
Linus Torvalds's avatar
Linus Torvalds committed
831 832
		mov	pc, r12

833
__armv7_mmu_cache_on:
834
		enable_cp15_barriers	r11
835
		mov	r12, lr
836
#ifdef CONFIG_MMU
837 838
		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
		tst	r11, #0xf		@ VMSA
839
		movne	r6, #CB_BITS | 0x02	@ !XN
840 841 842 843 844
		blne	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		tst	r11, #0xf		@ VMSA
		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
845
#endif
846
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
847
		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
848 849
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x003c		@ write buffer
850 851 852
		bic	r0, r0, #2		@ A (no unaligned access fault)
		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
						@ (needed for ARM1176)
853
#ifdef CONFIG_MMU
854
 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
855
		mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
856
		orrne	r0, r0, #1		@ MMU enabled
857
		movne	r1, #0xfffffffd		@ domain 0 = client
858
		bic     r6, r6, #1 << 31        @ 32-bit translation system
859
		bic     r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
860 861
		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
862
		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
863
#endif
864
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
865 866 867 868 869 870
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, r12

871 872
__fa526_cache_on:
		mov	r12, lr
873
		mov	r6, #CB_BITS | 0x12	@ U
874 875 876 877 878 879 880 881 882 883 884 885
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x1000		@ I-cache enable
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
		mov	pc, r12

886
__common_mmu_cache_on:
887
#ifndef CONFIG_THUMB2_KERNEL
Linus Torvalds's avatar
Linus Torvalds committed
888 889 890 891 892 893
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ Write buffer, mmu
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
894 895 896 897 898
		b	1f
		.align	5			@ cache line aligned
1:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
899
#endif
Linus Torvalds's avatar
Linus Torvalds committed
900

/* Size of one proc_types entry: 2 ID words + 3 branch instructions. */
#define PROC_ENTRY_SIZE (4*5)

/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 *  r1  = corrupted
 *  r2  = corrupted
 *  r3  = block offset
 *  r9  = corrupted
 *  r12 = corrupted
 */

call_cache_fn:	adr	r12, proc_types
#ifdef CONFIG_CPU_CP15
		mrc	p15, 0, r9, c0, c0	@ get processor ID
#elif defined(CONFIG_CPU_V7M)
		/*
		 * On v7-M the processor id is located in the V7M_SCB_CPUID
		 * register, but as cache handling is IMPLEMENTATION DEFINED on
		 * v7-M (if existant at all) we just return early here.
		 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
		 * __armv7_mmu_cache_{on,off,flush}) would be selected which
		 * use cp15 registers that are not implemented on v7-M.
		 */
		bx	lr
#else
		ldr	r9, =CONFIG_PROCESSOR_ID
#endif
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r9		@ (real ^ match)
		tst	r1, r2			@       & mask
 ARM(		addeq	pc, r12, r3		) @ call cache function
 THUMB(		addeq	r12, r3			)
 THUMB(		moveq	pc, r12			) @ call cache function
		add	r12, r12, #PROC_ENTRY_SIZE
		b	1b

/*
 * Table for cache operations.  This is basically:
 *   - CPU ID match
 *   - CPU ID mask
 *   - 'cache on' method instruction
 *   - 'cache off' method instruction
 *   - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 *
 * Each method slot must be exactly one (32-bit) instruction wide so
 * that call_cache_fn can jump to "entry + offset"; hence W(b) rather
 * than a possibly-narrow Thumb branch, and the THUMB(nop) padding
 * after each "mov pc, lr" no-op method.
 */
		.align	2
		.type	proc_types,#object
proc_types:
		.word	0x41000000		@ old ARM ID
		.word	0xff00f000
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41007400		@ ARM74x
		.word	0xff00ff00
		W(b)	__armv3_mpu_cache_on
		W(b)	__armv3_mpu_cache_off
		W(b)	__armv3_mpu_cache_flush

		.word	0x41009400		@ ARM94x
		.word	0xff00ff00
		W(b)	__armv4_mpu_cache_on
		W(b)	__armv4_mpu_cache_off
		W(b)	__armv4_mpu_cache_flush

		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
		.word	0xff0ffff0
		W(b)	__arm926ejs_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x00007000		@ ARM7 IDs
		.word	0x0000f000
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		@ Everything from here on will be the new ID system.

		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x56056900
		.word	0xffffff00		@ PXA9xx
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x56158000		@ PXA168
		.word	0xfffff000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x56050000		@ Feroceon
		.word	0xff0f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

#ifdef CONFIG_CPU_FEROCEON_OLD_ID
		/* this conflicts with the standard ARMv5TE entry */
		.long	0x41009260		@ Old Feroceon
		.long	0xff00fff0
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv5tej_mmu_cache_flush
#endif

		.word	0x66015261		@ FA526
		.word	0xff01fff1
		W(b)	__fa526_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__fa526_cache_flush

		@ These match on the architecture ID

		.word	0x00020000		@ ARMv4T
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x00050000		@ ARMv5TE
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x00060000		@ ARMv5TEJ
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x0007b000		@ ARMv6
		.word	0x000ff000
		W(b)	__armv6_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv6_mmu_cache_flush

		.word	0x000f0000		@ new CPU Id
		.word	0x000f0000
		W(b)	__armv7_mmu_cache_on
		W(b)	__armv7_mmu_cache_off
		W(b)	__armv7_mmu_cache_flush

		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.size	proc_types, . - proc_types

		/*
		 * If you get a "non-constant expression in ".if" statement"
		 * error from the assembler on this line, check that you have
		 * not accidentally written a "b" instruction where you should
		 * have written W(b).
		 */
		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
		.error "The size of one or more proc_types entries is wrong."
		.endif

/*
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 *
 * On exit,
 *  r0, r1, r2, r3, r9, r12 corrupted
 * This routine must preserve:
 *  r4, r7, r8
 */
		.align	5
cache_off:	mov	r3, #12			@ cache_off function
		b	call_cache_fn

@ ARMv4 MPU: clear the MPU/cache enable bits, then drain the write
@ buffer and flush both caches so no dirty state survives.
__armv4_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
		mov	pc, lr

@ ARMv3 MPU: clear the MPU/cache enable bits and invalidate the
@ (unified) cache with the single v3 whole-cache operation.
__armv3_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

@ ARMv4 MMU: disable MMU, caches and write buffer, then invalidate the
@ whole cache and TLB.  A no-op when the kernel is built without MMU.
__armv4_mmu_cache_off:
#ifdef CONFIG_MMU
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
#endif
		mov	pc, lr

@ ARMv7: disable caches (and the MMU when configured), then clean and
@ invalidate via __armv7_mmu_cache_flush before invalidating the TLB
@ and branch predictor.  lr is parked in r12 across the bl.
__armv7_mmu_cache_off:
		mrc	p15, 0, r0, c1, c0
#ifdef CONFIG_MMU
		bic	r0, r0, #0x000d
#else
		bic	r0, r0, #0x000c
#endif
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r12, lr
		bl	__armv7_mmu_cache_flush
		mov	r0, #0
#ifdef CONFIG_MMU
		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
#endif
		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
		mcr	p15, 0, r0, c7, c10, 4	@ DSB
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, r12

/*
 * Clean and flush the cache to maintain consistency.
 *
 * On exit,
 *  r1, r2, r3, r9, r10, r11, r12 corrupted
 * This routine must preserve:
 *  r4, r6, r7, r8
 */
		.align	5
cache_clean_flush:
		mov	r3, #16
		b	call_cache_fn

@ ARMv4 MPU: clean & invalidate the D-cache by set/way (8 segments x
@ 64 entries), then invalidate the I-cache and drain the write buffer.
@ Returns immediately when bit 0 of r4 is set (flag set by the caller
@ — NOTE(review): confirm exact semantics against the call sites).
__armv4_mpu_cache_flush:
		tst	r4, #1
		movne	pc, lr
		mov	r2, #1
		mov	r3, #0
		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
		mov	r1, #7 << 5		@ 8 segments
1:		orr	r3, r1, #63 << 26	@ 64 entries
2:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
		subs	r3, r3, #1 << 26
		bcs	2b			@ entries 63 to 0
		subs 	r1, r1, #1 << 5
		bcs	1b			@ segments 7 to 0

		teq	r2, #0
		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
		mov	pc, lr
@ FA526: clean+invalidate D-cache, flush I-cache, drain write buffer.
@ Returns immediately when bit 0 of r4 is set.
__fa526_cache_flush:
		tst	r4, #1
		movne	pc, lr
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
1213

@ ARMv6: clean+invalidate D and unified caches (both skipped when bit 0
@ of r4 is set, via the conditional mcreq), always invalidate I+BTB,
@ then drain the write buffer.
__armv6_mmu_cache_flush:
		mov	r1, #0
		tst	r4, #1
		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

@ ARMv7: if ID_MMFR1 reports a hierarchical cache, walk CLIDR level by
@ level and clean & invalidate every set/way up to the Level of
@ Coherency; otherwise use the single whole-D-cache operation.  In
@ both cases finish by invalidating I-cache + branch predictor with
@ DSB/ISB.  When bit 0 of r4 is set, only the I-side flush is done.
__armv7_mmu_cache_flush:
		enable_cp15_barriers	r10
		tst	r4, #1
		bne	iflush
		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
		mov	r10, #0
		beq	hierarchical
		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
		b	iflush
hierarchical:
		mcr	p15, 0, r10, c7, c10, 5	@ DMB
		stmfd	sp!, {r0-r7, r9-r11}
		mrc	p15, 1, r0, c0, c0, 1	@ read clidr
		ands	r3, r0, #0x7000000	@ extract loc from clidr
		mov	r3, r3, lsr #23		@ left align loc bit field
		beq	finished		@ if loc is 0, then no need to clean
		mov	r10, #0			@ start clean at cache level 0
loop1:
		add	r2, r10, r10, lsr #1	@ work out 3x current cache level
		mov	r1, r0, lsr r2		@ extract cache type bits from clidr
		and	r1, r1, #7		@ mask of the bits for current cache only
		cmp	r1, #2			@ see what cache we have at this level
		blt	skip			@ skip if no cache, or just i-cache
		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
		mcr	p15, 0, r10, c7, c5, 4	@ isb to sych the new cssr&csidr
		mrc	p15, 1, r1, c0, c0, 0	@ read the new csidr
		and	r2, r1, #7		@ extract the length of the cache lines
		add	r2, r2, #4		@ add 4 (line length offset)
		ldr	r4, =0x3ff
		ands	r4, r4, r1, lsr #3	@ find maximum number on the way size
		clz	r5, r4			@ find bit position of way size increment
		ldr	r7, =0x7fff
		ands	r7, r7, r1, lsr #13	@ extract max number of the index size
loop2:
		mov	r9, r4			@ create working copy of max way size
loop3:
 ARM(		orr	r11, r10, r9, lsl r5	) @ factor way and cache number into r11
 ARM(		orr	r11, r11, r7, lsl r2	) @ factor index number into r11
 THUMB(		lsl	r6, r9, r5		)
 THUMB(		orr	r11, r10, r6		) @ factor way and cache number into r11
 THUMB(		lsl	r6, r7, r2		)
 THUMB(		orr	r11, r11, r6		) @ factor index number into r11
		mcr	p15, 0, r11, c7, c14, 2	@ clean & invalidate by set/way
		subs	r9, r9, #1		@ decrement the way
		bge	loop3
		subs	r7, r7, #1		@ decrement the index
		bge	loop2
skip:
		add	r10, r10, #2		@ increment cache number
		cmp	r3, r10
		bgt	loop1
finished:
		ldmfd	sp!, {r0-r7, r9-r11}
		mov	r10, #0			@ switch back to cache level 0
		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
iflush:
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 4	@ ISB
		mov	pc, lr

@ ARMv5TEJ: loop the test/clean/invalidate D-cache op until it reports
@ clean (condition flags written via APSR_nzcv), then flush the
@ I-cache and drain the write buffer.  Skipped when bit 0 of r4 is set.
__armv5tej_mmu_cache_flush:
		tst	r4, #1
		movne	pc, lr
1:		mrc	p15, 0, APSR_nzcv, c7, c14, 3	@ test,clean,invalidate D cache
		bne	1b
		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
		mov	pc, lr

@ ARMv4: flush the D-cache by reading a region of code the size of the
@ cache (software flush), sizing cache and line length from the cache
@ type register when it is distinct from the ID register (r9), else
@ falling back to 32K/32-byte defaults.  Skipped when r4 bit 0 is set.
__armv4_mmu_cache_flush:
		tst	r4, #1
		movne	pc, lr
		mov	r2, #64*1024		@ default: 32K dcache size (*2)
		mov	r11, #32		@ default: 32 byte line size
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r9			@ cache ID register present?
		beq	no_cache_id
		mov	r1, r3, lsr #18
		and	r1, r1, #7
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #1 << 14		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
no_cache_id:
		mov	r1, pc
		bic	r1, r1, #63		@ align to longest cache line
		add	r2, r1, r2
1:
 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
 THUMB(		add     r1, r1, r11		)
		teq	r1, r2
		bne	1b

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

@ ARMv3 (MMU and MPU variants share the same op): invalidate the whole
@ cache with the single v3 operation.  Skipped when r4 bit 0 is set.
__armv3_mmu_cache_flush:
__armv3_mpu_cache_flush:
		tst	r4, #1
		movne	pc, lr
		mov	r1, #0
		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
#ifdef DEBUG
		.align	2
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

@ phex: print r0 as an r1-digit hex number via puts.
@ phex corrupts {r0, r1, r2, r3}
phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]		@ NUL-terminate at digit count
1:		subs	r1, r1, #1
		movmi	r0, r3
		bmi	puts
		and	r2, r0, #15
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7		@ 'A'-'F' adjustment
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b

@ puts: print the NUL-terminated string at r0, expanding \n to \r\n
@ and pacing each character with a busy-wait delay loop.
@ puts corrupts {r0, r1, r2, r3}
puts:		loadsp	r3, r2, r1
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr
2:		writeb	r2, r3
		mov	r1, #0x00020000
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'
		beq	2b
		teq	r0, #0
		bne	1b
		mov	pc, lr
@ putc: print the single character in r0 (tail-falls into puts' emit
@ path at label 2 with r0 = 0 so the string loop terminates).
@ putc corrupts {r0, r1, r2, r3}
putc:
		mov	r2, r0
		loadsp	r3, r1, r0
		mov	r0, #0
		b	2b

@ memdump: dump 64 words starting at r0, 8 words per line.
@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
memdump:	mov	r12, r0
		mov	r10, lr
		mov	r11, #0
2:		mov	r0, r11, lsl #2
		add	r0, r0, r12
		mov	r1, #8
		bl	phex			@ print word address
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex			@ print word value
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '		@ extra gap mid-line
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b
		mov	pc, r10
#endif

1414
		.ltorg
1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430

#ifdef CONFIG_ARM_VIRT_EXT
.align 5
__hyp_reentry_vectors:
		W(b)	.			@ reset
		W(b)	.			@ undef
		W(b)	.			@ svc
		W(b)	.			@ pabort
		W(b)	.			@ dabort
		W(b)	__enter_kernel		@ hyp
		W(b)	.			@ irq
		W(b)	.			@ fiq
#endif /* CONFIG_ARM_VIRT_EXT */

__enter_kernel:
		mov	r0, #0			@ must be 0
1431 1432
		mov	r1, r7			@ restore architecture number
		mov	r2, r8			@ restore atags pointer
1433 1434 1435
 ARM(		mov	pc, r4		)	@ call kernel
 M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
 THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes
1436

1437
reloc_code_end:
Linus Torvalds's avatar
Linus Torvalds committed
1438

#ifdef CONFIG_EFI_STUB
		.align	2
_start:		.long	start - .

ENTRY(efi_stub_entry)
		@ allocate space on stack for passing current zImage address
		@ and for the EFI stub to return of new entry point of
		@ zImage, as EFI stub may copy the kernel. Pointer address
		@ is passed in r2. r0 and r1 are passed through from the
		@ EFI firmware to efi_entry
		adr	ip, _start
		ldr	r3, [ip]
		add	r3, r3, ip		@ r3 = runtime address of 'start'
		stmfd	sp!, {r3, lr}
		mov	r2, sp			@ pass zImage address in r2
		bl	efi_entry

		@ Check for error return from EFI stub. r0 has FDT address
		@ or error code.
		cmn	r0, #1
		beq	efi_load_fail

		@ Preserve return value of efi_entry() in r4
		mov	r4, r0
		bl	cache_clean_flush
		bl	cache_off

		@ Set parameters for booting zImage according to boot protocol
		@ put FDT address in r2, it was returned by efi_entry()
		@ r1 is the machine type, and r0 needs to be 0
		mov	r0, #0
		mov	r1, #0xFFFFFFFF
		mov	r2, r4

		@ Branch to (possibly) relocated zImage that is in [sp]
		ldr	lr, [sp]
		ldr	ip, =start_offset
		add	lr, lr, ip
		mov	pc, lr				@ no mode switch

efi_load_fail:
		@ Return EFI_LOAD_ERROR to EFI firmware on error.
		ldr	r0, =0x80000001
		ldmfd	sp!, {ip, pc}
ENDPROC(efi_stub_entry)
#endif

Linus Torvalds's avatar
Linus Torvalds committed
1486
		.align
1487
		.section ".stack", "aw", %nobits
1488 1489
.L_user_stack:	.space	4096
.L_user_stack_end: