[ARM] Convert to constant-optimising udelay() implementation.

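When udelay() is called with a compile-time constant, the multiplication that scales microseconds by 0x68db is now folded by the compiler (via __const_udelay()) instead of being executed at run time in __udelay(), as is already done on x86.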

[ARM] Convert to constant-optimising udelay() implementation.
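When udelay() is called with a compile-time constant, the multiplication that scales microseconds by 0x68db is now folded by the compiler (via __const_udelay()) instead of being executed at run time in __udelay(), as is already done on x86.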
a0ae2651 · Russell King · da5508a5 · a0ae2651 · a0ae2651 · a0ae2651
Commit a0ae2651 authored Oct 19, 2004 by Russell King
Hide whitespace changes
Inline Side-by-side

Showing with 29 additions and 17 deletions

arch/arm/kernel/armksyms.c arch/arm/kernel/armksyms.c +2 -1

arch/arm/lib/delay.S arch/arm/lib/delay.S +7 -6

include/asm-arm/delay.h include/asm-arm/delay.h +20 -10

No files found.
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -60,7 +60,8 @@ EXPORT_SYMBOL_ALIAS(fp_send_sig,send_sig);
 EXPORT_SYMBOL(__backtrace);
 	/* platform dependent support */
-EXPORT_SYMBOL(udelay);
+EXPORT_SYMBOL(__udelay);
+EXPORT_SYMBOL(__const_udelay);
 	/* networking */
 EXPORT_SYMBOL(csum_partial);

--- a/arch/arm/lib/delay.S
+++ b/arch/arm/lib/delay.S
@@ -16,15 +16,16 @@ LC0:		.word	loops_per_jiffy
 /*
 * 0 <= r0 <= 2000
 */
-ENTRY(udelay)
+ENTRY(__udelay)
 		mov	r2,     #0x6800
 		orr	r2, r2, #0x00db
-		mul	r1, r0, r2
+		mul	r0, r2, r0
+ENTRY(__const_udelay)				@ 0 <= r0 <= 0x01ffffff
 		ldr	r2, LC0
-		ldr	r2, [r2]
+		ldr	r2, [r2]		@ max = 0x0fffffff
-		mov	r1, r1, lsr #11
+		mov	r0, r0, lsr #11		@ max = 0x00003fff
-		mov	r2, r2, lsr #11
+		mov	r2, r2, lsr #11		@ max = 0x0003ffff
-		mul	r0, r1, r2
+		mul	r0, r2, r0		@ max = 2^32-1
 		movs	r0, r0, lsr #6
 		RETINSTR(moveq,pc,lr)

--- a/include/asm-arm/delay.h
+++ b/include/asm-arm/delay.h
-#ifndef __ASM_ARM_DELAY_H
-#define __ASM_ARM_DELAY_H
 /*
- * Copyright (C) 1995 Russell King
+ * Copyright (C) 1995-2004 Russell King
 *
 * Delay routines, using a pre-computed "loops_per_second" value.
 */
+#ifndef __ASM_ARM_DELAY_H
+#define __ASM_ARM_DELAY_H
 extern void __delay(int loops);
+/*
+ * This function intentionally does not exist; if you see references to
+ * it, it means that you're calling udelay() with an out of range value.
+ *
+ * With currently imposed limits, this means that we support a max delay
+ * of 2000us and 671 bogomips
+ */
+extern void __bad_udelay(void);
 /*
 * division by multiplication: you don't have to worry about
 * loss of precision.
@@ -19,14 +27,16 @@ extern void __delay(int loops);
 * first constant multiplications gets optimized away if the delay is
 * a constant)
 */
-extern void udelay(unsigned long usecs);
+extern void __udelay(unsigned long usecs);
+extern void __const_udelay(unsigned long);
-static inline unsigned long muldiv(unsigned long a, unsigned long b, unsigned long c)
+#define MAX_UDELAY_MS 2
-{
-	return a * b / c;
-}
+#define udelay(n)						\
+	(__builtin_constant_p(n) ?				\
+	  ((n) > (MAX_UDELAY_MS * 1000) ? __bad_udelay() :	\
+			__const_udelay((n) * 0x68dbul)) :	\
+	  __udelay(n))
 #endif /* defined(_ARM_DELAY_H) */