Commit b20480a9 authored by James Hogan, committed by Stefan Bader

MIPS: Implement __multi3 for GCC7 MIPS64r6 builds

BugLink: http://bugs.launchpad.net/bugs/1764316

commit ebabcf17 upstream.

GCC7 is a bit too eager to generate suboptimal __multi3 calls (128bit
multiply with 128bit result) for MIPS64r6 builds, even in code which
doesn't explicitly use 128bit types, such as the following:

unsigned long func(unsigned long a, unsigned long b)
{
	return a > (~0UL) / b;
}

Which GCC rearranges to:

return (unsigned __int128)a * (unsigned __int128)b > 0xffffffffffffffff;

Therefore implement __multi3, but only for MIPS64r6 with GCC7 as under
normal circumstances we wouldn't expect any calls to __multi3 to be
generated from kernel code.
Reported-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: James Hogan <jhogan@kernel.org>
Tested-by: Waldemar Brodkorb <wbx@openadk.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Maciej W. Rozycki <macro@mips.com>
Cc: Matthew Fortune <matthew.fortune@mips.com>
Cc: Florian Fainelli <florian@openwrt.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/17890/
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Juerg Haefliger <juergh@canonical.com>
Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
parent e3c0730f
...@@ -15,4 +15,5 @@ obj-$(CONFIG_CPU_R3000) += r3k_dump_tlb.o ...@@ -15,4 +15,5 @@ obj-$(CONFIG_CPU_R3000) += r3k_dump_tlb.o
obj-$(CONFIG_CPU_TX39XX) += r3k_dump_tlb.o obj-$(CONFIG_CPU_TX39XX) += r3k_dump_tlb.o
# libgcc-style stuff needed in the kernel # libgcc-style stuff needed in the kernel
obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o ucmpdi2.o obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o multi3.o \
ucmpdi2.o
...@@ -9,10 +9,18 @@ typedef int word_type __attribute__ ((mode (__word__))); ...@@ -9,10 +9,18 @@ typedef int word_type __attribute__ ((mode (__word__)));
struct DWstruct { struct DWstruct {
int high, low; int high, low;
}; };
/*
 * The two long long halves of a double-width value, most significant
 * half first in memory.  This is the variant selected ahead of the
 * __LITTLE_ENDIAN branch (presumably the big-endian one).
 */
struct TWstruct {
	long long high;
	long long low;
};
#elif defined(__LITTLE_ENDIAN) #elif defined(__LITTLE_ENDIAN)
struct DWstruct { struct DWstruct {
int low, high; int low, high;
}; };
/*
 * The two long long halves of a double-width value, least significant
 * half first in memory (little-endian layout).
 */
struct TWstruct {
	long long low;
	long long high;
};
#else #else
#error I feel sick. #error I feel sick.
#endif #endif
...@@ -22,4 +30,13 @@ typedef union { ...@@ -22,4 +30,13 @@ typedef union {
long long ll; long long ll;
} DWunion; } DWunion;
/*
 * TI-mode helpers, only declared for 64-bit MIPS R6 configurations.
 */
#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6)
/* Integer type in GCC's TI machine mode (a 128-bit integer). */
typedef int ti_type __attribute__((mode(TI)));
/*
 * Overlay of a ti_type value with its two long long halves, so the
 * halves can be read and written individually through .s.high / .s.low
 * while the whole value is accessed through .ti.
 */
typedef union {
struct TWstruct s;
ti_type ti;
} TWunion;
#endif
#endif /* __ASM_LIBGCC_H */ #endif /* __ASM_LIBGCC_H */
// SPDX-License-Identifier: GPL-2.0
#include <linux/export.h>
#include "libgcc.h"
/*
* GCC 7 suboptimally generates __multi3 calls for mips64r6, so for that
* specific case only we'll implement it here.
*
* See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82981
*/
#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ == 7)
/*
 * Multiply two 64-bit values with the DMULU instruction and return the
 * low 64 bits of the product.
 */
static inline long long notrace dmulu(long long a, long long b)
{
	long long lo;

	asm ("dmulu %[lo],%[a],%[b]"
	     : [lo] "=r" (lo)
	     : [a] "r" (a), [b] "r" (b));

	return lo;
}
/*
 * Multiply two 64-bit values as unsigned quantities with the DMUHU
 * instruction and return the high 64 bits of the 128-bit product.
 */
static inline long long notrace dmuhu(long long a, long long b)
{
	long long hi;

	asm ("dmuhu %[hi],%[a],%[b]"
	     : [hi] "=r" (hi)
	     : [a] "r" (a), [b] "r" (b));

	return hi;
}
/*
 * __multi3 - multiply two 128-bit values, returning the low 128 bits.
 * @a: first factor
 * @b: second factor
 *
 * Each operand is split into 64-bit halves and the product is assembled
 * from 64-bit multiplies:
 *
 *   a * b = (a.lo * b.lo)
 *           + 2^64  * (a.hi * b.lo + a.lo * b.hi)
 *          [+ 2^128 * (a.hi * b.hi)]
 *
 * The bracketed term lies entirely above bit 127 and is dropped.
 *
 * Return: the low 128 bits of a * b.
 */
ti_type notrace __multi3(ti_type a, ti_type b)
{
	TWunion res, aa, bb;
	unsigned long long high;

	aa.ti = a;
	bb.ti = b;

	/* Bits 0..63 of the result come solely from a.lo * b.lo. */
	res.s.low = dmulu(aa.s.low, bb.s.low);

	/*
	 * Bits 64..127: the carry out of a.lo * b.lo plus the low halves
	 * of both cross products.  The sum is meant to wrap modulo 2^64,
	 * so accumulate in an unsigned type; doing this on signed
	 * long long would be a signed overflow, which ISO C leaves
	 * undefined.
	 */
	high = dmuhu(aa.s.low, bb.s.low);
	high += dmulu(aa.s.high, bb.s.low);
	high += dmulu(aa.s.low, bb.s.high);
	res.s.high = high;

	return res.ti;
}
EXPORT_SYMBOL(__multi3);
#endif /* 64BIT && CPU_MIPSR6 && GCC7 */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment