Commit 7e0fb73c authored by Linus Torvalds

Merge branch 'hash' of git://ftp.sciencehorizons.net/linux

Pull string hash improvements from George Spelvin:
 "This series does several related things:

   - Makes the dcache hash (fs/namei.c) useful for general kernel use.

     (Thanks to Bruce for noticing the zero-length corner case)

   - Converts the string hashes in <linux/sunrpc/svcauth.h> to use the
     above.

   - Avoids 64-bit multiplies in hash_64() on 32-bit platforms.  Two
     32-bit multiplies will do well enough.

   - Rids the world of the bad hash multipliers in hash_32.

     This finishes the job started in commit 689de1d6 ("Minimal
     fix-up of bad hashing behavior of hash_64()")

     The vast majority of Linux architectures have hardware support for
     32x32-bit multiply and so derive no benefit from "simplified"
     multipliers.

     The few processors that do not (68000, h8/300 and some models of
     Microblaze) have arch-specific implementations added.  Those
     patches are last in the series.

   - Overhauls the dcache hash mixing.

     The patch in commit 0fed3ac8 ("namei: Improve hash mixing if
     CONFIG_DCACHE_WORD_ACCESS") was an off-the-cuff suggestion.
     Replaced with a much more careful design that's simultaneously
     faster and better.  (My own invention, as there was nothing suitable
     in the literature I could find.  Comments welcome!)

   - Modify the hash_name() loop to skip the initial HASH_MIX().  This
     would let us salt the hash if we ever wanted to.

   - Sort out partial_name_hash().

     The hash function is declared as using a long state, even though
     it's truncated to 32 bits at the end and the extra internal state
     contributes nothing to the result.  And some callers do odd things:

      - fs/hfs/string.c only allocates 32 bits of state
      - fs/hfsplus/unicode.c uses it to hash 16-bit unicode symbols not bytes

   - Modify bytemask_from_count to handle inputs of 1..sizeof(long)
     rather than 0..sizeof(long)-1.  This would simplify users other
     than full_name_hash"

  Special thanks to Bruce Fields for testing and finding bugs in v1.  (I
  learned some humbling lessons about "obviously correct" code.)

  On the arch-specific front, the m68k assembly has been tested in a
  standalone test harness, I've been in contact with the Microblaze
  maintainers who mostly don't care, as the hardware multiplier is never
  omitted in real-world applications, and I haven't heard anything from
  the H8/300 world"

* 'hash' of git://ftp.sciencehorizons.net/linux:
  h8300: Add <asm/hash.h>
  microblaze: Add <asm/hash.h>
  m68k: Add <asm/hash.h>
  <linux/hash.h>: Add support for architecture-specific functions
  fs/namei.c: Improve dcache hash function
  Eliminate bad hash multipliers from hash_32() and  hash_64()
  Change hash_64() return value to 32 bits
  <linux/sunrpc/svcauth.h>: Define hash_str() in terms of hashlen_string()
  fs/namei.c: Add hashlen_string() function
  Pull out string hash to <linux/stringhash.h>
parents 4e8440b3 4684fe95
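Editorial sketch (not part of the commit): the "avoids 64-bit multiplies in hash_64() on 32-bit platforms" item above boils down to folding the 64-bit input to 32 bits with one multiply and then hashing the folded value with another, exactly what the new hash_64_generic() in <linux/hash.h> below does. A minimal user-space restatement follows; the names hash32_mul and hash_64_on_32bit are illustrative, not the kernel's, and bits is assumed to be 1..32 as in the kernel API.

#include <stdint.h>

#define GOLDEN_RATIO_32 0x61C88647u

/* Multiply by the 32-bit golden ratio; the generic __hash_32() below does exactly this. */
static uint32_t hash32_mul(uint32_t v)
{
	return v * GOLDEN_RATIO_32;
}

/* Sketch of the 32-bit path of hash_64(): fold 64 bits down to 32 with one
 * 32x32 multiply, hash the folded value with another, keep the top 'bits' bits.
 */
static uint32_t hash_64_on_32bit(uint64_t val, unsigned int bits)
{
	uint32_t folded = (uint32_t)val ^ hash32_mul((uint32_t)(val >> 32));

	return hash32_mul(folded) >> (32 - bits);
}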
@@ -598,6 +598,14 @@ config HAVE_STACK_VALIDATION
 	  Architecture supports the 'objtool check' host tool command, which
 	  performs compile-time stack metadata validation.

+config HAVE_ARCH_HASH
+	bool
+	default n
+	help
+	  If this is set, the architecture provides an <asm/hash.h>
+	  file which provides platform-specific implementations of some
+	  functions in <linux/hash.h> or fs/namei.c.
+
 #
 # ABI hall of shame
 #
......
@@ -20,6 +20,7 @@ config H8300
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZO
 	select HAVE_ARCH_KGDB
+	select HAVE_ARCH_HASH
 	select CPU_NO_EFFICIENT_FFS

 config RWSEM_GENERIC_SPINLOCK
......
#ifndef _ASM_HASH_H
#define _ASM_HASH_H
/*
* The later H8SX models have a 32x32-bit multiply, but the H8/300H
* and H8S have only 16x16->32. Since it's tolerably compact, this is
* basically an inlined version of the __mulsi3 code. Since the inputs
* are not expected to be small, it's also simplified by skipping the
* early-out checks.
*
* (Since neither CPU has any multi-bit shift instructions, a
* shift-and-add version is a non-starter.)
*
* TODO: come up with an arch-specific version of the hashing in fs/namei.c,
* since that is heavily dependent on rotates. Which, as mentioned, suck
* horribly on H8.
*/
#if defined(CONFIG_CPU_H300H) || defined(CONFIG_CPU_H8S)
#define HAVE_ARCH__HASH_32 1
/*
* Multiply by k = 0x61C88647. Fitting this into three registers requires
* one extra instruction, but reducing register pressure will probably
* make that back and then some.
*
* GCC asm note: %e1 is the high half of operand %1, while %f1 is the
* low half. So if %1 is er4, then %e1 is e4 and %f1 is r4.
*
* This has been designed to modify x in place, since that's the most
* common usage, but preserve k, since hash_64() makes two calls in
* quick succession.
*/
static inline u32 __attribute_const__ __hash_32(u32 x)
{
u32 temp;
asm( "mov.w %e1,%f0"
"\n mulxu.w %f2,%0" /* klow * xhigh */
"\n mov.w %f0,%e1" /* The extra instruction */
"\n mov.w %f1,%f0"
"\n mulxu.w %e2,%0" /* khigh * xlow */
"\n add.w %e1,%f0"
"\n mulxu.w %f2,%1" /* klow * xlow */
"\n add.w %f0,%e1"
: "=&r" (temp), "=r" (x)
: "%r" (GOLDEN_RATIO_32), "1" (x));
return x;
}
#endif
#endif /* _ASM_HASH_H */
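Editorial sketch (not part of the patch): the asm above relies on the standard decomposition of a 32x32->32 multiply into three 16x16->32 products. A hedged C restatement of that identity, with the helper name mul32_via_16 being ours:

#include <stdint.h>

/* x*k mod 2^32 = ((klow*xhigh + khigh*xlow) << 16) + klow*xlow,
 * which is what the mulxu.w sequence above computes.
 */
static uint32_t mul32_via_16(uint32_t x, uint32_t k)
{
	uint32_t xlow = x & 0xffff, xhigh = x >> 16;
	uint32_t klow = k & 0xffff, khigh = k >> 16;
	uint32_t cross = klow * xhigh + khigh * xlow;	/* only low 16 bits survive the shift */

	return (cross << 16) + klow * xlow;
}
/* e.g. mul32_via_16(x, 0x61C88647) == x * 0x61C88647 for any 32-bit x */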
@@ -41,6 +41,7 @@ config M68000
 	select CPU_HAS_NO_UNALIGNED
 	select GENERIC_CSUM
 	select CPU_NO_EFFICIENT_FFS
+	select HAVE_ARCH_HASH
 	help
 	  The Freescale (was Motorola) 68000 CPU is the first generation of
 	  the well known M68K family of processors. The CPU core as well as
......
#ifndef _ASM_HASH_H
#define _ASM_HASH_H
/*
* If CONFIG_M68000=y (original mc68000/010), this file is #included
* to work around the lack of a MULU.L instruction.
*/
#define HAVE_ARCH__HASH_32 1
/*
* While it would be legal to substitute a different hash operation
* entirely, let's keep it simple and just use an optimized multiply
* by GOLDEN_RATIO_32 = 0x61C88647.
*
* The best way to do that appears to be to multiply by 0x8647 with
* shifts and adds, and use mulu.w to multiply the high half by 0x61C8.
*
* Because the 68000 has multi-cycle shifts, this addition chain is
* chosen to minimise the shift distances.
*
* Despite every attempt to spoon-feed it simple operations, GCC
* 6.1.1 doggedly insists on doing annoying things like converting
* "lsl.l #2,<reg>" (12 cycles) to two adds (8+8 cycles).
*
* It also likes to notice two shifts in a row, like "a = x << 2" and
* "a <<= 7", and convert that to "a = x << 9". But shifts longer
* than 8 bits are extra-slow on m68k, so that's a lose.
*
* Since the 68000 is a very simple in-order processor with no
* instruction scheduling effects on execution time, we can safely
* take it out of GCC's hands and write one big asm() block.
*
* Without calling overhead, this operation is 30 bytes (14 instructions
* plus one immediate constant) and 166 cycles.
*
* (Because %2 is fetched twice, it can't be postincrement, and thus it
* can't be a fully general "g" or "m". Register is preferred, but
* offsettable memory or immediate will work.)
*/
static inline u32 __attribute_const__ __hash_32(u32 x)
{
u32 a, b;
asm( "move.l %2,%0" /* a = x * 0x0001 */
"\n lsl.l #2,%0" /* a = x * 0x0004 */
"\n move.l %0,%1"
"\n lsl.l #7,%0" /* a = x * 0x0200 */
"\n add.l %2,%0" /* a = x * 0x0201 */
"\n add.l %0,%1" /* b = x * 0x0205 */
"\n add.l %0,%0" /* a = x * 0x0402 */
"\n add.l %0,%1" /* b = x * 0x0607 */
"\n lsl.l #5,%0" /* a = x * 0x8040 */
: "=&d,d" (a), "=&r,r" (b)
: "r,roi?" (x)); /* a+b = x*0x8647 */
return ((u16)(x*0x61c8) << 16) + a + b;
}
#endif /* _ASM_HASH_H */
@@ -16,6 +16,7 @@ config MICROBLAZE
 	select GENERIC_IRQ_SHOW
 	select GENERIC_PCI_IOMAP
 	select GENERIC_SCHED_CLOCK
+	select HAVE_ARCH_HASH
 	select HAVE_ARCH_KGDB
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_API_DEBUG
......
#ifndef _ASM_HASH_H
#define _ASM_HASH_H
/*
* Fortunately, most people who want to run Linux on Microblaze enable
* both multiplier and barrel shifter, but omitting them is technically
* a supported configuration.
*
* With just a barrel shifter, we can implement an efficient constant
* multiply using shifts and adds. GCC can find a 9-step solution, but
* this 6-step solution was found by Yevgen Voronenko's implementation
* of the Hcub algorithm at http://spiral.ece.cmu.edu/mcm/gen.html.
*
* That software is really not designed for a single multiplier this large,
* but if you run it enough times with different seeds, it'll find several
* 6-shift, 6-add sequences for computing x * 0x61C88647. They are all
* c = (x << 19) + x;
* a = (x << 9) + c;
* b = (x << 23) + a;
* return (a<<11) + (b<<6) + (c<<3) - b;
* with variations on the order of the final add.
*
* Without even a shifter, it's hopeless; any hash function will suck.
*/
#if CONFIG_XILINX_MICROBLAZE0_USE_HW_MUL == 0
#define HAVE_ARCH__HASH_32 1
/* Multiply by GOLDEN_RATIO_32 = 0x61C88647 */
static inline u32 __attribute_const__ __hash_32(u32 a)
{
#if CONFIG_XILINX_MICROBLAZE0_USE_BARREL
unsigned int b, c;
/* Phase 1: Compute three intermediate values */
b = a << 23;
c = (a << 19) + a;
a = (a << 9) + c;
b += a;
/* Phase 2: Compute (a << 11) + (b << 6) + (c << 3) - b */
a <<= 5;
a += b; /* (a << 5) + b */
a <<= 3;
a += c; /* (a << 8) + (b << 3) + c */
a <<= 3;
return a - b; /* (a << 11) + (b << 6) + (c << 3) - b */
#else
/*
* "This is really going to hurt."
*
* Without a barrel shifter, left shifts are implemented as
* repeated additions, and the best we can do is an optimal
* addition-subtraction chain. This one is not known to be
* optimal, but at 37 steps, it's decent for a 31-bit multiplier.
*
* Question: given its size (37*4 = 148 bytes per instance),
* and slowness, is this worth having inline?
*/
unsigned int b, c, d;
b = a << 4; /* 4 */
c = b << 1; /* 1 5 */
b += a; /* 1 6 */
c += b; /* 1 7 */
c <<= 3; /* 3 10 */
c -= a; /* 1 11 */
d = c << 7; /* 7 18 */
d += b; /* 1 19 */
d <<= 8; /* 8 27 */
d += a; /* 1 28 */
d <<= 1; /* 1 29 */
d += b; /* 1 30 */
d <<= 6; /* 6 36 */
return d + c; /* 1 37 total instructions*/
#endif
}
#endif /* !CONFIG_XILINX_MICROBLAZE0_USE_HW_MUL */
#endif /* _ASM_HASH_H */
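Editorial sketch (not part of the patch): the 6-shift/6-add chain quoted in the comment above can be checked against a plain multiply on the host. This is only a sanity-check harness under that assumption; hcub_mul and the sample values are ours.

#include <assert.h>
#include <stdint.h>

/* The Hcub-derived sequence from the comment, written out directly. */
static uint32_t hcub_mul(uint32_t x)
{
	uint32_t c = (x << 19) + x;
	uint32_t a = (x << 9) + c;
	uint32_t b = (x << 23) + a;

	return (a << 11) + (b << 6) + (c << 3) - b;
}

int main(void)
{
	uint32_t samples[] = { 0, 1, 2, 0xdeadbeef, 0x61C88647, 0xffffffff };
	unsigned i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		assert(hcub_mul(samples[i]) == samples[i] * 0x61C88647u);
	return 0;
}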
@@ -398,6 +398,8 @@ static int af9015_download_firmware(struct dvb_usb_device *d,
 }

 #define AF9015_EEPROM_SIZE 256
+/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
+#define GOLDEN_RATIO_PRIME_32 0x9e370001UL

 /* hash (and dump) eeprom */
 static int af9015_eeprom_hash(struct dvb_usb_device *d)
......
@@ -1670,8 +1670,7 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
 	struct qstr q;

 	q.name = name;
-	q.len = strlen(name);
-	q.hash = full_name_hash(q.name, q.len);
+	q.hash_len = hashlen_string(name);
 	return d_alloc(parent, &q);
 }
 EXPORT_SYMBOL(d_alloc_name);
......
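Editorial sketch (not part of the commit): the single q.hash_len assignment above works because struct qstr overlays its hash and len members with the 64-bit hash_len, and hashlen_string() returns both halves at once. A stand-alone illustration of the packing, using the hashlen_* macros quoted from the <linux/stringhash.h> added later in this series; the example values are made up.

#include <stdint.h>
#include <stdio.h>

/* Accessors as defined in <linux/stringhash.h>: hash in the low half,
 * length in the high half of a 64-bit "hash_len".
 */
#define hashlen_hash(hashlen)		((uint32_t)(hashlen))
#define hashlen_len(hashlen)		((uint32_t)((hashlen) >> 32))
#define hashlen_create(hash, len)	((uint64_t)(len) << 32 | (uint32_t)(hash))

int main(void)
{
	uint64_t hash_len = hashlen_create(0xdeadbeef, 11);	/* pretend hash of an 11-byte name */

	printf("hash=%#x len=%u\n", hashlen_hash(hash_len), hashlen_len(hash_len));
	return 0;
}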
@@ -35,6 +35,7 @@
 #include <linux/fs_struct.h>
 #include <linux/posix_acl.h>
 #include <linux/hash.h>
+#include <linux/bitops.h>
 #include <asm/uaccess.h>

 #include "internal.h"
@@ -1797,74 +1798,144 @@ static int walk_component(struct nameidata *nd, int flags)

 #include <asm/word-at-a-time.h>

-#ifdef CONFIG_64BIT
+#ifdef HASH_MIX

-static inline unsigned int fold_hash(unsigned long hash)
-{
-	return hash_64(hash, 32);
-}
+/* Architecture provides HASH_MIX and fold_hash() in <asm/hash.h> */

+#elif defined(CONFIG_64BIT)
 /*
- * This is George Marsaglia's XORSHIFT generator.
- * It implements a maximum-period LFSR in only a few
- * instructions.  It also has the property (required
- * by hash_name()) that mix_hash(0) = 0.
+ * Register pressure in the mixing function is an issue, particularly
+ * on 32-bit x86, but almost any function requires one state value and
+ * one temporary.  Instead, use a function designed for two state values
+ * and no temporaries.
+ *
+ * This function cannot create a collision in only two iterations, so
+ * we have two iterations to achieve avalanche.  In those two iterations,
+ * we have six layers of mixing, which is enough to spread one bit's
+ * influence out to 2^6 = 64 state bits.
+ *
+ * Rotate constants are scored by considering either 64 one-bit input
+ * deltas or 64*63/2 = 2016 two-bit input deltas, and finding the
+ * probability of that delta causing a change to each of the 128 output
+ * bits, using a sample of random initial states.
+ *
+ * The Shannon entropy of the computed probabilities is then summed
+ * to produce a score.  Ideally, any input change has a 50% chance of
+ * toggling any given output bit.
+ *
+ * Mixing scores (in bits) for (12,45):
+ *  Input delta:  1-bit     2-bit
+ *  1 round:      713.3   42542.6
+ *  2 rounds:    2753.7  140389.8
+ *  3 rounds:    5954.1  233458.2
+ *  4 rounds:    7862.6  256672.2
+ *  Perfect:     8192    258048
+ *            (64*128) (64*63/2 * 128)
  */
-static inline unsigned long mix_hash(unsigned long hash)
+#define HASH_MIX(x, y, a)	\
+	(	x ^= (a),	\
+	y ^= x,	x = rol64(x,12),\
+	x += y,	y = rol64(y,45),\
+	y *= 9			)
+
+/*
+ * Fold two longs into one 32-bit hash value.  This must be fast, but
+ * latency isn't quite as critical, as there is a fair bit of additional
+ * work done before the hash value is used.
+ */
+static inline unsigned int fold_hash(unsigned long x, unsigned long y)
 {
-	hash ^= hash << 13;
-	hash ^= hash >> 7;
-	hash ^= hash << 17;
-	return hash;
+	y ^= x * GOLDEN_RATIO_64;
+	y *= GOLDEN_RATIO_64;
+	return y >> 32;
 }

 #else	/* 32-bit case */

-#define fold_hash(x) (x)
+/*
+ * Mixing scores (in bits) for (7,20):
+ *  Input delta:  1-bit    2-bit
+ *  1 round:      330.3   9201.6
+ *  2 rounds:    1246.4  25475.4
+ *  3 rounds:    1907.1  31295.1
+ *  4 rounds:    2042.3  31718.6
+ *  Perfect:     2048    31744
+ *             (32*64)  (32*31/2 * 64)
+ */
+#define HASH_MIX(x, y, a)	\
+	(	x ^= (a),	\
+	y ^= x,	x = rol32(x, 7),\
+	x += y,	y = rol32(y,20),\
+	y *= 9			)

-static inline unsigned long mix_hash(unsigned long hash)
+static inline unsigned int fold_hash(unsigned long x, unsigned long y)
 {
-	hash ^= hash << 13;
-	hash ^= hash >> 17;
-	hash ^= hash << 5;
-	return hash;
+	/* Use arch-optimized multiply if one exists */
+	return __hash_32(y ^ __hash_32(x));
 }

 #endif

-unsigned int full_name_hash(const unsigned char *name, unsigned int len)
+/*
+ * Return the hash of a string of known length.  This is carefully
+ * designed to match hash_name(), which is the more critical function.
+ * In particular, we must end by hashing a final word containing 0..7
+ * payload bytes, to match the way that hash_name() iterates until it
+ * finds the delimiter after the name.
+ */
+unsigned int full_name_hash(const char *name, unsigned int len)
 {
-	unsigned long a, hash = 0;
+	unsigned long a, x = 0, y = 0;

 	for (;;) {
-		if (!len)
-			goto done;
 		a = load_unaligned_zeropad(name);
 		if (len < sizeof(unsigned long))
 			break;
-		hash = mix_hash(hash + a);
+		HASH_MIX(x, y, a);
 		name += sizeof(unsigned long);
 		len -= sizeof(unsigned long);
+		if (!len)
+			goto done;
 	}
-	hash += a & bytemask_from_count(len);
+	x ^= a & bytemask_from_count(len);
 done:
-	return fold_hash(hash);
+	return fold_hash(x, y);
 }
 EXPORT_SYMBOL(full_name_hash);

+/* Return the "hash_len" (hash and length) of a null-terminated string */
+u64 hashlen_string(const char *name)
+{
+	unsigned long a = 0, x = 0, y = 0, adata, mask, len;
+	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
+
+	len = -sizeof(unsigned long);
+	do {
+		HASH_MIX(x, y, a);
+		len += sizeof(unsigned long);
+		a = load_unaligned_zeropad(name+len);
+	} while (!has_zero(a, &adata, &constants));
+
+	adata = prep_zero_mask(a, adata, &constants);
+	mask = create_zero_mask(adata);
+	x ^= a & zero_bytemask(mask);
+
+	return hashlen_create(fold_hash(x, y), len + find_zero(mask));
+}
+EXPORT_SYMBOL(hashlen_string);
+
 /*
  * Calculate the length and hash of the path component, and
  * return the "hash_len" as the result.
  */
 static inline u64 hash_name(const char *name)
 {
-	unsigned long a, b, adata, bdata, mask, hash, len;
+	unsigned long a = 0, b, x = 0, y = 0, adata, bdata, mask, len;
 	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;

-	hash = a = 0;
 	len = -sizeof(unsigned long);
 	do {
-		hash = mix_hash(hash + a);
+		HASH_MIX(x, y, a);
 		len += sizeof(unsigned long);
 		a = load_unaligned_zeropad(name+len);
 		b = a ^ REPEAT_BYTE('/');
@@ -1872,25 +1943,40 @@ static inline u64 hash_name(const char *name)
 	adata = prep_zero_mask(a, adata, &constants);
 	bdata = prep_zero_mask(b, bdata, &constants);
 	mask = create_zero_mask(adata | bdata);
+	x ^= a & zero_bytemask(mask);

-	hash += a & zero_bytemask(mask);
-	len += find_zero(mask);
-	return hashlen_create(fold_hash(hash), len);
+	return hashlen_create(fold_hash(x, y), len + find_zero(mask));
 }

-#else
+#else	/* !CONFIG_DCACHE_WORD_ACCESS: Slow, byte-at-a-time version */

-unsigned int full_name_hash(const unsigned char *name, unsigned int len)
+/* Return the hash of a string of known length */
+unsigned int full_name_hash(const char *name, unsigned int len)
 {
 	unsigned long hash = init_name_hash();
 	while (len--)
-		hash = partial_name_hash(*name++, hash);
+		hash = partial_name_hash((unsigned char)*name++, hash);
 	return end_name_hash(hash);
 }
 EXPORT_SYMBOL(full_name_hash);

+/* Return the "hash_len" (hash and length) of a null-terminated string */
+u64 hashlen_string(const char *name)
+{
+	unsigned long hash = init_name_hash();
+	unsigned long len = 0, c;
+
+	c = (unsigned char)*name;
+	do {
+		len++;
+		hash = partial_name_hash(c, hash);
+		c = (unsigned char)name[len];
+	} while (c);
+	return hashlen_create(end_name_hash(hash), len);
+}
+EXPORT_SYMBOL(hashlen_string);
+
 /*
  * We know there's a real path component here of at least
  * one character.
......
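Editorial sketch (not part of the commit): the 64-bit HASH_MIX()/fold_hash() pair above can be played with in user space. The harness below reimplements them verbatim but feeds only whole 8-byte words of a string through the mixer, deliberately skipping load_unaligned_zeropad() and the final partial-word step, so it is an approximation for experimentation rather than a drop-in replacement.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define GOLDEN_RATIO_64 0x61C8864680B583EBull

static uint64_t rol64(uint64_t v, unsigned int s)
{
	return (v << s) | (v >> (64 - s));
}

/* Same two-state mixer as the 64-bit HASH_MIX() in fs/namei.c above. */
#define HASH_MIX(x, y, a)	\
	(	x ^= (a),	\
	y ^= x, x = rol64(x, 12),\
	x += y, y = rol64(y, 45),\
	y *= 9			)

static uint32_t fold_hash(uint64_t x, uint64_t y)
{
	y ^= x * GOLDEN_RATIO_64;
	y *= GOLDEN_RATIO_64;
	return y >> 32;
}

int main(void)
{
	const char *name = "some_directory_entry";
	uint64_t x = 0, y = 0, a;
	size_t len = strlen(name);

	while (len >= sizeof(a)) {
		memcpy(&a, name, sizeof(a));	/* safe stand-in for the word-at-a-time load */
		HASH_MIX(x, y, a);
		name += sizeof(a);
		len -= sizeof(a);
	}
	printf("hash of full words: %#x\n", fold_hash(x, y));
	return 0;
}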
@@ -10,6 +10,7 @@
 #include <linux/cache.h>
 #include <linux/rcupdate.h>
 #include <linux/lockref.h>
+#include <linux/stringhash.h>

 struct path;
 struct vfsmount;
@@ -52,9 +53,6 @@ struct qstr {
 };
 #define QSTR_INIT(n,l) { { { .len = l } }, .name = n }

-#define hashlen_hash(hashlen) ((u32) (hashlen))
-#define hashlen_len(hashlen)  ((u32)((hashlen) >> 32))
-#define hashlen_create(hash,len) (((u64)(len)<<32)|(u32)(hash))

 struct dentry_stat_t {
 	long nr_dentry;
@@ -65,29 +63,6 @@ struct dentry_stat_t {
 };
 extern struct dentry_stat_t dentry_stat;

-/* Name hashing routines. Initial hash value */
-/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */
-#define init_name_hash() 0
-
-/* partial hash update function. Assume roughly 4 bits per character */
-static inline unsigned long
-partial_name_hash(unsigned long c, unsigned long prevhash)
-{
-	return (prevhash + (c << 4) + (c >> 4)) * 11;
-}
-
-/*
- * Finally: cut down the number of bits to a int value (and try to avoid
- * losing bits)
- */
-static inline unsigned long end_name_hash(unsigned long hash)
-{
-	return (unsigned int) hash;
-}
-
-/* Compute the hash for a name string. */
-extern unsigned int full_name_hash(const unsigned char *, unsigned int);
-
 /*
  * Try to keep struct dentry aligned on 64 byte cachelines (this will
  * give reasonable cacheline footprint with larger lines without the
......
@@ -3,92 +3,94 @@
 /* Fast hashing routine for ints, longs and pointers.
    (C) 2002 Nadia Yvette Chambers, IBM */

-/*
- * Knuth recommends primes in approximately golden ratio to the maximum
- * integer representable by a machine word for multiplicative hashing.
- * Chuck Lever verified the effectiveness of this technique:
- * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
- *
- * These primes are chosen to be bit-sparse, that is operations on
- * them can use shifts and additions instead of multiplications for
- * machines where multiplications are slow.
- */
-
 #include <asm/types.h>
 #include <linux/compiler.h>

-/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
-#define GOLDEN_RATIO_PRIME_32 0x9e370001UL
-/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
-#define GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001UL
+/*
+ * The "GOLDEN_RATIO_PRIME" is used in fs/btrfs/btrfs_inode.h and
+ * fs/inode.c.  It's not actually prime any more (the previous primes
+ * were actively bad for hashing), but the name remains.
+ */

 #if BITS_PER_LONG == 32
-#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_32
+#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_32
 #define hash_long(val, bits) hash_32(val, bits)
 #elif BITS_PER_LONG == 64
 #define hash_long(val, bits) hash_64(val, bits)
-#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_64
+#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_64
 #else
 #error Wordsize not 32 or 64
 #endif

 /*
- * The above primes are actively bad for hashing, since they are
- * too sparse. The 32-bit one is mostly ok, the 64-bit one causes
- * real problems. Besides, the "prime" part is pointless for the
- * multiplicative hash.
+ * This hash multiplies the input by a large odd number and takes the
+ * high bits.  Since multiplication propagates changes to the most
+ * significant end only, it is essential that the high bits of the
+ * product be used for the hash value.
+ *
+ * Chuck Lever verified the effectiveness of this technique:
+ * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
  *
  * Although a random odd number will do, it turns out that the golden
  * ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice
- * properties.
+ * properties.  (See Knuth vol 3, section 6.4, exercise 9.)
  *
- * These are the negative, (1 - phi) = (phi^2) = (3 - sqrt(5))/2.
- * (See Knuth vol 3, section 6.4, exercise 9.)
+ * These are the negative, (1 - phi) = phi**2 = (3 - sqrt(5))/2,
+ * which is very slightly easier to multiply by and makes no
+ * difference to the hash distribution.
  */
 #define GOLDEN_RATIO_32 0x61C88647
 #define GOLDEN_RATIO_64 0x61C8864680B583EBull

-static __always_inline u64 hash_64(u64 val, unsigned int bits)
-{
-	u64 hash = val;
-
-#if BITS_PER_LONG == 64
-	hash = hash * GOLDEN_RATIO_64;
-#else
-	/* Sigh, gcc can't optimise this alone like it does for 32 bits. */
-	u64 n = hash;
-	n <<= 18;
-	hash -= n;
-	n <<= 33;
-	hash -= n;
-	n <<= 3;
-	hash += n;
-	n <<= 3;
-	hash -= n;
-	n <<= 4;
-	hash += n;
-	n <<= 2;
-	hash += n;
-#endif
-
-	/* High bits are more random, so use them. */
-	return hash >> (64 - bits);
-}
+#ifdef CONFIG_HAVE_ARCH_HASH
+/* This header may use the GOLDEN_RATIO_xx constants */
+#include <asm/hash.h>
+#endif

-static inline u32 hash_32(u32 val, unsigned int bits)
-{
-	/* On some cpus multiply is faster, on others gcc will do shifts */
-	u32 hash = val * GOLDEN_RATIO_PRIME_32;
-
-	/* High bits are more random, so use them. */
-	return hash >> (32 - bits);
+/*
+ * The _generic versions exist only so lib/test_hash.c can compare
+ * the arch-optimized versions with the generic.
+ *
+ * Note that if you change these, any <asm/hash.h> that aren't updated
+ * to match need to have their HAVE_ARCH_* define values updated so the
+ * self-test will not false-positive.
+ */
+#ifndef HAVE_ARCH__HASH_32
+#define __hash_32 __hash_32_generic
+#endif
+static inline u32 __hash_32_generic(u32 val)
+{
+	return val * GOLDEN_RATIO_32;
+}
+
+#ifndef HAVE_ARCH_HASH_32
+#define hash_32 hash_32_generic
+#endif
+static inline u32 hash_32_generic(u32 val, unsigned int bits)
+{
+	/* High bits are more random, so use them. */
+	return __hash_32(val) >> (32 - bits);
+}
+
+#ifndef HAVE_ARCH_HASH_64
+#define hash_64 hash_64_generic
+#endif
+static __always_inline u32 hash_64_generic(u64 val, unsigned int bits)
+{
+#if BITS_PER_LONG == 64
+	/* 64x64-bit multiply is efficient on all 64-bit processors */
+	return val * GOLDEN_RATIO_64 >> (64 - bits);
+#else
+	/* Hash 64 bits using only 32x32-bit multiply. */
+	return hash_32((u32)val ^ __hash_32(val >> 32), bits);
+#endif
 }

-static inline unsigned long hash_ptr(const void *ptr, unsigned int bits)
+static inline u32 hash_ptr(const void *ptr, unsigned int bits)
 {
 	return hash_long((unsigned long)ptr, bits);
 }

+/* This really should be called fold32_ptr; it does no hashing to speak of. */
 static inline u32 hash32_ptr(const void *ptr)
 {
 	unsigned long val = (unsigned long)ptr;
......
#ifndef __LINUX_STRINGHASH_H
#define __LINUX_STRINGHASH_H
#include <linux/compiler.h> /* For __pure */
#include <linux/types.h> /* For u32, u64 */
/*
* Routines for hashing strings of bytes to a 32-bit hash value.
*
* These hash functions are NOT GUARANTEED STABLE between kernel
* versions, architectures, or even repeated boots of the same kernel.
* (E.g. they may depend on boot-time hardware detection or be
* deliberately randomized.)
*
* They are also not intended to be secure against collisions caused by
* malicious inputs; much slower hash functions are required for that.
*
* They are optimized for pathname components, meaning short strings.
* Even if a majority of files have longer names, the dynamic profile of
* pathname components skews short due to short directory names.
* (E.g. /usr/lib/libsesquipedalianism.so.3.141.)
*/
/*
* Version 1: one byte at a time. Example of use:
*
* unsigned long hash = init_name_hash();
* while (*p)
* hash = partial_name_hash(tolower(*p++), hash);
* hash = end_name_hash(hash);
*
* Although this is designed for bytes, fs/hfsplus/unicode.c
* abuses it to hash 16-bit values.
*/
/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */
#define init_name_hash() 0
/* partial hash update function. Assume roughly 4 bits per character */
static inline unsigned long
partial_name_hash(unsigned long c, unsigned long prevhash)
{
return (prevhash + (c << 4) + (c >> 4)) * 11;
}
/*
* Finally: cut down the number of bits to an int value (and try to avoid
* losing bits)
*/
static inline unsigned long end_name_hash(unsigned long hash)
{
return (unsigned int)hash;
}
/*
* Version 2: One word (32 or 64 bits) at a time.
* If CONFIG_DCACHE_WORD_ACCESS is defined (meaning <asm/word-at-a-time.h>
* exists, which describes major Linux platforms like x86 and ARM), then
* this computes a different hash function much faster.
*
* If not set, this falls back to a wrapper around the preceding.
*/
extern unsigned int __pure full_name_hash(const char *, unsigned int);
/*
* A hash_len is a u64 with the hash of a string in the low
* half and the length in the high half.
*/
#define hashlen_hash(hashlen) ((u32)(hashlen))
#define hashlen_len(hashlen) ((u32)((hashlen) >> 32))
#define hashlen_create(hash, len) ((u64)(len)<<32 | (u32)(hash))
/* Return the "hash_len" (hash and length) of a null-terminated string */
extern u64 __pure hashlen_string(const char *name);
#endif /* __LINUX_STRINGHASH_H */
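Editorial sketch (not part of the commit): the point of splitting this header out is that other kernel code can now reuse the dcache string hashes. A rough, hypothetical user under those assumptions; MYTABLE_BITS and my_lookup_bucket() are illustrative names, and the one-argument hashlen_string()/full_name_hash() signatures are the ones introduced by this series.

#include <linux/stringhash.h>
#include <linux/hash.h>

#define MYTABLE_BITS 7

/* Map a NUL-terminated name to one of 2^MYTABLE_BITS buckets,
 * getting the string length for free from the same pass.
 */
static unsigned int my_lookup_bucket(const char *name)
{
	u64 hash_len = hashlen_string(name);		/* hash and strlen in one traversal */
	unsigned int len = hashlen_len(hash_len);	/* available if the caller wants it */

	(void)len;
	return hash_32(hashlen_hash(hash_len), MYTABLE_BITS);
}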
@@ -16,6 +16,7 @@
 #include <linux/sunrpc/cache.h>
 #include <linux/sunrpc/gss_api.h>
 #include <linux/hash.h>
+#include <linux/stringhash.h>
 #include <linux/cred.h>

 struct svc_cred {
@@ -165,41 +166,18 @@ extern int svcauth_unix_set_client(struct svc_rqst *rqstp);
 extern int unix_gid_cache_create(struct net *net);
 extern void unix_gid_cache_destroy(struct net *net);

-static inline unsigned long hash_str(char *name, int bits)
+/*
+ * The <stringhash.h> functions are good enough that we don't need to
+ * use hash_32() on them; just extracting the high bits is enough.
+ */
+static inline unsigned long hash_str(char const *name, int bits)
 {
-	unsigned long hash = 0;
-	unsigned long l = 0;
-	int len = 0;
-	unsigned char c;
-	do {
-		if (unlikely(!(c = *name++))) {
-			c = (char)len; len = -1;
-		}
-		l = (l << 8) | c;
-		len++;
-		if ((len & (BITS_PER_LONG/8-1))==0)
-			hash = hash_long(hash^l, BITS_PER_LONG);
-	} while (len);
-	return hash >> (BITS_PER_LONG - bits);
+	return hashlen_hash(hashlen_string(name)) >> (32 - bits);
 }

-static inline unsigned long hash_mem(char *buf, int length, int bits)
+static inline unsigned long hash_mem(char const *buf, int length, int bits)
 {
-	unsigned long hash = 0;
-	unsigned long l = 0;
-	int len = 0;
-	unsigned char c;
-	do {
-		if (len == length) {
-			c = (char)len; len = -1;
-		} else
-			c = *buf++;
-		l = (l << 8) | c;
-		len++;
-		if ((len & (BITS_PER_LONG/8-1))==0)
-			hash = hash_long(hash^l, BITS_PER_LONG);
-	} while (len);
-	return hash >> (BITS_PER_LONG - bits);
+	return full_name_hash(buf, length) >> (32 - bits);
 }

 #endif /* __KERNEL__ */
......
@@ -1849,6 +1849,17 @@ config TEST_RHASHTABLE

 	  If unsure, say N.

+config TEST_HASH
+	tristate "Perform selftest on hash functions"
+	default n
+	help
+	  Enable this option to test the kernel's integer (<linux/hash.h>)
+	  and string (<linux/stringhash.h>) hash functions on boot
+	  (or module load).
+
+	  This is intended to help people writing architecture-specific
+	  optimized versions.  If unsure, say N.
+
 endmenu # runtime tests

 config PROVIDE_OHCI1394_DMA_INIT
......
@@ -48,6 +48,7 @@ obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o
 obj-y += kstrtox.o
 obj-$(CONFIG_TEST_BPF) += test_bpf.o
 obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
+obj-$(CONFIG_TEST_HASH) += test_hash.o
 obj-$(CONFIG_TEST_KASAN) += test_kasan.o
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
 obj-$(CONFIG_TEST_LKM) += test_module.o
......
/*
* Test cases for <linux/hash.h> and <linux/stringhash.h>
* This just verifies that various ways of computing a hash
* produce the same thing and, for cases where a k-bit hash
* value is requested, is of the requested size.
*
* We fill a buffer with a 255-byte null-terminated string,
* and use both full_name_hash() and hashlen_string() to hash the
* substrings from i to j, where 0 <= i < j < 256.
*
* The returned values are used to check that __hash_32() and
* __hash_32_generic() compute the same thing. Likewise hash_32()
* and hash_64().
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt "\n"
#include <linux/compiler.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/hash.h>
#include <linux/stringhash.h>
#include <linux/printk.h>
/* 32-bit XORSHIFT generator. Seed must not be zero. */
static u32 __init __attribute_const__
xorshift(u32 seed)
{
seed ^= seed << 13;
seed ^= seed >> 17;
seed ^= seed << 5;
return seed;
}
/* Given a non-zero x, returns a non-zero byte. */
static u8 __init __attribute_const__
mod255(u32 x)
{
x = (x & 0xffff) + (x >> 16); /* 1 <= x <= 0x1fffe */
x = (x & 0xff) + (x >> 8); /* 1 <= x <= 0x2fd */
x = (x & 0xff) + (x >> 8); /* 1 <= x <= 0x100 */
x = (x & 0xff) + (x >> 8); /* 1 <= x <= 0xff */
return x;
}
/* Fill the buffer with non-zero bytes. */
static void __init
fill_buf(char *buf, size_t len, u32 seed)
{
size_t i;
for (i = 0; i < len; i++) {
seed = xorshift(seed);
buf[i] = mod255(seed);
}
}
/*
* Test the various integer hash functions. h64 (or its low-order bits)
* is the integer to hash. hash_or accumulates the OR of the hash values,
* which are later checked to see that they cover all the requested bits.
*
* Because these functions (as opposed to the string hashes) are all
* inline, the code being tested is actually in the module, and you can
* recompile and re-test the module without rebooting.
*/
static bool __init
test_int_hash(unsigned long long h64, u32 hash_or[2][33])
{
int k;
u32 h0 = (u32)h64, h1, h2;
/* Test __hash32 */
hash_or[0][0] |= h1 = __hash_32(h0);
#ifdef HAVE_ARCH__HASH_32
hash_or[1][0] |= h2 = __hash_32_generic(h0);
#if HAVE_ARCH__HASH_32 == 1
if (h1 != h2) {
pr_err("__hash_32(%#x) = %#x != __hash_32_generic() = %#x",
h0, h1, h2);
return false;
}
#endif
#endif
/* Test k = 1..32 bits */
for (k = 1; k <= 32; k++) {
u32 const m = ((u32)2 << (k-1)) - 1; /* Low k bits set */
/* Test hash_32 */
hash_or[0][k] |= h1 = hash_32(h0, k);
if (h1 > m) {
pr_err("hash_32(%#x, %d) = %#x > %#x", h0, k, h1, m);
return false;
}
#ifdef HAVE_ARCH_HASH_32
h2 = hash_32_generic(h0, k);
#if HAVE_ARCH_HASH_32 == 1
if (h1 != h2) {
pr_err("hash_32(%#x, %d) = %#x != hash_32_generic() "
" = %#x", h0, k, h1, h2);
return false;
}
#else
if (h2 > m) {
pr_err("hash_32_generic(%#x, %d) = %#x > %#x",
h0, k, h1, m);
return false;
}
#endif
#endif
/* Test hash_64 */
hash_or[1][k] |= h1 = hash_64(h64, k);
if (h1 > m) {
pr_err("hash_64(%#llx, %d) = %#x > %#x", h64, k, h1, m);
return false;
}
#ifdef HAVE_ARCH_HASH_64
h2 = hash_64_generic(h64, k);
#if HAVE_ARCH_HASH_64 == 1
if (h1 != h2) {
pr_err("hash_64(%#llx, %d) = %#x != hash_64_generic() "
"= %#x", h64, k, h1, h2);
return false;
}
#else
if (h2 > m) {
pr_err("hash_64_generic(%#llx, %d) = %#x > %#x",
h64, k, h1, m);
return false;
}
#endif
#endif
}
(void)h2; /* Suppress unused variable warning */
return true;
}
#define SIZE 256 /* Run time is cubic in SIZE */
static int __init
test_hash_init(void)
{
char buf[SIZE+1];
u32 string_or = 0, hash_or[2][33] = { 0 };
unsigned tests = 0;
unsigned long long h64 = 0;
int i, j;
fill_buf(buf, SIZE, 1);
/* Test every possible non-empty substring in the buffer. */
for (j = SIZE; j > 0; --j) {
buf[j] = '\0';
for (i = 0; i <= j; i++) {
u64 hashlen = hashlen_string(buf+i);
u32 h0 = full_name_hash(buf+i, j-i);
/* Check that hashlen_string gets the length right */
if (hashlen_len(hashlen) != j-i) {
pr_err("hashlen_string(%d..%d) returned length"
" %u, expected %d",
i, j, hashlen_len(hashlen), j-i);
return -EINVAL;
}
/* Check that the hashes match */
if (hashlen_hash(hashlen) != h0) {
pr_err("hashlen_string(%d..%d) = %08x != "
"full_name_hash() = %08x",
i, j, hashlen_hash(hashlen), h0);
return -EINVAL;
}
string_or |= h0;
h64 = h64 << 32 | h0; /* For use with hash_64 */
if (!test_int_hash(h64, hash_or))
return -EINVAL;
tests++;
} /* i */
} /* j */
/* The OR of all the hash values should cover all the bits */
if (~string_or) {
pr_err("OR of all string hash results = %#x != %#x",
string_or, -1u);
return -EINVAL;
}
if (~hash_or[0][0]) {
pr_err("OR of all __hash_32 results = %#x != %#x",
hash_or[0][0], -1u);
return -EINVAL;
}
#ifdef HAVE_ARCH__HASH_32
#if HAVE_ARCH__HASH_32 != 1 /* Test is pointless if results match */
if (~hash_or[1][0]) {
pr_err("OR of all __hash_32_generic results = %#x != %#x",
hash_or[1][0], -1u);
return -EINVAL;
}
#endif
#endif
/* Likewise for all the i-bit hash values */
for (i = 1; i <= 32; i++) {
u32 const m = ((u32)2 << (i-1)) - 1; /* Low i bits set */
if (hash_or[0][i] != m) {
pr_err("OR of all hash_32(%d) results = %#x "
"(%#x expected)", i, hash_or[0][i], m);
return -EINVAL;
}
if (hash_or[1][i] != m) {
pr_err("OR of all hash_64(%d) results = %#x "
"(%#x expected)", i, hash_or[1][i], m);
return -EINVAL;
}
}
/* Issue notices about skipped tests. */
#ifndef HAVE_ARCH__HASH_32
pr_info("__hash_32() has no arch implementation to test.");
#elif HAVE_ARCH__HASH_32 != 1
pr_info("__hash_32() is arch-specific; not compared to generic.");
#endif
#ifndef HAVE_ARCH_HASH_32
pr_info("hash_32() has no arch implementation to test.");
#elif HAVE_ARCH_HASH_32 != 1
pr_info("hash_32() is arch-specific; not compared to generic.");
#endif
#ifndef HAVE_ARCH_HASH_64
pr_info("hash_64() has no arch implementation to test.");
#elif HAVE_ARCH_HASH_64 != 1
pr_info("hash_64() is arch-specific; not compared to generic.");
#endif
pr_notice("%u tests passed.", tests);
return 0;
}
static void __exit test_hash_exit(void)
{
}
module_init(test_hash_init); /* Does everything */
module_exit(test_hash_exit); /* Does nothing */
MODULE_LICENSE("GPL");