Commit 2538c7cf authored by Daniel Axtens, committed by Daniel Black

Use POWER8 accelerated crc32

 - Make accelerated checksum available to InnoDB and XtraDB.
 - Fall back to slice-by-eight if not available. The mode used is printed on startup.
 - Will only build on POWER systems at the moment until CMakeLists are modified
   to only add the crc32_power8/ files when building on POWER.

running MySQL-5.7 unittest/gunit/innodb/ut0crc32-t

Before:

1..2
Using software crc32 implementation, CPU is little-endian
ok 1
Using software crc32 implementation, CPU is little-endian
    normal CRC32: real    0.148006 sec
    normal CRC32: user    0.148000 sec
    normal CRC32: sys     0.000000 sec
big endian CRC32: real    0.144293 sec
big endian CRC32: user    0.144000 sec
big endian CRC32: sys     0.000000 sec
ok 2

After:

1..2
Using POWER8 crc32 implementation, CPU is little-endian
ok 1
Using POWER8 crc32 implementation, CPU is little-endian
    normal CRC32: real    0.008097 sec
    normal CRC32: user    0.008000 sec
    normal CRC32: sys     0.000000 sec
big endian CRC32: real    0.147043 sec
big endian CRC32: user    0.144000 sec
big endian CRC32: sys     0.000000 sec
ok 2

Author CRC32 ASM code: Anton Blanchard <anton@au.ibm.com>
ref: https://github.com/antonblanchard/crc32-vpmsum

Signed-off-by: Daniel Black <daniel.black@au.ibm.com>
parent 44b107da
......@@ -72,10 +72,15 @@ IF(WITH_INNOBASE_STORAGE_ENGINE OR WITH_XTRADB_STORAGE_ENGINE)
# We use the InnoDB code directly in case the code changes.
ADD_DEFINITIONS("-DUNIV_INNOCHECKSUM")
enable_language(ASM)
SET(INNOBASE_SOURCES
../storage/innobase/buf/buf0checksum.cc
../storage/innobase/ut/ut0crc32.cc
../storage/innobase/ut/ut0ut.cc
../storage/innobase/ut/crc32_power8/crc32.S
../storage/innobase/ut/crc32_power8/crc32_wrapper.c
../storage/innobase/page/page0zip.cc
)
......
......@@ -362,6 +362,7 @@ IF(MSVC)
PROPERTIES COMPILE_FLAGS "/wd4003")
ENDIF()
enable_language(ASM)
SET(INNOBASE_SOURCES
api/api0api.cc
......@@ -477,6 +478,8 @@ SET(INNOBASE_SOURCES
ut/ut0bh.cc
ut/ut0byte.cc
ut/ut0crc32.cc
ut/crc32_power8/crc32.S
ut/crc32_power8/crc32_wrapper.c
ut/ut0dbg.cc
ut/ut0list.cc
ut/ut0mem.cc
......
......@@ -47,5 +47,6 @@ typedef ib_uint32_t (*ib_ut_crc32_t)(const byte* ptr, ulint len);
extern ib_ut_crc32_t ut_crc32;
extern bool ut_crc32_sse2_enabled;
extern bool ut_crc32_power8_enabled;
#endif /* ut0crc32_h */
......@@ -1869,9 +1869,13 @@ innobase_start_or_create_for_mysql(void)
srv_boot();
ib_logf(IB_LOG_LEVEL_INFO,
"%s CPU crc32 instructions",
ut_crc32_sse2_enabled ? "Using" : "Not using");
if (ut_crc32_sse2_enabled) {
ib_logf(IB_LOG_LEVEL_INFO, "Using SSE crc32 instructions");
} else if (ut_crc32_power8_enabled) {
ib_logf(IB_LOG_LEVEL_INFO, "Using POWER8 crc32 instructions");
} else {
ib_logf(IB_LOG_LEVEL_INFO, "Using generic crc32 instructions");
}
if (!srv_read_only_mode) {
......
This diff is collapsed.
This diff is collapsed.
#include <stdio.h>
#define CRC_TABLE
#include "crc32_constants.h"
/*
 * Alignment (in bytes) that the buffer must have before it is handed to
 * __crc32_vpmsum(), and the matching low-bit mask.
 *
 * Both constants are deliberately plain (signed) int: ~VMX_ALIGN_MASK is
 * then the int -16, which sign-extends to ...fff0 when combined with an
 * unsigned long length. An unsigned int mask would zero-extend instead and
 * silently truncate lengths above 4 GiB.
 */
#define VMX_ALIGN 16
#define VMX_ALIGN_MASK (VMX_ALIGN-1)

#ifdef REFLECT
/*
 * Bytewise, table-driven CRC update (bit-reflected variant): the low byte of
 * the running value, XORed with the next input byte, selects the crc_table
 * entry and the running value is shifted right by 8 for each byte.
 *
 * crc: running CRC value
 * p:   bytes to fold in (read only)
 * len: number of bytes at p
 *
 * Returns the updated running CRC value. No initial/final inversion is
 * applied here.
 */
static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
				unsigned long len)
{
	while (len--)
		crc = crc_table[(crc ^ *p++) & 0xff] ^ (crc >> 8);
	return crc;
}
#else
/*
 * Bytewise, table-driven CRC update (non-reflected variant): the top byte of
 * the running value, XORed with the next input byte, selects the crc_table
 * entry and the running value is shifted left by 8 for each byte.
 *
 * crc: running CRC value
 * p:   bytes to fold in (read only)
 * len: number of bytes at p
 *
 * Returns the updated running CRC value. No initial/final inversion is
 * applied here.
 */
static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
				unsigned long len)
{
	while (len--)
		crc = crc_table[((crc >> 24) ^ *p++) & 0xff] ^ (crc << 8);
	return crc;
}
#endif

/*
 * Bulk CRC routine defined outside this file (presumably the assembly in
 * crc32.S -- confirm). This wrapper only ever calls it with a 16-byte
 * aligned pointer and a length that is a multiple of 16.
 */
unsigned int __crc32_vpmsum(unsigned int crc, const unsigned char *p,
			    unsigned long len);

/*
 * Computes a CRC over len bytes at p, continuing from crc.
 *
 * The buffer is const-qualified because it is only read; this also makes
 * the definition agree with the extern "C" declaration used by
 * ut0crc32.cc, which declares the pointer as const unsigned char *.
 *
 * crc: starting CRC value
 * p:   data to checksum (any alignment)
 * len: number of bytes at p
 *
 * Returns the resulting CRC. When CRC_XOR is defined the value is inverted
 * on entry and again on exit.
 */
unsigned int crc32_vpmsum(unsigned int crc, const unsigned char *p,
			  unsigned long len)
{
	unsigned int prealign;
	unsigned int tail;

#ifdef CRC_XOR
	crc ^= 0xffffffff;
#endif

	/*
	 * Below 31 bytes there might be fewer than 16 bytes left once the
	 * pointer has been aligned (up to 15 bytes consumed), so handle short
	 * buffers entirely with the table-driven code.
	 */
	if (len < VMX_ALIGN + VMX_ALIGN_MASK) {
		crc = crc32_align(crc, p, len);
		goto out;
	}

	/* Consume leading bytes until p sits on a 16-byte boundary. */
	if ((unsigned long)p & VMX_ALIGN_MASK) {
		prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
		crc = crc32_align(crc, p, prealign);
		len -= prealign;
		p += prealign;
	}

	/* Aligned middle part: the largest multiple of 16 bytes in len. */
	crc = __crc32_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);

	/* Whatever is left (0..15 bytes) goes through the table-driven code. */
	tail = len & VMX_ALIGN_MASK;
	if (tail) {
		p += len & ~VMX_ALIGN_MASK;
		crc = crc32_align(crc, p, tail);
	}

out:
#ifdef CRC_XOR
	crc ^= 0xffffffff;
#endif

	return crc;
}
/*
 * Hand-assembled instruction encodings, emitted as raw 32-bit words through
 * the assembler's .long directive. Presumably this lets the code be built
 * with assemblers that do not know these mnemonics -- confirm.
 *
 * Bit positions in the comments below count from the least significant bit
 * (bit 0) of the 32-bit instruction word.
 */
#ifndef __OPCODES_H
#define __OPCODES_H
/* 5-bit register number placed at bits 16..20 (RA) and 11..15 (RB). */
#define __PPC_RA(a) (((a) & 0x1f) << 16)
#define __PPC_RB(b) (((b) & 0x1f) << 11)
/*
 * 6-bit register numbers: the low five bits go to the same positions as the
 * 5-bit fields (16 for XA, 11 for XB, 21 for XS/XT) and the sixth bit (0x20)
 * is moved to bit 2 (XA), bit 1 (XB) or bit 0 (XS/XT).
 */
#define __PPC_XA(a) ((((a) & 0x1f) << 16) | (((a) & 0x20) >> 3))
#define __PPC_XB(b) ((((b) & 0x1f) << 11) | (((b) & 0x20) >> 4))
#define __PPC_XS(s) ((((s) & 0x1f) << 21) | (((s) & 0x20) >> 5))
#define __PPC_XT(s) __PPC_XS(s)
/* Operand fields: three 6-bit registers (XX3), or one 6-bit register plus
   two 5-bit registers (XX1). */
#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b))
#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b))
/* Base opcode words with all operand fields zero. */
#define PPC_INST_VPMSUMW 0x10000488
#define PPC_INST_VPMSUMD 0x100004c8
#define PPC_INST_MFVSRD 0x7c000066
#define PPC_INST_MTVSRD 0x7c000166
/* Each macro expands to one .long directive: base opcode ORed with the
   encoded operand fields. */
#define VPMSUMW(t, a, b) .long PPC_INST_VPMSUMW | VSX_XX3((t), a, b)
#define VPMSUMD(t, a, b) .long PPC_INST_VPMSUMD | VSX_XX3((t), a, b)
/* The vector register number t is offset by 32 before encoding, i.e. the
   sixth register bit is set for t in 0..31. NOTE(review): presumably because
   vector registers occupy the upper 32 VSX register numbers -- confirm. */
#define MFVRD(a, t) .long PPC_INST_MFVSRD | VSX_XX1((t)+32, a, 0)
#define MTVRD(t, a) .long PPC_INST_MTVSRD | VSX_XX1((t)+32, a, 0)
#endif
......@@ -82,6 +82,12 @@ mysys/my_perf.c, contributed by Facebook under the following license.
#include "univ.i"
#include "ut0crc32.h"
#if defined(__linux__) && defined(__powerpc__)
/* Used to detect at runtime if we have vpmsum instructions (PowerISA 2.07) */
#include <sys/auxv.h>
#include <bits/hwcap.h>
#endif /* defined(__linux__) && defined(__powerpc__) */
#include <string.h>
ib_ut_crc32_t ut_crc32;
......@@ -93,6 +99,7 @@ static ibool ut_crc32_slice8_table_initialized = FALSE;
/* Flag that tells whether the CPU supports CRC32 or not */
UNIV_INTERN bool ut_crc32_sse2_enabled = false;
UNIV_INTERN bool ut_crc32_power8_enabled = false;
/********************************************************************//**
Initializes the table that is used to generate the CRC32 if the CPU does
......@@ -174,6 +181,28 @@ for RHEL4 support (GCC 3 doesn't support this instruction) */
len -= 8, buf += 8
#endif /* defined(__GNUC__) && defined(__x86_64__) */
#if defined(__powerpc__)
/* C-linkage entry point of the POWER8 crc32 wrapper (the C source listed as
ut/crc32_power8/crc32_wrapper.c in the build). */
extern "C" {
unsigned int crc32_vpmsum(unsigned int crc, const unsigned char *p, unsigned long len);
}
#endif /* __powerpc__ */

/********************************************************************//**
Calculates CRC32 by passing the whole buffer to crc32_vpmsum() with a
starting value of 0. In builds where __powerpc__ is not defined this function
must never be called: it hits ut_error.
@return value returned by crc32_vpmsum() */
UNIV_INLINE
ib_uint32_t
ut_crc32_power8(
/*============*/
	const byte*	buf,	/*!< in: data over which to calculate CRC32 */
	ulint		len)	/*!< in: data length */
{
#if defined(__powerpc__)
	return crc32_vpmsum(0, buf, len);
#else
	ut_error;
	/* silence compiler warning about unused parameters */
	return((ib_uint32_t) buf[len]);
#endif /* __powerpc__ */
}
/********************************************************************//**
Calculates CRC32 using CPU instructions.
@return CRC-32C (polynomial 0x11EDC6F41) */
......@@ -309,8 +338,15 @@ ut_crc32_init()
#endif /* defined(__GNUC__) && defined(__x86_64__) */
#if defined(__linux__) && defined(__powerpc__) && defined(AT_HWCAP2)
if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07)
ut_crc32_power8_enabled = true;
#endif /* defined(__linux__) && defined(__powerpc__) */
if (ut_crc32_sse2_enabled) {
ut_crc32 = ut_crc32_sse42;
} else if (ut_crc32_power8_enabled) {
ut_crc32 = ut_crc32_power8;
} else {
ut_crc32_slice8_table_init();
ut_crc32 = ut_crc32_slice8;
......
......@@ -345,6 +345,8 @@ IF (MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 8)
PROPERTIES COMPILE_FLAGS -Od)
ENDIF()
enable_language(ASM)
SET(INNOBASE_SOURCES
api/api0api.cc
api/api0misc.cc
......@@ -462,6 +464,8 @@ SET(INNOBASE_SOURCES
ut/ut0bh.cc
ut/ut0byte.cc
ut/ut0crc32.cc
ut/crc32_power8/crc32.S
ut/crc32_power8/crc32_wrapper.c
ut/ut0dbg.cc
ut/ut0list.cc
ut/ut0mem.cc
......
......@@ -47,5 +47,6 @@ typedef ib_uint32_t (*ib_ut_crc32_t)(const byte* ptr, ulint len);
extern ib_ut_crc32_t ut_crc32;
extern bool ut_crc32_sse2_enabled;
extern bool ut_crc32_power8_enabled;
#endif /* ut0crc32_h */
......@@ -1955,9 +1955,13 @@ innobase_start_or_create_for_mysql(void)
srv_boot();
ib_logf(IB_LOG_LEVEL_INFO,
"%s CPU crc32 instructions",
ut_crc32_sse2_enabled ? "Using" : "Not using");
if (ut_crc32_sse2_enabled) {
ib_logf(IB_LOG_LEVEL_INFO, "Using SSE crc32 instructions");
} else if (ut_crc32_power8_enabled) {
ib_logf(IB_LOG_LEVEL_INFO, "Using POWER8 crc32 instructions");
} else {
ib_logf(IB_LOG_LEVEL_INFO, "Using generic crc32 instructions");
}
if (!srv_read_only_mode) {
......
This diff is collapsed.
This diff is collapsed.
#include <stdio.h>
#define CRC_TABLE
#include "crc32_constants.h"
/*
 * Alignment (in bytes) that the buffer must have before it is handed to
 * __crc32_vpmsum(), and the matching low-bit mask.
 *
 * Both constants are deliberately plain (signed) int: ~VMX_ALIGN_MASK is
 * then the int -16, which sign-extends to ...fff0 when combined with an
 * unsigned long length. An unsigned int mask would zero-extend instead and
 * silently truncate lengths above 4 GiB.
 */
#define VMX_ALIGN 16
#define VMX_ALIGN_MASK (VMX_ALIGN-1)

#ifdef REFLECT
/*
 * Bytewise, table-driven CRC update (bit-reflected variant): the low byte of
 * the running value, XORed with the next input byte, selects the crc_table
 * entry and the running value is shifted right by 8 for each byte.
 *
 * crc: running CRC value
 * p:   bytes to fold in (read only)
 * len: number of bytes at p
 *
 * Returns the updated running CRC value. No initial/final inversion is
 * applied here.
 */
static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
				unsigned long len)
{
	while (len--)
		crc = crc_table[(crc ^ *p++) & 0xff] ^ (crc >> 8);
	return crc;
}
#else
/*
 * Bytewise, table-driven CRC update (non-reflected variant): the top byte of
 * the running value, XORed with the next input byte, selects the crc_table
 * entry and the running value is shifted left by 8 for each byte.
 *
 * crc: running CRC value
 * p:   bytes to fold in (read only)
 * len: number of bytes at p
 *
 * Returns the updated running CRC value. No initial/final inversion is
 * applied here.
 */
static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
				unsigned long len)
{
	while (len--)
		crc = crc_table[((crc >> 24) ^ *p++) & 0xff] ^ (crc << 8);
	return crc;
}
#endif

/*
 * Bulk CRC routine defined outside this file (presumably the assembly in
 * crc32.S -- confirm). This wrapper only ever calls it with a 16-byte
 * aligned pointer and a length that is a multiple of 16.
 */
unsigned int __crc32_vpmsum(unsigned int crc, const unsigned char *p,
			    unsigned long len);

/*
 * Computes a CRC over len bytes at p, continuing from crc.
 *
 * The buffer is const-qualified because it is only read; this also makes
 * the definition agree with the extern "C" declaration used by
 * ut0crc32.cc, which declares the pointer as const unsigned char *.
 *
 * crc: starting CRC value
 * p:   data to checksum (any alignment)
 * len: number of bytes at p
 *
 * Returns the resulting CRC. When CRC_XOR is defined the value is inverted
 * on entry and again on exit.
 */
unsigned int crc32_vpmsum(unsigned int crc, const unsigned char *p,
			  unsigned long len)
{
	unsigned int prealign;
	unsigned int tail;

#ifdef CRC_XOR
	crc ^= 0xffffffff;
#endif

	/*
	 * Below 31 bytes there might be fewer than 16 bytes left once the
	 * pointer has been aligned (up to 15 bytes consumed), so handle short
	 * buffers entirely with the table-driven code.
	 */
	if (len < VMX_ALIGN + VMX_ALIGN_MASK) {
		crc = crc32_align(crc, p, len);
		goto out;
	}

	/* Consume leading bytes until p sits on a 16-byte boundary. */
	if ((unsigned long)p & VMX_ALIGN_MASK) {
		prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
		crc = crc32_align(crc, p, prealign);
		len -= prealign;
		p += prealign;
	}

	/* Aligned middle part: the largest multiple of 16 bytes in len. */
	crc = __crc32_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);

	/* Whatever is left (0..15 bytes) goes through the table-driven code. */
	tail = len & VMX_ALIGN_MASK;
	if (tail) {
		p += len & ~VMX_ALIGN_MASK;
		crc = crc32_align(crc, p, tail);
	}

out:
#ifdef CRC_XOR
	crc ^= 0xffffffff;
#endif

	return crc;
}
/*
 * Hand-assembled instruction encodings, emitted as raw 32-bit words through
 * the assembler's .long directive. Presumably this lets the code be built
 * with assemblers that do not know these mnemonics -- confirm.
 *
 * Bit positions in the comments below count from the least significant bit
 * (bit 0) of the 32-bit instruction word.
 */
#ifndef __OPCODES_H
#define __OPCODES_H
/* 5-bit register number placed at bits 16..20 (RA) and 11..15 (RB). */
#define __PPC_RA(a) (((a) & 0x1f) << 16)
#define __PPC_RB(b) (((b) & 0x1f) << 11)
/*
 * 6-bit register numbers: the low five bits go to the same positions as the
 * 5-bit fields (16 for XA, 11 for XB, 21 for XS/XT) and the sixth bit (0x20)
 * is moved to bit 2 (XA), bit 1 (XB) or bit 0 (XS/XT).
 */
#define __PPC_XA(a) ((((a) & 0x1f) << 16) | (((a) & 0x20) >> 3))
#define __PPC_XB(b) ((((b) & 0x1f) << 11) | (((b) & 0x20) >> 4))
#define __PPC_XS(s) ((((s) & 0x1f) << 21) | (((s) & 0x20) >> 5))
#define __PPC_XT(s) __PPC_XS(s)
/* Operand fields: three 6-bit registers (XX3), or one 6-bit register plus
   two 5-bit registers (XX1). */
#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b))
#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b))
/* Base opcode words with all operand fields zero. */
#define PPC_INST_VPMSUMW 0x10000488
#define PPC_INST_VPMSUMD 0x100004c8
#define PPC_INST_MFVSRD 0x7c000066
#define PPC_INST_MTVSRD 0x7c000166
/* Each macro expands to one .long directive: base opcode ORed with the
   encoded operand fields. */
#define VPMSUMW(t, a, b) .long PPC_INST_VPMSUMW | VSX_XX3((t), a, b)
#define VPMSUMD(t, a, b) .long PPC_INST_VPMSUMD | VSX_XX3((t), a, b)
/* The vector register number t is offset by 32 before encoding, i.e. the
   sixth register bit is set for t in 0..31. NOTE(review): presumably because
   vector registers occupy the upper 32 VSX register numbers -- confirm. */
#define MFVRD(a, t) .long PPC_INST_MFVSRD | VSX_XX1((t)+32, a, 0)
#define MTVRD(t, a) .long PPC_INST_MTVSRD | VSX_XX1((t)+32, a, 0)
#endif
......@@ -82,6 +82,12 @@ mysys/my_perf.c, contributed by Facebook under the following license.
#include "univ.i"
#include "ut0crc32.h"
#if defined(__linux__) && defined(__powerpc__)
/* Used to detect at runtime if we have vpmsum instructions (PowerISA 2.07) */
#include <sys/auxv.h>
#include <bits/hwcap.h>
#endif /* defined(__linux__) && defined(__powerpc__) */
#include <string.h>
ib_ut_crc32_t ut_crc32;
......@@ -93,6 +99,7 @@ static ibool ut_crc32_slice8_table_initialized = FALSE;
/* Flag that tells whether the CPU supports CRC32 or not */
UNIV_INTERN bool ut_crc32_sse2_enabled = false;
UNIV_INTERN bool ut_crc32_power8_enabled = false;
/********************************************************************//**
Initializes the table that is used to generate the CRC32 if the CPU does
......@@ -174,6 +181,28 @@ for RHEL4 support (GCC 3 doesn't support this instruction) */
len -= 8, buf += 8
#endif /* defined(__GNUC__) && defined(__x86_64__) */
#if defined(__powerpc__)
/* C-linkage entry point of the POWER8 crc32 wrapper (the C source listed as
ut/crc32_power8/crc32_wrapper.c in the build). */
extern "C" {
unsigned int crc32_vpmsum(unsigned int crc, const unsigned char *p, unsigned long len);
}
#endif /* __powerpc__ */

/********************************************************************//**
Calculates CRC32 by passing the whole buffer to crc32_vpmsum() with a
starting value of 0. In builds where __powerpc__ is not defined this function
must never be called: it hits ut_error.
@return value returned by crc32_vpmsum() */
UNIV_INLINE
ib_uint32_t
ut_crc32_power8(
/*============*/
	const byte*	buf,	/*!< in: data over which to calculate CRC32 */
	ulint		len)	/*!< in: data length */
{
#if defined(__powerpc__)
	return crc32_vpmsum(0, buf, len);
#else
	ut_error;
	/* silence compiler warning about unused parameters */
	return((ib_uint32_t) buf[len]);
#endif /* __powerpc__ */
}
/********************************************************************//**
Calculates CRC32 using CPU instructions.
@return CRC-32C (polynomial 0x11EDC6F41) */
......@@ -309,8 +338,15 @@ ut_crc32_init()
#endif /* defined(__GNUC__) && defined(__x86_64__) */
#if defined(__linux__) && defined(__powerpc__) && defined(AT_HWCAP2)
if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07)
ut_crc32_power8_enabled = true;
#endif /* defined(__linux__) && defined(__powerpc__) */
if (ut_crc32_sse2_enabled) {
ut_crc32 = ut_crc32_sse42;
} else if (ut_crc32_power8_enabled) {
ut_crc32 = ut_crc32_power8;
} else {
ut_crc32_slice8_table_init();
ut_crc32 = ut_crc32_slice8;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment