Commit a5add8c7 authored by Michael Munday's avatar Michael Munday Committed by Chris Broadfoot

[release-branch.go1.7] hash/crc32: fix optimized s390x implementation

The code wasn't checking to see if the data was still >= 64 bytes
long after aligning it.

Aligning the data is an optimization and we don't actually need
to do it. In fact for smaller sizes it slows things down due to
the overhead of calling the generic function. Therefore for now
I have simply removed the alignment stage. I have also added a
check into the assembly to deliberately trigger a segmentation
fault if the data is too short.

Fixes #16779.

Change-Id: Ic01636d775efc5ec97689f050991cee04ce8fe73
Reviewed-on: https://go-review.googlesource.com/27409Reviewed-by: default avatarBrad Fitzpatrick <bradfitz@golang.org>
Reviewed-on: https://go-review.googlesource.com/28635
parent e464c08b
......@@ -4,14 +4,9 @@
package crc32
import (
"unsafe"
)
const (
vxMinLen = 64
vxAlignment = 16
vxAlignMask = vxAlignment - 1
vxAlignMask = 15 // align to 16 bytes
)
// hasVectorFacility reports whether the machine has the z/Architecture
......@@ -49,20 +44,13 @@ func genericIEEE(crc uint32, p []byte) uint32 {
return update(crc, IEEETable, p)
}
// updateCastagnoli calculates the checksum of p using genericCastagnoli to
// align the data appropriately for vectorCastagnoli. It avoids using
// vectorCastagnoli entirely if the length of p is less than or equal to
// vxMinLen.
// updateCastagnoli calculates the checksum of p using
// vectorizedCastagnoli if possible and falling back onto
// genericCastagnoli as needed.
func updateCastagnoli(crc uint32, p []byte) uint32 {
// Use vectorized function if vector facility is available and
// data length is above threshold.
if hasVX && len(p) > vxMinLen {
pAddr := uintptr(unsafe.Pointer(&p[0]))
if pAddr&vxAlignMask != 0 {
prealign := vxAlignment - int(pAddr&vxAlignMask)
crc = genericCastagnoli(crc, p[:prealign])
p = p[prealign:]
}
if hasVX && len(p) >= vxMinLen {
aligned := len(p) & ^vxAlignMask
crc = vectorizedCastagnoli(crc, p[:aligned])
p = p[aligned:]
......@@ -75,19 +63,12 @@ func updateCastagnoli(crc uint32, p []byte) uint32 {
return genericCastagnoli(crc, p)
}
// updateIEEE calculates the checksum of p using genericIEEE to align the data
// appropriately for vectorIEEE. It avoids using vectorIEEE entirely if the length
// of p is less than or equal to vxMinLen.
// updateIEEE calculates the checksum of p using vectorizedIEEE if
// possible and falling back onto genericIEEE as needed.
func updateIEEE(crc uint32, p []byte) uint32 {
// Use vectorized function if vector facility is available and
// data length is above threshold.
if hasVX && len(p) > vxMinLen {
pAddr := uintptr(unsafe.Pointer(&p[0]))
if pAddr&vxAlignMask != 0 {
prealign := vxAlignment - int(pAddr&vxAlignMask)
crc = genericIEEE(crc, p[:prealign])
p = p[prealign:]
}
if hasVX && len(p) >= vxMinLen {
aligned := len(p) & ^vxAlignMask
crc = vectorizedIEEE(crc, p[:aligned])
p = p[aligned:]
......
......@@ -128,6 +128,10 @@ TEXT vectorizedBody<>(SB),NOSPLIT,$0
VZERO V0
VLVGF $3, R2, V0
// Crash if the input size is less than 64-bytes.
CMP R4, $64
BLT crash
// Load a 64-byte data chunk and XOR with CRC
VLM 0(R3), V1, V4 // 64-bytes into V1..V4
......@@ -243,3 +247,6 @@ done:
XOR $0xffffffff, R2 // NOTW R2
MOVWZ R2, ret + 32(FP)
RET
crash:
MOVD $0, (R0) // input size is less than 64-bytes
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment