Commit 3436f077 authored by Michael Munday

reflect, runtime: optimize Value.Call on s390x and add benchmark

Use an MVC loop to copy arguments in runtime.call* rather than copying
bytes individually.

I've added the benchmark CallArgCopy to test the speed of Value.Call
for various argument sizes.

name                    old speed      new speed       delta
CallArgCopy/size=128     439MB/s ± 1%    582MB/s ± 1%   +32.41%  (p=0.000 n=10+10)
CallArgCopy/size=256     695MB/s ± 1%   1172MB/s ± 1%   +68.67%  (p=0.000 n=10+10)
CallArgCopy/size=1024    573MB/s ± 8%   4175MB/s ± 2%  +628.11%  (p=0.000 n=10+10)
CallArgCopy/size=4096   1.46GB/s ± 2%  10.19GB/s ± 1%  +600.52%  (p=0.000 n=10+10)
CallArgCopy/size=65536  1.51GB/s ± 0%  12.30GB/s ± 1%  +716.30%   (p=0.000 n=9+10)

Change-Id: I87dae4809330e7964f6cb4a9e40e5b3254dd519d
Reviewed-on: https://go-review.googlesource.com/28096
Run-TryBot: Michael Munday <munday@ca.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Bill O'Farrell <billotosyr@gmail.com>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
parent 6c43c0c2
......@@ -1535,6 +1535,34 @@ func BenchmarkCall(b *testing.B) {
})
}
// BenchmarkCallArgCopy measures Value.Call on functions that take a single
// byte-array argument, running one sub-benchmark per array size. Each
// sub-benchmark reports throughput in terms of the argument's length.
func BenchmarkCallArgCopy(b *testing.B) {
	// zeroBytes builds the zero Value of type [n]byte.
	zeroBytes := func(n int) Value {
		return Zero(ArrayOf(n, TypeOf(byte(0))))
	}

	type benchCase struct {
		fv  Value // function to invoke
		arg Value // the single argument passed to fv
	}
	cases := []benchCase{
		{fv: ValueOf(func(a [128]byte) {}), arg: zeroBytes(128)},
		{fv: ValueOf(func(a [256]byte) {}), arg: zeroBytes(256)},
		{fv: ValueOf(func(a [1024]byte) {}), arg: zeroBytes(1024)},
		{fv: ValueOf(func(a [4096]byte) {}), arg: zeroBytes(4096)},
		{fv: ValueOf(func(a [65536]byte) {}), arg: zeroBytes(65536)},
	}

	for idx := range cases {
		c := cases[idx]
		argLen := c.arg.Len()
		b.Run(fmt.Sprintf("size=%v", argLen), func(b *testing.B) {
			in := []Value{c.arg}
			b.SetBytes(int64(argLen))
			// Exclude the setup above from the measurement.
			b.ResetTimer()
			for remaining := b.N; remaining > 0; remaining-- {
				c.fv.Call(in)
			}
		})
	}
}
func TestMakeFunc(t *testing.T) {
f := dummy
fv := MakeFunc(TypeOf(f), func(in []Value) []Value { return in })
......
......@@ -387,53 +387,55 @@ TEXT ·reflectcall(SB), NOSPLIT, $-8-32
TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \
NO_LOCAL_POINTERS; \
/* copy arguments to stack */ \
MOVD arg+16(FP), R3; \
MOVWZ argsize+24(FP), R4; \
MOVD R15, R5; \
ADD $(8-1), R5; \
SUB $1, R3; \
ADD R5, R4; \
CMP R5, R4; \
BEQ 6(PC); \
ADD $1, R3; \
ADD $1, R5; \
MOVBZ 0(R3), R6; \
MOVBZ R6, 0(R5); \
BR -6(PC); \
/* call function */ \
MOVD arg+16(FP), R4; \
MOVWZ argsize+24(FP), R5; \
MOVD $stack-MAXSIZE(SP), R6; \
loopArgs: /* copy 256 bytes at a time */ \
CMP R5, $256; \
BLT tailArgs; \
SUB $256, R5; \
MVC $256, 0(R4), 0(R6); \
MOVD $256(R4), R4; \
MOVD $256(R6), R6; \
BR loopArgs; \
tailArgs: /* copy remaining bytes */ \
CMP R5, $0; \
BEQ callFunction; \
SUB $1, R5; \
EXRL $callfnMVC<>(SB), R5; \
callFunction: \
MOVD f+8(FP), R12; \
MOVD (R12), R8; \
PCDATA $PCDATA_StackMapIndex, $0; \
BL (R8); \
/* copy return values back */ \
MOVD arg+16(FP), R3; \
MOVWZ n+24(FP), R4; \
MOVWZ retoffset+28(FP), R6; \
MOVD R15, R5; \
ADD R6, R5; \
ADD R6, R3; \
SUB R6, R4; \
ADD $(8-1), R5; \
SUB $1, R3; \
ADD R5, R4; \
loop: \
CMP R5, R4; \
BEQ end; \
ADD $1, R5; \
ADD $1, R3; \
MOVBZ 0(R5), R6; \
MOVBZ R6, 0(R3); \
BR loop; \
end: \
MOVD arg+16(FP), R6; \
MOVWZ n+24(FP), R5; \
MOVD $stack-MAXSIZE(SP), R4; \
MOVWZ retoffset+28(FP), R1; \
ADD R1, R4; \
ADD R1, R6; \
SUB R1, R5; \
loopRets: /* copy 256 bytes at a time */ \
CMP R5, $256; \
BLT tailRets; \
SUB $256, R5; \
MVC $256, 0(R4), 0(R6); \
MOVD $256(R4), R4; \
MOVD $256(R6), R6; \
BR loopRets; \
tailRets: /* copy remaining bytes */ \
CMP R5, $0; \
BEQ writeBarrierUpdates; \
SUB $1, R5; \
EXRL $callfnMVC<>(SB), R5; \
writeBarrierUpdates: \
/* execute write barrier updates */ \
MOVD argtype+0(FP), R7; \
MOVD arg+16(FP), R3; \
MOVWZ n+24(FP), R4; \
MOVWZ retoffset+28(FP), R6; \
MOVD R7, 8(R15); \
MOVD R3, 16(R15); \
MOVD R4, 24(R15); \
MOVD R6, 32(R15); \
MOVD argtype+0(FP), R1; \
MOVD arg+16(FP), R2; \
MOVWZ n+24(FP), R3; \
MOVWZ retoffset+28(FP), R4; \
STMG R1, R4, stack-MAXSIZE(SP); \
BL runtime·callwritebarrier(SB); \
RET
......@@ -464,6 +466,10 @@ CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)
// Not a function: target for EXRL (execute relative long) instruction.
// The tail-copy paths in CALLFN (tailArgs and tailRets) execute this MVC via
// EXRL with R5 holding the remaining byte count minus one, so the source
// operand is 0(R4) and the destination is 0(R6). There is no RET after the
// MVC, so this symbol must only ever be used as an EXRL target.
TEXT callfnMVC<>(SB),NOSPLIT|NOFRAME,$0-0
MVC $1, 0(R4), 0(R6)
// procyield returns immediately; its body here is a single RET.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
RET
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment