Commit 3cb1e9d9, authored and committed by Tobias Klauser

internal/bytealg: add assembly implementation of Count/CountString on arm

Simple single-byte loop count for now, to be further improved in future
CLs.

Benchmark on linux/arm:

name               old time/op    new time/op     delta
CountSingle/10-4      122ns ± 0%       87ns ± 1%  -28.41%  (p=0.000 n=7+10)
CountSingle/32-4      242ns ± 0%      174ns ± 1%  -28.25%  (p=0.000 n=10+10)
CountSingle/4K-4     24.2µs ± 1%     15.6µs ± 1%  -35.42%  (p=0.000 n=10+10)
CountSingle/4M-4     29.6ms ± 1%     21.3ms ± 1%  -28.09%  (p=0.000 n=10+9)
CountSingle/64M-4     562ms ± 0%      414ms ± 1%  -26.23%  (p=0.000 n=8+10)

name               old speed      new speed       delta
CountSingle/10-4   81.7MB/s ± 1%  114.5MB/s ± 1%  +40.07%  (p=0.000 n=10+10)
CountSingle/32-4    132MB/s ± 0%    184MB/s ± 1%  +39.39%  (p=0.000 n=10+9)
CountSingle/4K-4    170MB/s ± 1%    263MB/s ± 1%  +54.86%  (p=0.000 n=10+10)
CountSingle/4M-4    142MB/s ± 1%    197MB/s ± 1%  +39.07%  (p=0.000 n=10+9)
CountSingle/64M-4   119MB/s ± 0%    162MB/s ± 1%  +35.55%  (p=0.000 n=8+10)

Updates #29001

Change-Id: I42a268215a62044286ec32b548d8e4b86b9570ee
Reviewed-on: https://go-review.googlesource.com/c/go/+/168319
Run-TryBot: Tobias Klauser <tobias.klauser@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
parent 834d229e
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// Count is the byte-slice entry point of the ARM byte counter.
// NOTE(review): the Go prototype is declared elsewhere; the frame layout
// below suggests func Count(b []byte, c byte) int -- confirm against the
// package's Go declaration.
//
// It moves its arguments into the registers countbytebody<> expects
// (R0 = data, R1 = length, R2 = byte, R7 = result address) and then
// branches to it; countbytebody<> stores the result and returns.
TEXT ·Count(SB),NOSPLIT,$0-20
	MOVW	$ret+16(FP), R7	// R7 = address of the result slot, not its value
	MOVBU	c+12(FP), R2	// R2 = byte to count, zero-extended
	MOVW	b_len+4(FP), R1	// R1 = number of bytes in b
	MOVW	b_base+0(FP), R0	// R0 = pointer to the first byte of b
	B	countbytebody<>(SB)	// tail-branch: no return to this function
// CountString is the string entry point of the ARM byte counter.
// NOTE(review): the Go prototype is declared elsewhere; the frame layout
// below suggests func CountString(s string, c byte) int -- confirm against
// the package's Go declaration.
//
// It moves its arguments into the registers countbytebody<> expects
// (R0 = data, R1 = length, R2 = byte, R7 = result address) and then
// branches to it; countbytebody<> stores the result and returns.
TEXT ·CountString(SB),NOSPLIT,$0-16
	MOVW	$ret+12(FP), R7	// R7 = address of the result slot, not its value
	MOVBU	c+8(FP), R2	// R2 = byte to count, zero-extended
	MOVW	s_len+4(FP), R1	// R1 = number of bytes in s
	MOVW	s_base+0(FP), R0	// R0 = pointer to the first byte of s
	B	countbytebody<>(SB)	// tail-branch: no return to this function
// countbytebody<> counts how many of the R1 bytes starting at R0 are equal
// to R2 and stores that count through R7. It scans one byte per iteration.
//
// Input:
//   R0: pointer to the data
//   R1: length of the data in bytes
//   R2: byte to count
//   R7: address the result is written to
//
// On exit:
//   the word at (R7) holds the count
//   R0, R1, R4, R8 and the condition flags may have been modified
//   (for a non-empty input R0 and R1 both end up at the end of the data)
TEXT countbytebody<>(SB),NOSPLIT,$0
	CMP	$0, R1
	MOVW	$0, R8	// R8 = running match count; plain MOVW leaves the flags from CMP intact
	B.EQ	store	// empty input: skip the loop and store 0
	ADD	R0, R1	// R1 = R0 + length, i.e. one past the last byte
scan:
	MOVBU.P	1(R0), R4	// R4 = byte at R0, then R0 advances by one
	CMP	R4, R2
	ADD.EQ	$1, R8	// bump the count only when the byte matched
	CMP	R0, R1
	B.NE	scan	// loop until R0 reaches the end pointer
store:
	MOVW	R8, (R7)	// write the count to the caller's result slot
	RET
......@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64,!arm64,!ppc64le,!ppc64
// +build !amd64,!arm,!arm64,!ppc64le,!ppc64
package bytealg
......
......@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build amd64 arm64 ppc64le ppc64
// +build amd64 arm arm64 ppc64le ppc64
package bytealg
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment