Commit 924831e7 authored by Kirill Smelkov's avatar Kirill Smelkov

X unzlib benchmarks

parent 446029d4
...@@ -36,6 +36,7 @@ import ( ...@@ -36,6 +36,7 @@ import (
"lab.nexedi.com/kirr/go123/xnet" "lab.nexedi.com/kirr/go123/xnet"
"lab.nexedi.com/kirr/neo/go/neo/internal/xsha1" "lab.nexedi.com/kirr/neo/go/neo/internal/xsha1"
"lab.nexedi.com/kirr/neo/go/neo/internal/xzlib"
"lab.nexedi.com/kirr/neo/go/neo/neonet" "lab.nexedi.com/kirr/neo/go/neo/neonet"
"lab.nexedi.com/kirr/neo/go/neo/proto" "lab.nexedi.com/kirr/neo/go/neo/proto"
"lab.nexedi.com/kirr/neo/go/zodb" "lab.nexedi.com/kirr/neo/go/zodb"
...@@ -466,7 +467,7 @@ func (c *Client) _Load(ctx context.Context, xid zodb.Xid) (*mem.Buf, zodb.Tid, e ...@@ -466,7 +467,7 @@ func (c *Client) _Load(ctx context.Context, xid zodb.Xid) (*mem.Buf, zodb.Tid, e
// XXX cleanup mess vvv // XXX cleanup mess vvv
buf2 := mem.BufAlloc(len(buf.Data)) buf2 := mem.BufAlloc(len(buf.Data))
buf2.Data = buf2.Data[:0] buf2.Data = buf2.Data[:0]
udata, err := decompress(buf.Data, buf2.Data) udata, err := xzlib.Decompress(buf.Data, buf2.Data)
buf.Release() buf.Release()
if err != nil { if err != nil {
buf2.Release() buf2.Release()
......
// Copyright (C) 2017-2018 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// You can also Link and Combine this program with other software covered by
// the terms of any of the Free Software licenses or any of the Open Source
// Initiative approved licenses and Convey the resulting work. Corresponding
// source of such a combination shall include the source code for all other
// software used.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// See https://www.nexedi.com/licensing for rationale and options.
// Package xzlib provides convenience utilities to compress/decompress zlib data.
package xzlib
import (
"bytes"
"compress/zlib"
"io"
)
// Compress returns data encoded as a zlib stream.
//
// The default compression level of compress/zlib is used.
// XXX the level is not configurable.
func Compress(data []byte) (zdata []byte) {
	sink := new(bytes.Buffer)
	zw := zlib.NewWriter(sink)

	// The writer sits on top of an in-memory bytes.Buffer, so neither
	// Write nor Close is expected to fail; a failure here is treated as
	// a programming error.
	if _, err := zw.Write(data); err != nil {
		panic(err)
	}
	if err := zw.Close(); err != nil {
		panic(err)
	}

	return sink.Bytes()
}
// Decompress decompresses data according to zlib encoding.
//
// in is the complete zlib stream to decode.
//
// out is used only as scratch storage for the result: if its backing array
// has enough capacity the decompressed data is placed there, otherwise a new
// buffer is allocated and used. Whatever bytes out already holds are
// discarded - they never become part of the result.
//
// return: destination buffer with the full decompressed data, or a nil
// buffer together with the error reported by the zlib reader.
func Decompress(in []byte, out []byte) (data []byte, err error) {
	zr, err := zlib.NewReader(bytes.NewReader(in))
	if err != nil {
		return nil, err
	}
	defer func() {
		// a close error matters only if decompression itself succeeded
		if cerr := zr.Close(); cerr != nil && err == nil {
			err = cerr
			data = nil
		}
	}()

	// bytes.NewBuffer treats the slice content as data already written, so
	// start from zero length to avoid prepending stale bytes of out to the
	// decompressed result.
	bout := bytes.NewBuffer(out[:0])
	if _, err = io.Copy(bout, zr); err != nil {
		return nil, err
	}

	return bout.Bytes(), nil
}
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
// See COPYING file for full licensing terms. // See COPYING file for full licensing terms.
// See https://www.nexedi.com/licensing for rationale and options. // See https://www.nexedi.com/licensing for rationale and options.
package neo package xzlib
import ( import (
"testing" "testing"
...@@ -38,7 +38,7 @@ var ztestv = []struct{in, out string}{ ...@@ -38,7 +38,7 @@ var ztestv = []struct{in, out string}{
func TestDecompress(t *testing.T) { func TestDecompress(t *testing.T) {
for _, tt := range ztestv { for _, tt := range ztestv {
got, err := decompress([]byte(tt.in), nil) got, err := Decompress([]byte(tt.in), nil)
if err != nil { if err != nil {
t.Errorf("decompress err: %q", tt.in) t.Errorf("decompress err: %q", tt.in)
continue continue
......
...@@ -2,6 +2,6 @@ ...@@ -2,6 +2,6 @@
/var /var
/zhash /zhash
/zhash_go /zhash_go
/tsha1 /tcpu
/tsha1_go /tcpu_go
/ioping.tmp /ioping.tmp
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2018 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Free Software licenses or any of the Open Source
# Initiative approved licenses and Convey the resulting work. Corresponding
# source of such a combination shall include the source code for all other
# software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
"""generate testdata/ files"""
import zlib
import zodbtools.util as zutil
from tcpu import fmtsize
K = 1024
M = 1024*K
sizev = (1*K, 4*K, 2*M)
# writefile writes data to file @path, replacing previous file content.
#
# data is written verbatim: the file is opened in binary mode because the
# payload is zlib-compressed bytes, which must not go through any text/newline
# translation.
def writefile(path, data):
    with open(path, 'wb') as f:
        f.write(data)
# zcompress returns data compressed via zlib.compress with its default settings.
def zcompress(data):
    return zlib.compress(data)
# main generates all testdata/zlib/* files:
#
#   - null-<size>   zero bytes for every size in sizev;
#   - wczdata-*     objects from var/wczblk1-8 whose data contains 'ZData';
#   - prod1-*       objects from var/prod1-1024.
def main():
    # zlib/null
    for size in sizev:
        path = 'testdata/zlib/null-%s' % fmtsize(size)
        writefile(path, zcompress('\0' * size))

    # representative ZODB objects
    # (to regenerate this requires `neotest zbench-local` to be already run once)

    # wendelin.core's ZData
    zdatav = []
    def collect_zdata(objdata):
        if 'ZData' in objdata:  # XXX hack
            zdatav.append(objdata)
    iter_zobjects('var/wczblk1-8/fs1/data.fs', collect_zdata)
    writeobjects('testdata/zlib/wczdata', zdatav)

    # avg and max from prod1; list.append returns None, so iteration is
    # never stopped early and every object is collected.
    prod1_objv = []
    iter_zobjects('var/prod1-1024/fs1/data.fs', prod1_objv.append)
    writeobjects('testdata/zlib/prod1', prod1_objv)
# writeobjects writes to prefix compressed objects with average and maximum uncompressed sizes.
#
# Two files are created:
#
#   <prefix>-avg    the smallest object whose length is >= the (integer) average length;
#   <prefix>-max    the longest object.
#
# objv itself is left unmodified. ValueError is raised if objv is empty.
def writeobjects(prefix, objv):
    from bisect import bisect_left

    if not objv:
        raise ValueError('%s: no objects to write' % prefix)

    # work on a sorted copy so that the caller's list is not reordered
    objv = sorted(objv, key=len)
    lenv = [len(obj) for obj in objv]
    lavg = sum(lenv) // len(lenv)

    # first object with len >= lavg; it always exists because max >= avg
    iavg = bisect_left(lenv, lavg)

    writefile('%s-avg' % prefix, zcompress(objv[iavg]))
    writefile('%s-max' % prefix, zcompress(objv[-1]))
# iter_zobjects iterates through all non-nil object data from fs1@path.
#
# for every object f is called, and if it returns !false iteration is stopped.
#
# The storage is opened read-only and is closed before returning, both on
# normal completion and when f or the storage raises.
def iter_zobjects(path, f):
    stor = zutil.storageFromURL(path, read_only=True)
    try:
        for txn in stor.iterator():
            for obj in txn:
                if obj.data is None:
                    continue
                if f(obj.data):
                    return
    finally:
        stor.close()
if __name__ == '__main__':
main()
...@@ -891,12 +891,17 @@ bench_cpu() { ...@@ -891,12 +891,17 @@ bench_cpu() {
sizev="1024 4096 $((2*1024*1024))" sizev="1024 4096 $((2*1024*1024))"
for size in $sizev; do for size in $sizev; do
nrun tsha1.py $size nrun tcpu.py sha1 $size
nrun tsha1_go $size nrun tcpu_go sha1 $size
done done
# TODO bench compress/decompress datav="null-1K null-4K null-2M wczdata-avg wczdata-max prod1-avg prod1-max"
# XXX data: null4K, some real pickle for data in $datav; do
nrun tcpu.py unzlib $data
nrun tcpu_go unzlib $data
done
# TODO bench compress
} }
# bench_disk - benchmark direct (uncached) and cached random reads # bench_disk - benchmark direct (uncached) and cached random reads
...@@ -1396,7 +1401,7 @@ cpustat) ...@@ -1396,7 +1401,7 @@ cpustat)
;; ;;
esac esac
# make sure zhash*, tsha1* and zgenprod are on PATH (because we could be invoked from another dir) # make sure zhash*, tcpu* and zgenprod are on PATH (because we could be invoked from another dir)
X=$(cd `dirname $0` && pwd) X=$(cd `dirname $0` && pwd)
export PATH=$X:$PATH export PATH=$X:$PATH
...@@ -1405,7 +1410,7 @@ export PATH=$X:$PATH ...@@ -1405,7 +1410,7 @@ export PATH=$X:$PATH
go install -v lab.nexedi.com/kirr/neo/go/... go install -v lab.nexedi.com/kirr/neo/go/...
go build -o $X/zhash_go $X/zhash.go go build -o $X/zhash_go $X/zhash.go
#go build -race -o $X/zhash_go $X/zhash.go #go build -race -o $X/zhash_go $X/zhash.go
go build -o $X/tsha1_go $X/tsha1.go go build -o $X/tcpu_go $X/tcpu.go
# setup network & fs environment # setup network & fs environment
init_net init_net
......
...@@ -18,21 +18,27 @@ ...@@ -18,21 +18,27 @@
// See https://www.nexedi.com/licensing for rationale and options. // See https://www.nexedi.com/licensing for rationale and options.
// +build ignore // +build ignore
//go:generate ./gen-testdata
// tsha1 - benchmark sha1 // tcpu - cpu-related benchmarks
package main package main
import ( import (
"crypto/sha1" "crypto/sha1"
"flag"
"fmt" "fmt"
"io/ioutil"
"log" "log"
"os" "os"
"strconv" "strconv"
"testing"
"time" "time"
"lab.nexedi.com/kirr/neo/go/neo/internal/xzlib"
) )
func dieusage() { func dieusage() {
fmt.Fprintf(os.Stderr, "Usage: tsha1 <block-size>\n") fmt.Fprintf(os.Stderr, "Usage: tcpu <benchmark> <block-size>\n")
os.Exit(1) os.Exit(1)
} }
...@@ -50,36 +56,86 @@ func fmtsize(size int) string { ...@@ -50,36 +56,86 @@ func fmtsize(size int) string {
return fmt.Sprintf("%d%c", size, unitv[norder]) return fmt.Sprintf("%d%c", size, unitv[norder])
} }
func main() { func prettyarg(arg string) string {
if len(os.Args) != 2 { size, err := strconv.Atoi(arg)
dieusage() if err != nil {
return arg
} }
blksize, err := strconv.Atoi(os.Args[1]) return fmtsize(size)
}
// benchit runs the benchmark for benchf
func benchit(benchname string, bencharg string, benchf func(*testing.B, string)) {
r := testing.Benchmark(func (b *testing.B) {
benchf(b, bencharg)
})
hostname, err := os.Hostname()
if err != nil { if err != nil {
log.Fatal(err) hostname = "?"
} }
data := make([]byte, blksize) fmt.Printf("Benchmark%s/%s/go/%s %d\t%.3f µs/op\n", hostname, benchname, prettyarg(bencharg), r.N, float64(r.T) / float64(r.N) / float64(time.Microsecond))
h := sha1.New()
}
n := int(1E6) func BenchmarkSha1(b *testing.B, arg string) {
if blksize > 1024 { blksize, err := strconv.Atoi(arg)
n = n * 1024 / blksize // assumes 1K ~= 1μs if err != nil {
b.Fatal(err)
} }
tstart := time.Now() data := make([]byte, blksize)
h := sha1.New()
b.ResetTimer()
for i := 0; i < n; i++ { for i := 0; i < b.N; i++ {
h.Write(data) h.Write(data)
} }
}
tend := time.Now() func xreadfile(t testing.TB, path string) []byte {
δt := tend.Sub(tstart) data, err := ioutil.ReadFile(path)
if err != nil {
t.Fatal(err)
}
return data
}
hostname, err := os.Hostname() func BenchmarkUnzlib(b *testing.B, zfile string) {
zdata := xreadfile(b, fmt.Sprintf("testdata/zlib/%s", zfile))
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := xzlib.Decompress(zdata, nil)
if err != nil { if err != nil {
hostname = "?" b.Fatal(err)
}
}
}
var benchv = map[string]func(*testing.B, string) {
"sha1": BenchmarkSha1,
"unzlib": BenchmarkUnzlib,
}
func main() {
flag.Parse() // so that test.* flags could be processed
argv := flag.Args()
if len(argv) != 2 {
dieusage()
}
benchname := argv[0]
bencharg := argv[1]
benchf, ok := benchv[benchname]
if !ok {
log.Fatalf("Unknown benchmark %q", benchname)
} }
fmt.Printf("Benchmark%s/sha1/go/%s %d\t%.3f µs/op\n", hostname, fmtsize(blksize), n, float64(δt) / float64(n) / float64(time.Microsecond)) benchit(benchname, bencharg, benchf)
} }
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2017 Nexedi SA and Contributors. # Copyright (C) 2017-2018 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -18,13 +18,15 @@ ...@@ -18,13 +18,15 @@
# #
# See COPYING file for full licensing terms. # See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options. # See https://www.nexedi.com/licensing for rationale and options.
"""tsha1 - benchmark sha1""" """tcpu - cpu-related benchmarks"""
from __future__ import print_function from __future__ import print_function
import sys import sys
import hashlib import hashlib
import zlib
from time import time from time import time
from math import ceil, log10
import socket import socket
# fmtsize formats size in human readable form # fmtsize formats size in human readable form
...@@ -38,28 +40,112 @@ def fmtsize(size): ...@@ -38,28 +40,112 @@ def fmtsize(size):
return "%d%s" % (size, _unitv[norder]) return "%d%s" % (size, _unitv[norder])
# prettyarg formats benchmark argument for display.
#
# An argument that parses as an integer is taken to be a size and is rendered
# via fmtsize; anything else (e.g. "null-4K") is returned unchanged.
def prettyarg(arg):
    try:
        size = int(arg)
    except ValueError:
        return arg
    return fmtsize(size)
def main():
blksize = int(sys.argv[1]) # ---- 8< ---- from wendelin.core/t/py.bench
# B is the benchmarking timer/request object handed to benchmark functions.
# It is modelled after https://golang.org/pkg/testing/#B
#
#   .N              number of iterations the benchmark should perform
#   .reset_timer()  forget the time accumulated so far
#   .start_timer()  start measuring (no-op if already running)
#   .stop_timer()   stop measuring and accumulate elapsed time (no-op if stopped)
#   .total_time()   accumulated time in seconds
class B:

    def __init__(self):
        self.N = 1              # default when func does not accept `b` arg
        self._t_start = None    # when the timer was started; None while it is stopped
        self.reset_timer()

    def reset_timer(self):
        self._t_total = 0.

    def start_timer(self):
        if self._t_start is None:
            self._t_start = time()

    def stop_timer(self):
        if self._t_start is not None:
            now = time()
            self._t_total += now - self._t_start
            self._t_start = None

    def total_time(self):
        return self._t_total
# benchit runs benchf auto-adjusting whole running time to ttarget
#
# benchf(b, bencharg) is called repeatedly with growing b.N until one run
# takes at least 90% of ttarget seconds. The result of that last run is then
# printed in Go benchmark format:
#
#   Benchmark<hostname>/<benchname>/py/<arg> <N>\t<µs/op>
#
# where <benchname> is benchf name without the leading "bench_".
def benchit(benchf, bencharg, ttarget = 1.):
    b = B()
    b.N = 1
    while 1:
        b.reset_timer()
        b.start_timer()
        benchf(b, bencharg)
        b.stop_timer()
        t = b.total_time()
        if t >= ttarget * 0.9:
            break

        if t > 0:
            n = b.N * (ttarget / t) # exact how to adjust b.N to reach ttarget
        else:
            n = b.N * 10.           # too fast to be measured - just try 10x more
        order = int(log10(n))       # n = k·10^order, k ∈ [1,10)
        k = float(n) / (10**order)
        k = ceil(k)                 # lift up k to nearest int
        b.N = int(k * 10**order)    # b.N = int([1,10))·10^order

    hostname = socket.gethostname()
    benchname = benchf.__name__
    if benchname.startswith('bench_'):
        benchname = benchname[len('bench_'):]

    # report the number of iterations actually performed (b.N), not the
    # unrounded estimate it was derived from.
    print('Benchmark%s/%s/py/%s %d\t%.3f µs/op' %
            (hostname, benchname, prettyarg(bencharg), b.N, t * 1E6 / b.N))
# ---- 8< ----
def bench_sha1(b, blksize):
blksize = int(blksize)
data = '\0'*blksize data = '\0'*blksize
h = hashlib.sha1() h = hashlib.sha1()
tstart = time() b.reset_timer()
n = int(1E6)
if blksize > 1024:
n = n * 1024 / blksize # assumes 1K ~= 1μs
n = b.N
i = 0 i = 0
while i < n: while i < n:
h.update(data) h.update(data)
i += 1 i += 1
tend = time()
dt = tend - tstart
hostname = socket.gethostname() def readfile(path):
print('Benchmark%s/sha1/py/%s %d\t%.3f µs/op' % (hostname, fmtsize(blksize), n, dt * 1E6 / n)) with open(path, 'r') as f:
return f.read()
def bench_unzlib(b, zfile):
zdata = readfile('testdata/zlib/%s' % zfile)
b.reset_timer()
n = b.N
i = 0
while i < n:
zlib.decompress(zdata)
i += 1
# main runs the benchmark selected on the command line:
#
#   tcpu <benchmark> <arg>
#
# <benchmark> selects function bench_<benchmark> from this module and <arg>
# is passed to it as is. On missing arguments or unknown benchmark a message
# is printed to stderr and the program exits with status 1.
def main():
    if len(sys.argv) < 3:
        sys.stderr.write("Usage: tcpu <benchmark> <block-size>\n")
        sys.exit(1)

    bench    = sys.argv[1]
    bencharg = sys.argv[2]

    benchf = globals().get('bench_%s' % bench)
    if benchf is None:
        sys.stderr.write("Unknown benchmark %r\n" % bench)
        sys.exit(1)

    benchit(benchf, bencharg)
if __name__ == '__main__': if __name__ == '__main__':
main() main()
...@@ -20,8 +20,6 @@ ...@@ -20,8 +20,6 @@
package neo package neo
import ( import (
"bytes"
"compress/zlib"
"context" "context"
"io" "io"
...@@ -38,36 +36,6 @@ func lclose(ctx context.Context, c io.Closer) { ...@@ -38,36 +36,6 @@ func lclose(ctx context.Context, c io.Closer) {
} }
} }
// decompress decompresses data according to zlib encoding.
//
// out buffer, if there is enough capacity, is used for decompression destination.
// if out has not enough capacity a new buffer is allocated and used.
//
// return: destination buffer with full decompressed data or error.
func decompress(in []byte, out []byte) (data []byte, err error) {
bin := bytes.NewReader(in)
zr, err := zlib.NewReader(bin)
if err != nil {
return nil, err
}
defer func() {
err2 := zr.Close()
if err2 != nil && err == nil {
err = err2
data = nil
}
}()
bout := bytes.NewBuffer(out)
_, err = io.Copy(bout, zr)
if err != nil {
return nil, err
}
return bout.Bytes(), nil
}
// at2Before converts at to before for ZODB load semantics taking edge cases into account. // at2Before converts at to before for ZODB load semantics taking edge cases into account.
// //
// For most values it is // For most values it is
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment