#!/bin/bash -e # neotest: run tests and benchmarks against FileStorage, ZEO and various NEO/py{sql,sqlite}, NEO/go clusters # ---- deploy NEO for tests/benchmarks at a node ---- die() { echo 2>&1 "$@" exit 1 } # cmd_deploy [user@]<host>:<path> - deploy NEO & needed software for tests there # ssh-key or password for access should be available cmd_deploy() { host=`echo $1 |sed -e 's/:[^:]*$//'` # user@host path=${1:$((${#host} + 1))} # path test -z "$host" -o -z "$path" && die "Usage: neotest deploy [user@]<host>:<path>" echo -e "\n*** deploying to $@ ..." scp $0 $host:neotest ssh $host ./neotest deploy-local "$path" } # cmd_deploy_local <path> - deploy NEO & needed software for tests @path cmd_deploy_local() { path=$1 test -z "$path" && die "Usage: neotest deploy-local <path>" test -e $path/deployed && echo "# already deployed" && return mkdir -p $path cd $path # python part virtualenv venv cat >env.sh << 'EOF' X=${1:-${BASH_SOURCE[0]}} # path to original env.sh is explicitly passed X=$(cd `dirname $X` && pwd) # when there is other env.sh wrapping us export GOPATH=$X:$GOPATH export PATH=$X/bin:$PATH export PS1="(`basename $X`) $PS1" # strip trailing : from $GOPATH GOPATH=${GOPATH%:} # python . $X/venv/bin/activate # lmbench export PATH=$X/lmbench/lmbench3/bin/`cd $X/lmbench/lmbench3/src; ../scripts/os`:$PATH # ioping export PATH=$X/ioping:$PATH # XXX for mysqld export PATH=$PATH:/usr/sbin EOF # NOTE lmbench before env.sh becuase env.sh uses `scripts/os` from lmbench git clone -o kirr -b x/kirr https://lab.nexedi.com/kirr/lmbench.git pushd lmbench/lmbench3/src make -j`nproc` go build -o ../bin/`../scripts/os`/lat_tcp_go lat_tcp.go popd . env.sh pip install git+https://lab.nexedi.com/nexedi/wendelin.core.git@master # XXX does not show git in ver pip install git+https://lab.nexedi.com/kirr/zodburi.git@master pip install zodbtools mkdir -p src/lab.nexedi.com/kirr pushd src/lab.nexedi.com/kirr test -d neo || git clone -o kirr https://lab.nexedi.com/kirr/neo.git neo cd neo git fetch kirr refs/backup/t:refs/kirr-backup/t # XXX temp git checkout refs/kirr-backup/t pip install -e . pip install mysqlclient # XXX better ^^^ `pip install .` pick this up popd go get -v lab.nexedi.com/kirr/neo/go/... go get -v github.com/pkg/profile # used by zhash.go git clone -o kirr -b x/hist https://lab.nexedi.com/kirr/ioping.git pushd ioping make -j`nproc` popd echo ok >deployed echo "# deployed ok" } # jump to deploy early if we have to case "$1" in deploy) shift cmd_deploy "$@" exit ;; deploy-local) shift cmd_deploy_local "$@" exit ;; esac # on <url> ... - run ... on deployed url from inside dir of neotest on() { #echo "on $@" host=`echo $1 |sed -e 's/:[^:]*$//'` # user@host path=${1:$((${#host} + 1))} # path test -z "$host" -o -z "$path" && die "on $1: invalid URL" shift ssh $host "bash -c \"test -e $path/deployed || { echo 1>&2 '$url not yet deployed'; exit 1; } cd $path . env.sh #set -x cd src/lab.nexedi.com/kirr/neo/go/neo/t $@ \"" } # ---------------------------------------- # XXX neo/py, wendelin.core, ... - must be pip install'ed # XXX neo/py: run via relative path to neomaster? (../../neo/neomaster) so we do not need to `pip install -e` ? # local external address IPv4 or IPv6 myaddr=$(getent hosts `hostname` |grep -v 127.0 |awk '{print $1}') # port allocations ([] works for IPv4 too) Abind=[$myaddr]:5551 # NEO admin Mbind=[$myaddr]:5552 # NEO master Zbind=[$myaddr]:5553 # ZEO # NEO storage. bind not strictly needed but we make sure no 2 storages are # started at the same time Sbind=[$myaddr]:5554 # disk allocation log=`pwd`/log; mkdir -p $log var=`pwd`/var; mkdir -p $var fs1=$var/fs1; mkdir -p $fs1 # FileStorage (and so ZEO and NEO/go) data neolite=$var/neo.sqlite # NEO/py: sqlite neosql=$var/neo.sql; mkdir -p $neosql # NEO/py: mariadb mycnf=$neosql/mariadb.cnf # NEO/py: mariadb config mysock=$(realpath $neosql)/my.sock # NEO/py: mariadb socket # cluster name cluster=pygotest # control started NEO cluster xneoctl() { neoctl -a $Abind "$@" } # control started MariaDB xmysql() { mysql --defaults-file=$mycnf "$@" } # if we are abnormally terminating install_trap() { trap 'set +e echo "E: abnormal termination - stopping..." xneoctl set cluster stopping sleep 1 xmysql -e "SHUTDOWN" sleep 1 j="$(jobs -p)" test -z "$j" && exit echo "E: killing left jobs..." jobs -l kill $j' EXIT } # ---- start NEO nodes ---- # M{py,go} ... - spawn master Mpy() { # --autostart=1 exec -a Mpy \ neomaster --cluster=$cluster --bind=$Mbind --masters=$Mbind -r 1 -p 1 --logfile=$log/Mpy.log "$@" & } Mgo() { exec -a Mgo \ neo --log_dir=$log master -cluster=$cluster -bind=$Mbind "$@" & } # Spy ... - spawn NEO/py storage Spy() { # --adapter=... # --database=... # --engine=... exec -a Spy \ neostorage --cluster=$cluster --bind=$Sbind --masters=$Mbind --logfile=$log/Spy.log "$@" & } # Sgo <data.fs> - spawn NEO/go storage Sgo() { # -alsologtostderr # -cpuprofile cpu.out # -trace trace.out exec -a Sgo \ neo -log_dir=$log storage -cluster=$cluster -bind=$Sbind -masters=$Mbind "$@" & } # Apy ... - spawn NEO/py admin Apy() { exec -a Apy \ neoadmin --cluster=$cluster --bind=$Abind --masters=$Mbind --logfile=$log/Apy.log "$@" & } # Zpy <data.fs> ... - spawn ZEO Zpy() { exec -a Zpy \ runzeo --address $Zbind --filename "$@" 2>>$log/Zpy.log & } # ---- start NEO clusters ---- # spawn NEO/go cluster (Sgo+Mpy+Apy) working on data.fs NEOgo() { Mpy --autostart=1 Sgo $fs1/data.fs Apy } # spawn NEO/py cluster working on sqlite db NEOpylite() { Mpy --autostart=1 Spy --adapter=SQLite --database=$neolite Apy } # spawn NEO/py cluster working on mariadb NEOpysql() { MDB sleep 1 # XXX fragile xmysql -e "CREATE DATABASE IF NOT EXISTS neo" Mpy --autostart=1 Spy --adapter=MySQL --engine=InnoDB --database=root@neo$mysock Apy } # setup/spawn mariadb MDB() { cat >$mycnf <<EOF [mysqld] skip_networking socket = $mysock datadir = $neosql/data log_error = $log/mdb.log # the following comes from # https://lab.nexedi.com/nexedi/slapos/blob/master/software/neoppod/my.cnf.in#L18 # ---- 8< ---- # kirr: disabled #plugin-load = ha_tokudb;ha_rocksdb log_warnings = 1 disable-log-bin ## The following settings come from ERP5 configuration. max_allowed_packet = 128M query_cache_size = 32M innodb_locks_unsafe_for_binlog = 1 # Some dangerous settings you may want to uncomment temporarily # if you only want performance or less disk access. #innodb_flush_log_at_trx_commit = 0 #innodb_flush_method = nosync #innodb_doublewrite = 0 #sync_frm = 0 # Extra parameters. log_slow_verbosity = explain,query_plan # kirr: rocksb disabled # rocksdb_block_cache_size = 10G # rocksdb_max_background_compactions = 3 long_query_time = 1 innodb_file_per_table = 1 # Force utf8 usage collation_server = utf8_unicode_ci character_set_server = utf8 skip_character_set_client_handshake [client] socket = $mysock user = root EOF # setup system tables on first run if ! test -e $neosql/data ; then # XXX --cross-bootstrap only to avoid final large print notice # XXX but cross-bootstrap filters out current host name from installed tables - is it ok? mysql_install_db --defaults-file=$mycnf --cross-bootstrap fi mysqld --defaults-file=$mycnf & } # ---- generate test data ---- # generate data with many small (4K) objects export WENDELIN_CORE_ZBLK_FMT=ZBlk1 # XXX 32 temp - raise #work=8 # array size generated (MB) work=32 # array size generated (MB) #work=64 #work=512 # array size generated (MB) # generate data in data.fs GENfs() { test -e $var/generated.fs && return echo -e '\n*** generating fs1 data...' demo-zbigarray --worksize=$work gen $fs1/data.fs sync touch $var/generated.fs } # generate data in sqlite GENsqlite() { test -e $var/generated.sqlite && return echo -e '\n*** generating sqlite data...' NEOpylite demo-zbigarray --worksize=$work gen neo://$cluster@$Mbind xneoctl set cluster stopping wait # XXX fragile - won't work if there are childs spawned outside sync touch $var/generated.sqlite } # generate data in mariadb GENsql() { test -e $var/generated.sql && return echo -e '\n*** generating sql data...' NEOpysql demo-zbigarray --worksize=$work gen neo://$cluster@$Mbind xneoctl set cluster stopping sleep 1 # XXX fragile xmysql -e "SHUTDOWN" wait # XXX fragile sync touch $var/generated.sql } # generate all test databases gen_data() { GENfs GENsqlite GENsql wait sync } # ---- main driver ---- # pyver <egg> (<showas>) - print version of egg pyver() { #return # XXX temp to save time local egg=$1 local showas=$2 test "$showas" == "" && showas=$egg local loc local pyver { read loc read pyver } < <(python -c "import pkg_resources as p; e=p.require(\"$egg\")[0]; print(\"%s\n%s\" % (e.location, e.version))") local gitver=$(git -C $loc describe --long --dirty 2>/dev/null) local ver test "$gitver" != "" && ver="$gitver" || ver="$pyver" printf "# %-16s: %s\n" "$showas" "$ver" } # lspci1 <pcidev> <field> - show <field> from lspci information about <pcidev> lspci1() { lspci -vmm -s $1 |grep "^$2:\\s*" |sed -e "s/^$2:\\s*//" } # show date/hardware/versions header() { echo -n "# "; date --rfc-2822 echo "# `whoami`@`hostname --fqdn` ($myaddr)"; echo -n "# cpu: "; grep "^model name" /proc/cpuinfo |head -1 |sed -e 's/model name\s*: //' # disk under . mntpt=`stat -c '%m' .` # mountpoint of current filesystem mntdev=`findmnt -n -o source $mntpt` # mountpoint -> device blkdev=`echo $mntdev |sed -e 's/[0-9]*$//'` # /dev/sda3 -> /dev/sda blkdev1=`basename $blkdev` # /dev/sda -> sda echo "# $blkdev1: `lsblk -dn -o MODEL $blkdev` rev `lsblk -dn -o REV,SIZE $blkdev`" # all NICs find /sys/class/net -type l -not -lname '*virtual*' | \ while read nic; do nicname=`basename $nic` # /sys/class/net/eth0 -> eth0 echo -n "# $nicname: " nicdev=`realpath $nic/device` # /sys/class/net/eth0 -> /sys/devices/pci0000:00/0000:00:1f.6 case "$nicdev" in *pci*) pcidev=`basename $nicdev` # /sys/devices/pci0000:00/0000:00:1f.6 -> 0000:00:1f.6 #lspci -s $pcidev echo "`lspci1 $pcidev Vendor` `lspci1 $pcidev Device` rev `lspci1 $pcidev Rev`" ;; *) echo "$nicdev (TODO)" ;; esac done echo -n "# "; uname -a echo -n "# "; python --version echo -n "# "; go version echo -n "# "; python -c 'import sqlite3 as s; print "sqlite %s (py mod %s)" % (s.sqlite_version, s.version)' echo -n "# "; mysqld --version pyver neoppod neo pyver zodb pyver zeo pyver mysqlclient pyver wendelin.core } # run benchmarks Nrun=4 # repeat benchmarks N time Npar=8 # run so many parallel clients in parallel phase # nrun ... - run ... $Nrun times serially nrun() { for i in `seq $Nrun`; do "$@" done } # nrunpar ... - run $Npar ... instances in parallel and wait for completion nrunpar() { local jobv for i in `seq $Npar`; do "$@" & jobv="$jobv $!" done wait $jobv } # bench_disk - print disk identification and benchmark direct (uncached) and cached random reads bench_disk() { echo -e "\n*** random direct (no kernel cache) 4K-read disk latency" nrun ioping -D -i 0ms -s 4k -S 1024M -w 3s -q -k . echo -e "\n*** random cached 4K-read disk latency" # warmup so kernel puts the file into pagecache for i in `seq 3`; do cat ioping.tmp >/dev/null done nrun ioping -C -i 0ms -s 4k -S 1024M -w 3s -q -k . } #hashfunc=sha1 #hashfunc=adler32 hashfunc=crc32 #hashfunc=null # bench <url> - run benchmarks against URL bench() { url=$1 # nrun time demo-zbigarray read $url nrun ./zhash.py --$hashfunc $url # echo -e "\n# ${Npar} clients in parallel" # nrunpar ./zhash.py --$hashfunc $url if [[ $url == zeo://* ]]; then echo "(skipping zhash.go on ZEO -- Cgo does not support zeo:// protocol)" return fi echo bench_go $url } # go-only part of bench bench_go() { url=$1 nrun ./zhash_go --log_dir=$log -$hashfunc $url # nrun ./zhash_go --log_dir=$log -$hashfunc -useprefetch $url # echo -e "\n# ${Npar} clients in parallel" # nrunpar ./zhash_go --log_dir=$log -$hashfunc $url } # command: benchmark when client and storage are on the same computer cmd_bench-local() { echo -e ">>> bench-local" header bench_disk install_trap gen_data echo -e "\n*** FileStorage" bench $fs1/data.fs echo -e "\n*** ZEO" Zpy $fs1/data.fs bench zeo://$Zbind killall runzeo wait echo -e "\n*** NEO/py sqlite" NEOpylite bench neo://$cluster@$Mbind xneoctl set cluster stopping wait echo -e "\n*** NEO/py sql" NEOpysql bench neo://$cluster@$Mbind xneoctl set cluster stopping xmysql -e "SHUTDOWN" wait echo -e "\n*** NEO/go" NEOgo bench neo://$cluster@$Mbind xneoctl set cluster stopping wait echo -e "\n*** NEO/go (sha1 disabled)" X_NEOGO_SHA1_SKIP=y NEOgo X_NEOGO_SHA1_SKIP=y bench_go neo://$cluster@$Mbind xneoctl set cluster stopping wait # all ok trap - EXIT exit } # command: benchmark when server runs locally and client is on another node cmd_bench-cluster() { url=$1 test -z "$url" && die "Usage: neotest bench-cluster [user@]<host>:<path>" echo -e ">>> bench-cluster $url" echo -e "\n# server:" header echo -e "\n# client:" on $url ./neotest info-local echo -e "\n*** server disk:" bench_disk echo -e "\n*** link latency:" peer=`python -c "import urlparse as p; u=p.urlparse(\"scheme://$url\"); print u.hostname"` sizev="56 1472" for size in $sizev; do echo -e "\n# `hostname` ⇄ $peer (ping ${size}B)" sudo -n ping -i0 -w 3 -s $size -q $peer || echo "# skipped -> enable ping in sudo for `whoami`@`hostname`" echo -e "\n# $peer ⇄ `hostname` (ping ${size}B)" on $url "sudo -n ping -i0 -w3 -s ${size} -q \$(echo \${SSH_CONNECTION%% *}) || echo \\\"# skipped -> enable ping in sudo for \`whoami\`@\`hostname\`\\\"" done echo -e "\n*** TCP latency:" sizev="1 1472 4096" for size in $sizev; do echo -e "\n# `hostname` ⇄ $peer (lat_tcp.c ${size}B -> lat_tcp.c -s)" on $url "nohup lat_tcp -s </dev/null >/dev/null 2>/dev/null &" nrun lat_tcp -m $size $peer lat_tcp -S $peer echo -e "\n# `hostname` ⇄ $peer (lat_tcp.c ${size}B -> lat_tcp.go -s)" on $url "nohup lat_tcp_go -s </dev/null >/dev/null 2>/dev/null &" nrun lat_tcp -m $size $peer lat_tcp -S $peer echo -e "\n# $peer ⇄ `hostname` (lat_tcp.c ${size}B -> lat_tcp.c -s)" lat_tcp -s nrun on $url "lat_tcp -m $size \${SSH_CONNECTION%% *}" lat_tcp -S localhost echo -e "\n# $peer ⇄ `hostname` (lat_tcp.c ${size}B -> lat_tcp.go -s)" lat_tcp_go -s 2>/dev/null & nrun on $url "lat_tcp -m $size \${SSH_CONNECTION%% *}" lat_tcp -S localhost done echo install_trap gen_data echo -e "\n*** ZEO" Zpy $fs1/data.fs on $url ./neotest run-client zeo://$Zbind killall runzeo wait echo -e "\n*** NEO/py sqlite" NEOpylite on $url ./neotest run-client neo://$cluster@$Mbind xneoctl set cluster stopping wait echo -e "\n*** NEO/py sql" NEOpysql on $url ./neotest run-client neo://$cluster@$Mbind xneoctl set cluster stopping xmysql -e "SHUTDOWN" wait echo -e "\n*** NEO/go" NEOgo on $url ./neotest run-client neo://$cluster@$Mbind xneoctl set cluster stopping wait echo -e "\n*** NEO/go (sha1 disabled)" X_NEOGO_SHA1_SKIP=y NEOgo on $url X_NEOGO_SHA1_SKIP=y ./neotest run-client --goonly neo://$cluster@$Mbind xneoctl set cluster stopping wait # all ok trap - EXIT exit } # command: run client workload against sepearate server cmd_run-client() { goonly="" case "$1" in --goonly) goonly=y shift ;; esac url=$1 test -z "$url" && die "Usage: neotest run-client <url>" test -z "$goonly" && bench $url || bench_go $url } # command: print information about local node cmd_info-local() { header } # ---- main driver ---- usage() { cat 1>&2 << EOF Neotest is a tool to functionally test and benchmark NEO. Usage: neotest command [arguments] The commands are: bench-local run benchmarks when client and server are both on the same localhost bench-cluster run benchmarks when server is local and client is on another node run-client run client benchmarks against separate server deploy deploy NEO & needed software for tests to remote host deploy-local deploy NEO & needed software for tests locally info-local print information about local deployment EOF } case "$1" in # commands that require build bench-local | \ run-client | \ bench-cluster) ;; info-local) shift cmd_info-local "$@" exit 0 ;; -h) usage exit 0 ;; *) usage exit 1 ;; esac # rebuild go bits go install -v lab.nexedi.com/kirr/neo/go/... go build -o zhash_go zhash.go # run the command cmd="$1" shift cmd_$cmd "$@"