#!/bin/bash -e
# neotest: run tests and benchmarks against FileStorage, ZEO and various NEO/py{sql,sqlite}, NEO/go clusters

# ---- deploy NEO for tests/benchmarks at a node ----

die() {
	echo 1>&2 "$@"
	exit 1
}

# cmd_deploy [user@]<host>:<path>	- deploy NEO & needed software for tests there
# an ssh key or password for access must be available
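# example (hypothetical host/path):
#	./neotest deploy test@bench1:~/neotest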
cmd_deploy() {
	host=`echo $1 |sed -e 's/:[^:]*$//'`	# user@host
	path=${1:$((${#host} + 1))}		# path
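	# e.g. (hypothetical) $1=test@bench1:~/neotest -> host=test@bench1, path=~/neotest
	# (${1:offset} is bash substring expansion: skip "$host" plus the ':')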
	test -z "$host" -o -z "$path" && die "Usage: neotest deploy [user@]<host>:<path>"
	echo -e "\n*** deploying to $@ ..."
	scp $0 $host:neotest
	ssh $host ./neotest deploy-local "$path"
}

# cmd_deploy_local <path>		- deploy NEO & needed software for tests @path
cmd_deploy_local() {
	path=$1
	test -z "$path" && die "Usage: neotest deploy-local <path>"
	test -e $path/deployed && echo "# already deployed" && return
	mkdir -p $path
	cd $path

	# python part
	virtualenv venv

	cat >env.sh << 'EOF'
X=${1:-${BASH_SOURCE[0]}}       # the path of the original env.sh is passed explicitly
X=$(cd `dirname $X` && pwd)     # when another env.sh wraps this one

export GOPATH=$X:$GOPATH
export PATH=$X/bin:$PATH
export PS1="(`basename $X`) $PS1"

# strip trailing : from $GOPATH
GOPATH=${GOPATH%:}

# python
. $X/venv/bin/activate

# lmbench
export PATH=$X/lmbench/lmbench3/bin/`cd $X/lmbench/lmbench3/src; ../scripts/os`:$PATH

# ioping
export PATH=$X/ioping:$PATH

# XXX for mysqld
export PATH=$PATH:/usr/sbin
EOF
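	# a wrapping env.sh elsewhere (hypothetical path) would pass the original
	# env.sh location explicitly, so that $X above resolves correctly:
	#	. /srv/neotest/env.sh /srv/neotest/env.sh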

	# NOTE lmbench before env.sh because env.sh uses `scripts/os` from lmbench
	git clone -o kirr -b x/kirr https://lab.nexedi.com/kirr/lmbench.git
	pushd lmbench/lmbench3/src
	make -j`nproc`
	go build -o ../bin/`../scripts/os`/lat_tcp_go lat_tcp.go
	popd

	. env.sh

	pip install git+https://lab.nexedi.com/nexedi/wendelin.core.git@master	# XXX does not show git in ver
	pip install git+https://lab.nexedi.com/kirr/zodburi.git@master
	pip install zodbtools

	mkdir -p src/lab.nexedi.com/kirr
	pushd src/lab.nexedi.com/kirr
	test -d neo || git clone -o kirr https://lab.nexedi.com/kirr/neo.git neo
	cd neo
	git fetch kirr refs/backup/t:refs/kirr-backup/t			# XXX temp
	git checkout refs/kirr-backup/t

	pip install -e .
	pip install mysqlclient		# XXX better if `pip install -e .` above picked this up
	popd

	go get -v lab.nexedi.com/kirr/neo/go/...
	go get -v github.com/pkg/profile		# used by zhash.go

	git clone -o kirr -b x/hist https://lab.nexedi.com/kirr/ioping.git
	pushd ioping
	make -j`nproc`
	popd

	echo ok >deployed
	echo "# deployed ok"
}

# jump to deploy early if we have to
case "$1" in
deploy)
	shift
	cmd_deploy "$@"
	exit
	;;

deploy-local)
	shift
	cmd_deploy_local "$@"
	exit
	;;
esac

# on <url> ...		- run ... on the deployed <url>, from inside the neotest directory there
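# example (hypothetical url):
#	on test@bench1:~/neotest ./neotest info-local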
on() {
	#echo "on $@"
	host=`echo $1 |sed -e 's/:[^:]*$//'`	# user@host
	path=${1:$((${#host} + 1))}		# path
	test -z "$host" -o -z "$path" && die "on $1: invalid URL"
	shift
	ssh $host "bash -c \"test -e $path/deployed || { echo 1>&2 '$url not yet deployed'; exit 1; }
cd $path
. env.sh
#set -x
cd src/lab.nexedi.com/kirr/neo/go/neo/t
$@
\""
}

# ----------------------------------------

# XXX neo/py, wendelin.core, ... - must be pip install'ed
# XXX neo/py: run via relative path to neomaster? (../../neo/neomaster) so we do not need to `pip install -e` ?

# local external address (IPv4 or IPv6)
myaddr=$(getent hosts `hostname` |grep -v 127.0 |awk '{print $1}')

# port allocations ([] works for IPv4 too)
Abind=[$myaddr]:5551	# NEO admin
Mbind=[$myaddr]:5552	# NEO master
Zbind=[$myaddr]:5553	# ZEO
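# e.g. with myaddr=192.0.2.1 (hypothetical address): Mbind=[192.0.2.1]:5552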

# NEO storage. Binding is not strictly needed, but it makes sure no two
# storages are started at the same time.
Sbind=[$myaddr]:5554

# disk allocation
log=`pwd`/log;		mkdir -p $log
var=`pwd`/var;		mkdir -p $var
fs1=$var/fs1;		mkdir -p $fs1		# FileStorage (and so ZEO and NEO/go) data
neolite=$var/neo.sqlite				# NEO/py: sqlite
neosql=$var/neo.sql;	mkdir -p $neosql	# NEO/py: mariadb
mycnf=$neosql/mariadb.cnf			# NEO/py: mariadb config
mysock=$(realpath $neosql)/my.sock		# NEO/py: mariadb socket

# cluster name
cluster=pygotest

# control started NEO cluster
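# e.g.: xneoctl set cluster stopping	(used below to shut clusters down)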
xneoctl() {
	neoctl -a $Abind "$@"
}

# control started MariaDB
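# e.g.: xmysql -e "SHUTDOWN"		(used below to stop mariadb)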
xmysql() {
	mysql --defaults-file=$mycnf "$@"
}

# install exit trap: clean up if we are terminating abnormally
install_trap() {
	trap 'set +e
echo "E: abnormal termination - stopping..."
xneoctl set cluster stopping
sleep 1
xmysql -e "SHUTDOWN"
sleep 1
j="$(jobs -p)"
test -z "$j" && exit
echo "E: killing left jobs..."
jobs -l
kill $j' EXIT
}

# ---- start NEO nodes ----

# M{py,go} ...	- spawn master
Mpy() {
	# --autostart=1
	exec -a Mpy \
		neomaster --cluster=$cluster --bind=$Mbind --masters=$Mbind -r 1 -p 1 --logfile=$log/Mpy.log "$@" &
}

Mgo() {
	exec -a Mgo \
		neo --log_dir=$log master -cluster=$cluster -bind=$Mbind "$@" &
}

# Spy ...	- spawn NEO/py storage
Spy() {
	# --adapter=...
	# --database=...
	# --engine=...
	exec -a Spy \
		neostorage --cluster=$cluster --bind=$Sbind --masters=$Mbind --logfile=$log/Spy.log "$@" &
}

# Sgo <data.fs>	- spawn NEO/go storage
Sgo() {
	# -alsologtostderr
	# -cpuprofile cpu.out
	# -trace trace.out
	exec -a Sgo \
		neo -log_dir=$log storage -cluster=$cluster -bind=$Sbind -masters=$Mbind "$@" &
}

# Apy ...	- spawn NEO/py admin
Apy() {
	exec -a Apy \
		neoadmin --cluster=$cluster --bind=$Abind --masters=$Mbind --logfile=$log/Apy.log "$@" &
}

# Zpy <data.fs> ...	- spawn ZEO
Zpy() {
	exec -a Zpy \
		runzeo --address $Zbind --filename "$@" 2>>$log/Zpy.log &
}


# ---- start NEO clusters ----

# spawn NEO/go cluster (Sgo+Mpy+Apy) working on data.fs
NEOgo() {
	Mpy --autostart=1
	Sgo $fs1/data.fs
	Apy
}

# spawn NEO/py cluster working on sqlite db
NEOpylite() {
	Mpy --autostart=1
	Spy --adapter=SQLite --database=$neolite
	Apy
}

# spawn NEO/py cluster working on mariadb
NEOpysql() {
	MDB
	sleep 1	# XXX fragile
	xmysql -e "CREATE DATABASE IF NOT EXISTS neo"

	Mpy --autostart=1
	Spy --adapter=MySQL --engine=InnoDB --database=root@neo$mysock
	Apy
}
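
# a started cluster is reachable by clients at neo://$cluster@$Mbind
# (this is the URL the bench commands below connect to)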


# setup/spawn mariadb
MDB() {
	cat >$mycnf <<EOF
[mysqld]
skip_networking
socket		= $mysock
datadir		= $neosql/data
log_error	= $log/mdb.log

# the following comes from
# https://lab.nexedi.com/nexedi/slapos/blob/master/software/neoppod/my.cnf.in#L18
# ---- 8< ----

# kirr: disabled
#plugin-load = ha_tokudb;ha_rocksdb

log_warnings = 1
disable-log-bin

## The following settings come from ERP5 configuration.

max_allowed_packet = 128M
query_cache_size = 32M
innodb_locks_unsafe_for_binlog = 1

# Some dangerous settings you may want to uncomment temporarily
# if you only want performance or less disk access.
#innodb_flush_log_at_trx_commit = 0
#innodb_flush_method = nosync
#innodb_doublewrite = 0
#sync_frm = 0

# Extra parameters.
log_slow_verbosity = explain,query_plan
# kirr: rocksdb disabled
# rocksdb_block_cache_size = 10G
# rocksdb_max_background_compactions = 3
long_query_time = 1
innodb_file_per_table = 1

# Force utf8 usage
collation_server = utf8_unicode_ci
character_set_server = utf8
skip_character_set_client_handshake

[client]
socket = $mysock
user = root
EOF

	# setup system tables on first run
	if ! test -e $neosql/data ; then
		# XXX --cross-bootstrap only to avoid final large print notice
		# XXX but cross-bootstrap filters out current host name from installed tables - is it ok?
		mysql_install_db --defaults-file=$mycnf --cross-bootstrap
	fi

	mysqld --defaults-file=$mycnf &
}

# ---- generate test data ----

# generate data with many small (4K) objects
export WENDELIN_CORE_ZBLK_FMT=ZBlk1

# XXX 32 temp - raise
#work=8	# array size generated (MB)
work=32	# array size generated (MB)
#work=64
#work=512	# array size generated (MB)

# generate data in data.fs
GENfs() {
	test -e $var/generated.fs && return
	echo -e '\n*** generating fs1 data...'
	demo-zbigarray --worksize=$work gen $fs1/data.fs
	sync
	touch $var/generated.fs
}

# generate data in sqlite
GENsqlite() {
	test -e $var/generated.sqlite && return
	echo -e '\n*** generating sqlite data...'
	NEOpylite
	demo-zbigarray --worksize=$work gen neo://$cluster@$Mbind
	xneoctl set cluster stopping
	wait	# XXX fragile - won't work if there are children spawned outside
	sync
	touch $var/generated.sqlite
}

# generate data in mariadb
GENsql() {
	test -e $var/generated.sql && return
	echo -e '\n*** generating sql data...'
	NEOpysql
	demo-zbigarray --worksize=$work gen neo://$cluster@$Mbind
	xneoctl set cluster stopping
	sleep 1	# XXX fragile
	xmysql -e "SHUTDOWN"
	wait	# XXX fragile
	sync
	touch $var/generated.sql
}

# generate all test databases
gen_data() {
	GENfs
	GENsqlite
	GENsql
	wait
	sync
}


# ---- main driver ----

# pyver <egg> [<showas>]	- print version of egg
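# example output (hypothetical version string):
#	# neo             : v1.8-100-g1234567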
pyver() {
	#return	# XXX temp to save time
	local egg=$1
	local showas=$2
	test "$showas" == "" && showas=$egg
	local loc
	local pyver
	{
		read loc
		read pyver
	} < <(python -c "import pkg_resources as p; e=p.require(\"$egg\")[0]; print(\"%s\n%s\" % (e.location, e.version))")
	local gitver=$(git -C $loc describe --long --dirty 2>/dev/null)
	local ver
	test "$gitver" != "" && ver="$gitver" || ver="$pyver"
	printf "# %-16s: %s\n" "$showas" "$ver"
}

# lspci1 <pcidev> <field>	- show <field> from lspci information about <pcidev>
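# e.g. (hypothetical device): lspci1 0000:00:1f.6 Vendor	-> Intel Corporation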
lspci1() {
	lspci -vmm -s $1 |grep "^$2:\\s*" |sed -e "s/^$2:\\s*//"
}

# show date/hardware/versions
header() {
	echo -n "# "; date --rfc-2822
	echo    "# `whoami`@`hostname --fqdn` ($myaddr)";
	echo -n "# cpu: "; grep "^model name" /proc/cpuinfo |head -1 |sed -e 's/model name\s*: //'

	# disk under .
	mntpt=`stat -c '%m' .`				# mountpoint of current filesystem
	mntdev=`findmnt -n -o source $mntpt`		# mountpoint -> device
	blkdev=`echo $mntdev |sed -e 's/[0-9]*$//'`	# /dev/sda3 -> /dev/sda
	blkdev1=`basename $blkdev`			# /dev/sda  -> sda
	echo "# $blkdev1: `lsblk -dn -o MODEL $blkdev`  rev `lsblk -dn -o REV,SIZE $blkdev`"

	# all NICs
	find /sys/class/net -type l -not -lname '*virtual*' | \
	while read nic; do
		nicname=`basename $nic`		# /sys/class/net/eth0	-> eth0
		echo -n "# $nicname: "
		nicdev=`realpath $nic/device`	# /sys/class/net/eth0	-> /sys/devices/pci0000:00/0000:00:1f.6

		case "$nicdev" in
		*pci*)
			pcidev=`basename $nicdev`	# /sys/devices/pci0000:00/0000:00:1f.6	-> 0000:00:1f.6
			#lspci -s $pcidev
			echo "`lspci1 $pcidev Vendor` `lspci1 $pcidev Device` rev `lspci1 $pcidev Rev`"
			;;

		*)
			echo "$nicdev (TODO)"
			;;
		esac
	done

	echo -n "# "; uname -a
	echo -n "# "; python --version
	echo -n "# "; go version
	echo -n "# "; python -c 'import sqlite3 as s; print "sqlite %s (py mod %s)" % (s.sqlite_version, s.version)'
	echo -n "# "; mysqld --version

	pyver neoppod neo
	pyver zodb
	pyver zeo
	pyver mysqlclient
	pyver wendelin.core
}

# run benchmarks
Nrun=4		# repeat benchmarks N time
Npar=8		# run so many parallel clients in parallel phase

# nrun ...	- run ... $Nrun times serially
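#	e.g.: nrun ./zhash.py --$hashfunc $url		(as in bench below)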
nrun() {
	for i in `seq $Nrun`; do
		"$@"
	done
}

# nrunpar ...	- run $Npar ... instances in parallel and wait for completion
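#	e.g.: nrunpar ./zhash.py --$hashfunc $url	($Npar parallel clients)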
nrunpar() {
	local jobv
	for i in `seq $Npar`; do
		"$@" &
		jobv="$jobv $!"
	done
	wait $jobv
}

# bench_disk	- print disk identification and benchmark direct (uncached) and cached random reads
bench_disk() {
	echo -e "\n*** random direct (no kernel cache) 4K-read disk latency"
	nrun ioping -D -i 0ms -s 4k -S 1024M -w 3s -q -k .

	echo -e "\n*** random cached 4K-read disk latency"
	# warmup so kernel puts the file into pagecache
	for i in `seq 3`; do
		cat ioping.tmp >/dev/null
	done

	nrun ioping -C -i 0ms -s 4k -S 1024M -w 3s -q -k .
}

#hashfunc=sha1
#hashfunc=adler32
hashfunc=crc32
#hashfunc=null

# bench <url>	- run benchmarks against URL
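# e.g. (as invoked by bench-local below):
#	bench $fs1/data.fs, bench zeo://$Zbind, bench neo://$cluster@$Mbind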
bench() {
	url=$1
#	nrun time demo-zbigarray read $url

	nrun ./zhash.py --$hashfunc $url
#	echo -e "\n# ${Npar} clients in parallel"
#	nrunpar ./zhash.py --$hashfunc $url

	if [[ $url == zeo://* ]]; then
		echo "(skipping zhash.go on ZEO -- Cgo does not support zeo:// protocol)"
		return
	fi
	echo
	bench_go $url
}

# go-only part of bench
bench_go() {
	url=$1
	nrun ./zhash_go --log_dir=$log -$hashfunc $url
#	nrun ./zhash_go --log_dir=$log -$hashfunc -useprefetch $url

#	echo -e "\n# ${Npar} clients in parallel"
#	nrunpar ./zhash_go --log_dir=$log -$hashfunc $url
}


# command: benchmark when client and storage are on the same computer
cmd_bench-local() {
	echo -e ">>> bench-local"
	header
	bench_disk
	install_trap
	gen_data

	echo -e "\n*** FileStorage"
	bench $fs1/data.fs

	echo -e "\n*** ZEO"
	Zpy $fs1/data.fs
	bench zeo://$Zbind
	killall runzeo
	wait

	echo -e "\n*** NEO/py sqlite"
	NEOpylite
	bench neo://$cluster@$Mbind
	xneoctl set cluster stopping
	wait

	echo -e "\n*** NEO/py sql"
	NEOpysql
	bench neo://$cluster@$Mbind
	xneoctl set cluster stopping
	xmysql -e "SHUTDOWN"
	wait

	echo -e "\n*** NEO/go"
	NEOgo
	bench neo://$cluster@$Mbind
	xneoctl set cluster stopping
	wait

	echo -e "\n*** NEO/go (sha1 disabled)"
	X_NEOGO_SHA1_SKIP=y NEOgo
	X_NEOGO_SHA1_SKIP=y bench_go neo://$cluster@$Mbind
	xneoctl set cluster stopping
	wait

	# all ok
	trap - EXIT
	exit
}

# command: benchmark when server runs locally and client is on another node
cmd_bench-cluster() {
	url=$1
	test -z "$url" && die "Usage: neotest bench-cluster [user@]<host>:<path>"

	echo -e ">>> bench-cluster $url"
	echo -e "\n# server:"
	header
	echo -e "\n# client:"
	on $url ./neotest info-local

	echo -e "\n*** server disk:"
	bench_disk

	echo -e "\n*** link latency:"
	peer=`python -c "import urlparse as p; u=p.urlparse(\"scheme://$url\"); print u.hostname"`
	sizev="56 1472"
	for size in $sizev; do
		echo -e "\n# `hostname` ⇄ $peer (ping ${size}B)"
		sudo -n ping -i0 -w 3 -s $size -q $peer	|| echo "# skipped -> enable ping in sudo for `whoami`@`hostname`"
		echo -e "\n# $peer ⇄ `hostname` (ping ${size}B)"
		on $url "sudo -n ping -i0 -w3 -s ${size} -q \$(echo \${SSH_CONNECTION%% *}) || echo \\\"# skipped -> enable ping in sudo for \`whoami\`@\`hostname\`\\\""
	done

	echo -e "\n*** TCP latency:"
	sizev="1 1472 4096"
	for size in $sizev; do
		echo -e "\n# `hostname` ⇄ $peer (lat_tcp.c ${size}B  -> lat_tcp.c -s)"
		on $url "nohup lat_tcp -s </dev/null >/dev/null 2>/dev/null &"
		nrun lat_tcp -m $size $peer
		lat_tcp -S $peer

		echo -e "\n# `hostname` ⇄ $peer (lat_tcp.c ${size}B  -> lat_tcp.go -s)"
		on $url "nohup lat_tcp_go -s </dev/null >/dev/null 2>/dev/null &"
		nrun lat_tcp -m $size $peer
		lat_tcp -S $peer

		echo -e "\n# $peer ⇄ `hostname` (lat_tcp.c ${size}B  -> lat_tcp.c -s)"
		lat_tcp -s
		nrun on $url "lat_tcp -m $size \${SSH_CONNECTION%% *}"
		lat_tcp -S localhost

		echo -e "\n# $peer ⇄ `hostname` (lat_tcp.c ${size}B  -> lat_tcp.go -s)"
		lat_tcp_go -s 2>/dev/null &
		nrun on $url "lat_tcp -m $size \${SSH_CONNECTION%% *}"
		lat_tcp -S localhost
	done


	echo
	install_trap
	gen_data

	echo -e "\n*** ZEO"
	Zpy $fs1/data.fs
	on $url ./neotest run-client zeo://$Zbind
	killall runzeo
	wait

	echo -e "\n*** NEO/py sqlite"
	NEOpylite
	on $url ./neotest run-client neo://$cluster@$Mbind
	xneoctl set cluster stopping
	wait

	echo -e "\n*** NEO/py sql"
	NEOpysql
	on $url ./neotest run-client neo://$cluster@$Mbind
	xneoctl set cluster stopping
	xmysql -e "SHUTDOWN"
	wait

	echo -e "\n*** NEO/go"
	NEOgo
	on $url ./neotest run-client neo://$cluster@$Mbind
	xneoctl set cluster stopping
	wait

	echo -e "\n*** NEO/go (sha1 disabled)"
	X_NEOGO_SHA1_SKIP=y NEOgo
	on $url X_NEOGO_SHA1_SKIP=y ./neotest run-client --goonly neo://$cluster@$Mbind
	xneoctl set cluster stopping
	wait

	# all ok
	trap - EXIT
	exit
}

# command: run client workload against a separate server
cmd_run-client() {
	goonly=""
	case "$1" in
	--goonly)
		goonly=y
		shift
		;;
	esac

	url=$1
	test -z "$url" && die "Usage: neotest run-client <url>"

	test -z "$goonly" && bench $url || bench_go $url
}

# command: print information about local node
cmd_info-local() {
	header
}

# ---- main driver ----

usage() {
cat 1>&2 << EOF
Neotest is a tool to functionally test and benchmark NEO.

Usage:

	neotest command [arguments]

The commands are:

	bench-local	run benchmarks with client and server on the same host
	bench-cluster	run benchmarks when server is local and client is on another node

	run-client	run client benchmarks against a separate server

	deploy		deploy NEO & needed software for tests to remote host
	deploy-local	deploy NEO & needed software for tests locally

	info-local	print information about local deployment
EOF
}

case "$1" in
# commands that require build
bench-local	| \
run-client	| \
bench-cluster)
	;;

info-local)
	shift
	cmd_info-local "$@"
	exit 0
	;;

-h)
	usage
	exit 0
	;;
*)
	usage
	exit 1
	;;
esac


# rebuild go bits
go install -v lab.nexedi.com/kirr/neo/go/...
go build -o zhash_go zhash.go

# run the command
cmd="$1"
shift
cmd_$cmd "$@"