Commit f3ca92a0 authored by Jacob Vosmaer's avatar Jacob Vosmaer

Add 'resume' capability to parallel-rsync-repos

parent 6d2be021
#!/bin/sh
# this script should run as the 'git' user, not root, because of mkdir
#!/usr/bin/env bash
# this script should run as the 'git' user, not root, because 'root' should not
# own intermediate directories created by rsync.
#
# Example invocation:
# find /var/opt/gitlab/git-data/repositories -maxdepth 2 | \
# parallel-rsync-repos /var/opt/gitlab/git-data/repositories /mnt/gitlab/repositories
# parallel-rsync-repos transfer-success.log /var/opt/gitlab/git-data/repositories /mnt/gitlab/repositories
#
# You can also rsync to a remote destination.
#
# parallel-rsync-repos /var/opt/gitlab/git-data/repositories user@host:/mnt/gitlab/repositories
# parallel-rsync-repos transfer-success.log /var/opt/gitlab/git-data/repositories user@host:/mnt/gitlab/repositories
#
# If you need to pass extra options to rsync, set the RSYNC variable
#
# env RSYNC='rsync --rsh="foo bar"' parallel-rsync-repos /src dest
# env RSYNC='rsync --rsh="foo bar"' parallel-rsync-repos transfer-success.log /src dest
#
SRC=$1
DEST=$2
LOGFILE=$1
SRC=$2
DEST=$3
if [ -z "$JOBS" ] ; then
JOBS=10
if [ -z "$LOGFILE" ] || [ -z "$SRC" ] || [ -z "$DEST" ] ; then
echo "Usage: $0 LOGFILE SRC DEST"
exit 1
fi
if [ -z "$SRC" ] || [ -z "$DEST" ] ; then
echo "Usage: $0 SRC DEST"
exit 1
if [ -z "$JOBS" ] ; then
JOBS=10
fi
if [ -z "$RSYNC" ] ; then
......@@ -35,5 +37,18 @@ if ! cd $SRC ; then
exit 1
fi
sed "s|$SRC|./|" |\
parallel -j$JOBS --progress "mkdir -p $DEST/{} && $RSYNC --delete -a {}/. $DEST/{}/"
rsyncjob() {
relative_dir="./${1#$SRC}"
if ! $RSYNC --delete --relative -a "$relative_dir" "$DEST" ; then
echo "rsync $1 failed"
return 1
fi
echo "$1" >> $LOGFILE
}
export LOGFILE SRC DEST RSYNC
export -f rsyncjob
parallel -j$JOBS --progress rsyncjob
......@@ -96,25 +96,59 @@ after switching to the new repository storage directory.
### Parallel rsync for all repositories known to GitLab
This will sync repositories with 10 rsync processes at a time.
This will sync repositories with 10 rsync processes at a time. We keep
track of progress so that the transfer can be restarted if necessary.
First we create a new directory, owned by 'git', to hold transfer
logs. We assume the directory is empty before we start the transfer
procedure, and that we are the only ones writing files in it.
```
# Omnibus
sudo gitlab-rake gitlab:list_repos |\
sudo -u git \
sudo mkdir /var/opt/gitlab/transfer-logs
sudo chown git:git /var/opt/gitlab/transfer-logs
# Source
sudo -u git -H mkdir /home/git/transfer-logs
```
We seed the process with a list of the directories we want to copy.
```
# Omnibus
sudo -u git sh -c 'gitlab-rake gitlab:list_repos > /var/opt/gitlab/transfer-logs/all-repos-$(date +%s).txt'
# Source
cd /home/git/gitlab
sudo -u git -H sh -c 'bundle exec rake gitlab:list_repos > /home/git/transfer-logs/all-repos-$(date +%s).txt'
```
Now we can start the transfer. The command below is idempotent, and
the number of jobs done by GNU Parallel should converge to zero. If it
does not some repositories listed in all-repos-1234.txt may have been
deleted/renamed before they could be copied.
```
# Omnibus
sudo -u git sh -c '
cat /var/opt/gitlab/transfer-logs/* | sort | uniq -u |\
/usr/bin/env JOBS=10 \
/opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repoos \
/opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repos \
/var/opt/gitlab/transfer-logs/succes-$(date +%s).log \
/var/opt/gitlab/git-data/repositories \
/mnt/gitlab/repositories
'
# Source
cd /home/git/gitlab
sudo -u git -H bundle exec rake gitlab:list_repos |\
sudo -u git -H \
sudo -u git -H sh -c '
cat /home/git/transfer-logs/* | sort | uniq -u |\
/usr/bin/env JOBS=10 \
bin/parallel-rsync-repos \
/home/git/transfer-logs/succes-$(date +%s).log \
/home/git/repositories \
/mnt/gitlab/repositories
`
```
### Parallel rsync only for repositories with recent activity
......@@ -129,7 +163,8 @@ gitlab:list_repos' to only print repositories with recent activity.
sudo gitlab-rake gitlab:list_repos SINCE='2015-10-1 12:00 UTC' |\
sudo -u git \
/usr/bin/env JOBS=10 \
/opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repoos \
/opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repos \
succes-$(date +%s).log \
/var/opt/gitlab/git-data/repositories \
/mnt/gitlab/repositories
......@@ -139,6 +174,7 @@ sudo -u git -H bundle exec rake gitlab:list_repos SINCE='2015-10-1 12:00 UTC' |\
sudo -u git -H \
/usr/bin/env JOBS=10 \
bin/parallel-rsync-repos \
succes-$(date +%s).log \
/home/git/repositories \
/mnt/gitlab/repositories
```
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment