#!/bin/bash
#
# bontmia (Backup Over Network To Multiple Incremental Archives)
#
# This was written to make the functionality of glastree (Jeremy Wohl)
# usable against remote hosts, combined with a selective long-term
# storage of the backups.
#
# Written by John Enok Vollestad in April 2003; it has since
# undergone some bugfixes and enhancements.
# Show the manual text through the user's pager and terminate the script.
# Globals:   PAGER (read) - pager command line; 'more' is used when it is
#            unset or empty
# Outputs:   the manual text, written to the pager's standard input
# Returns:   never returns; always exits with status 1
print_usage() {
  # The pager reads the here-document directly on its standard input.
  # The expansion is left unquoted so a PAGER holding options still works.
  ${PAGER:-more} <<EOF
NAME
Bontmia - Backup Over Network To Multiple Incremental Archives
Version 0.14
SYNOPSIS
bontmia --dest <dest. dir> [options] [source dir [source dir ...]]
DESCRIPTION
Bontmia is a network-based backup tool that saves configurable
numbers of last month, week, day, hour, and minute backups.
Each backup is a complete snapshot of the original
directories. Only new and changed files are copied over the
network when generating a snapshot. Remote access is
implemented securely using ssh. Unchanged files are stored as
hard links in the archive and therefore takes virtually no
space.
The backups is stored in a directory structure in the format
YYYY/MM/DD/HH:MM. Each directory contains a snapshot of the
backed up directories. This is stored incrementally by
letting every file not changed between backups be a hard link
to the same file in the previous backup. The actual copying
is done with rsync. To avoid typing inn a password you do as
usual with ssh by generating a public key on the host where
the backup is stored, read the manual page for ssh, and adding
this to the list of authorized hosts on the remote computer,
read the manual page for sshd.
Which backup to store for how long is configurable, see below.
The return value is 0 on success and 1 if backing up one of
the sources failed.
The destination can not be remote. If you want to place the
backup on a remote server, then run Bontmia as a command with
ssh like "ssh user@host.domain bontmia <options>".
Use at your own risk.
ARGUMENTS
--dest <dir>
Sets the destination directories where the backups is
placed. WARNING! existing files in this directory
will be DELETED!
OPTIONS
--rotation <spec>
Specifies the number of backups in each category to
save. The 'spec' is of the form
[0-9]*minutes[0-9]*hours[0-9]*days[0-9]*weeks[0-9]*month[0-9]*years
so 0minutes0hours7days4weeks12month2years is an
example for daily backup. Bontmia should be run in
cron or manually the numbers given here only specifies
how many backups to save.
The different categories (minutes, hours ...) is just
overlapping filters. More than one filter might save
a backup and a backup is removed when no filter saves
the backup any more.
The number of days given here will apply like this:
If the number of days to save is 3 then the last
backup within each of the last 3 days when backup were
done willl be saved. This means that if you do backup
like this:
day: 0 1 2 3 4 5
backup: y - y - y y
Where y means a backup were done and - means it were
not then after the backup done on day 5 is done, then
the backups on day 2, 4 and 5 is saved when the last 3
day backups is saved. Use the dryrun option below to
experiment and learn more about this option.
--dryrun
When this option is used, the backup is not performed,
but the backups to be removed is listed. This is
handy when you want to ensure that you do not remove
all the precious backups when used in combination with
the rotation option.
--bwlimit <number>
Specifies a maximum transfer rate in kilobytes per
second. This option is most effective with large files
(several megabytes and up). Due to the nature of rsync
transfers, blocks of data are sent, then if rsync
determines the transfer was too fast, it will wait
before sending the next data block. The result is an
average transfer rate equalling the specified limit. A
value of zero specifies no limit. The default is no
limit.
--temp <temp dir>
Specifies the directory used for temporary files during
transfer. NB! This MUST be on the same disk volume as
destination or incremental copy will not work.
--port <number>
Specifies the port number to connect to on the remote
host when using ssh, which is the only option.
EXAMPLES
bontmia --dest ./backup --rotation \\
5minutes0hours0days0weeks0month0years \\
foo@bar:/baz/zoot
Here there is made a copy of foo@bar:/baz/zoot in the
directory backup on the local host. If this command is run
every minute, there is store one backup for every minute for
the last 5 minutes. If it is run once every day. There is
still stored the last 5 backups done at unique minutes so the
last 5 backups is stored which means 5 last days.
Hint: set the values for times shorter than the interval used
when doing backup to 0.
bontmia --dest ./backup --rotation \\
0minutes0hours7days4weeks12month0years \\
foo@bar:/baz/zoot
Here there will at the most be stored 7 + 4 + 12 backups
(minus overlap between the backups). This can be a good
command to run nightly.
bontmia --dest ./backup --rotation \\
0minutes24hours7days4weeks12month0years \\
foo@bar:/baz/zoot
This is for running every hour. Remember that only the
changes is transferred and running more often not necessarily
will mean copying more data over the network.
bontmia --dryrun --dest ./backup --rotation \\
0minutes24hours7days4weeks12month0years \\
/home/bar/baz foo.no:/var/db
With the dryrun option it show what would happen if the backup
were run. No action on the file system done.
When bontmia runs it sends output to standard output. If you
do not want this you can redirect it to /dev/null.
CONTACT
Bontmia was written in april 2003 by John Enok Vollestad
<john.enok@vollestad.no> to merge the functionality of
glastree and rsync in one application with a more flexible
selection of long term storage. It has later undergone some
bugfixes and enhancements. http://folk.uio.no/johnen/bontmia/
EOF
  exit 1
}
# Release the lock file created by lock_destination.
# Globals: tmpdir (read) - directory holding 'is_running.lock'
# A missing lock file is not an error (rm -f).
unlock_destination() {
  local lock_file="${tmpdir}/is_running.lock"
  rm -f "${lock_file}"
}
# Take the per-tmpdir lock so that only one bontmia works on it at a time.
# Globals:   tmpdir (read) - directory in which 'is_running.lock' is created
# Outputs:   an explanation on stdout when the lock is already held;
#            a warning on stderr when the lock file cannot be created
# Returns:   0 when the lock was taken (or could not be created at all);
#            exits the script with status 1 when the lock is already held
lock_destination()
{
  local lock_file="${tmpdir}/is_running.lock"

  # With noclobber set, '>' refuses to open a file that already exists, so
  # creating the lock and checking for it is one step. The subshell keeps
  # the option from leaking into the rest of the script. Unlike the former
  # 'mv -i' approach this needs no scratch file in /tmp and does not depend
  # on the wording of a localized prompt.
  if ( set -o noclobber; : > "${lock_file}" ) 2>/dev/null; then
    return 0
  fi

  if [[ -e "${lock_file}" ]]; then
    echo "Bontmia is already doing a backup on this tmpdir:"
    echo " ${tmpdir}"
    echo
    echo " If you are sure this is false you can manually remove the lockfile:"
    echo " ${tmpdir}/is_running.lock"
    echo
    exit 1
  fi

  # The lock could not be created for another reason (for instance tmpdir
  # does not exist yet). The script has always carried on unlocked in that
  # situation; keep doing so, but no longer silently.
  echo "WARNING: could not create the lock file ${lock_file}" >&2
  return 0
}
# Resolve a host name or address to a fully qualified name using 'host'.
# Arguments: $1 - host name or IP address
# Outputs:   the name(s) taken from the "domain name pointer" lines, without
#            the trailing dot; nothing when the lookup yields neither a
#            pointer nor an address
# Returns:   0
full_name() {
  local name="${1}"
  local lookup address

  [[ -n "${name}" ]] || return 0

  # One lookup is enough; every decision below works on its output.
  lookup=$(host "${name}")

  if [[ "${lookup}" == *"domain name pointer"* ]]; then
    # Only pointer lines carry the name in field 5; other lines (such as
    # "is an alias for") must not be cut the same way.
    printf '%s\n' "${lookup}" \
      | grep "domain name pointer" \
      | cut -f5 -d' ' \
      | sed -e 's/\.$//'
  elif [[ "${lookup}" == *"has address"* ]]; then
    # Forward lookup: take the first address and resolve that one back.
    address=$(printf '%s\n' "${lookup}" \
      | grep "has address" \
      | head -n 1 \
      | cut -f4 -d' ')
    full_name "${address}"
  fi
  return 0
}
# List the archives one rotation filter wants to keep.
# Every archive directory <dest>/YYYY/MM/DD/HH:MM is printed as
#   YYYY/MM/DD/HH:MM@<template_name>@<date formatted with attribute>
# then the list is sorted newest first, reduced by the given sort options
# and cut down to the last <last_number> lines.
# Globals:   backup_destination (read)
# Arguments: $1 - attribute: date(1) format appended after the second '@'
#            $2 - grep_value: extended regex the attribute part must match
#            $3 - last_number: how many lines to keep; empty counts as 0
#            $4 - sort_options: options for the reducing 'sort' (a
#                 space-separated list); empty means no reduction
#            $5 - template_name: filter name placed between the '@' signs
# Outputs:   the selected lines on stdout, diagnostics on stderr
filter_template()
{
  local attribute="${1}"
  local grep_value="${2}"
  # An empty count used to become 'tail -', which keeps ten lines; the
  # rotation check in delete_old_backup treats empty as 0, so do the same.
  local last_number="${3:-0}"
  local sort_options="${4}"
  local template_name="${5}"
  local -a reduce_cmd=(cat)

  if [[ -n "${sort_options}" ]]; then
    # The options arrive as one string and are split on purpose.
    # shellcheck disable=SC2206
    reduce_cmd=(sort ${sort_options})
  fi

  (
    cd "${backup_destination}" || exit 1
    for archive in */*/*/*; do
      # An unmatched glob stays literal; there is nothing to list then.
      [[ -e "${archive}" ]] || continue
      IFS='/' read -r year month day the_clock <<<"${archive}"
      date -d "${year}-${month}-${day}" \
        "+%Y/%m/%d/${the_clock}@${template_name}@${attribute}" || {
        # stdout feeds the pipeline below, so diagnostics go to stderr.
        echo "Could not read the time of a previous backup" >&2
        echo "Please email the author" >&2
        # This only ends the listing: the loop runs in a pipeline subshell
        # and cannot stop the script, so the destination lock is left for
        # the main script to release.
        exit 1
      }
    done \
      | grep -E "@.*@${grep_value}" \
      | sort -r \
      | "${reduce_cmd[@]}" \
      | tail -n "${last_number}"
  )
}
# Remove what an interrupted run left behind in the staging directory.
# Globals:   tmpdir (read)
# Outputs:   a warning on stdout when a leftover was found and removed
# Returns:   0 when there was nothing to do, otherwise the status of rm
handle_last_unfinished()
{
  local staging="${tmpdir}/unfinished_backup"
  local entry
  local leftover="no"

  # Look at the first glob result only. The former 'test -e <glob>' broke
  # as soon as the glob produced more than one word (several entries, or a
  # tmpdir containing spaces) and then skipped the clean-up.
  for entry in "${staging}"/*; do
    if [[ -e "${entry}" || -L "${entry}" ]]; then
      leftover="yes"
    fi
    break
  done

  if [[ "${leftover}" == "yes" ]]; then
    echo
    echo "WARNING: Last backup did not complete"
    echo " This unfinished backup is now removed"
    echo
    # Make everything writable first so that rm can descend and delete.
    chmod -R u+w "${staging}"
    rm -rf "${staging}"
  fi
}
# Prepare the staging directory for this run, seeded with hard links to the
# files of the most recent archive when there is one.
# Globals:   backup_destination, tmpdir, this_backup, dryrun (read)
#            last_backup (written) - path of the newest archive, or empty
#            first (written) - "yes" when no earlier archive exists, else "no"
# Outputs:   progress messages on stdout
# Returns:   0; exits the script with status 1 (after releasing the lock)
#            when the staging directory cannot be created
make_hard-link_copy_of_last_backup()
{
  local staging="${tmpdir}/unfinished_backup/${this_backup}"

  # Archives live four levels below the destination (YYYY/MM/DD/HH:MM), so
  # the entry that sorts last is the newest one. Quoted so that a
  # destination containing spaces is searched as one path.
  last_backup=$(find "${backup_destination}" -maxdepth 4 -mindepth 4 | sort | tail -n 1)

  if [[ -n "${last_backup}" ]]; then
    echo "Making a hard-link replication of the last backup"
    echo " (${last_backup})"
    first="no"
  else
    echo "No previous backup detected, will start with an empty replication"
    first="yes"
  fi

  # A dry run only reports what would happen.
  [[ "${dryrun}" == "no" ]] || return 0

  mkdir -p "${staging}" || {
    unlock_destination
    exit 1
  }

  if [[ "${first}" == "no" ]]; then
    cp -lR "${last_backup}/"* "${staging}" >/dev/null 2>&1 || {
      echo " WARNING: The last backup had NO files!"
      echo " Or tmpdir is not on the same volume as destination"
    }
  fi
  return 0
}
# Move the finished snapshot from the staging area into the archive tree.
# Globals:   tmpdir, backup_destination, this_backup, dryrun (read)
# Outputs:   a progress message on stdout
# Returns:   0; on a failing mkdir or mv the lock is released and the
#            script exits with status 1
moving_complete_backup_into_archive() {
  local staged="$tmpdir/unfinished_backup/$this_backup"
  local archived="$backup_destination/$this_backup"

  echo
  echo "Moving the complete backup into the backup archive"
  echo " ($tmpdir/unfinished_backup -> $backup_destination/)"

  # Nothing is touched during a dry run.
  [[ "x$dryrun" == "xno" ]] || return 0

  if ! mkdir -p "$archived"; then
    unlock_destination
    exit 1
  fi
  if ! mv "$staged/"* "$archived/"; then
    unlock_destination
    exit 1
  fi
}
# Refuse to run when the archive for the given timestamp is already there.
# Globals:   backup_destination (read)
# Arguments: $1 - archive path relative to the destination (YYYY/MM/DD/HH:MM)
# Outputs:   an explanation on stdout when the archive exists
# Returns:   0 when the archive does not exist; otherwise releases the lock
#            and exits the script with status 1
do_the_backup_exist() {
  local candidate="$backup_destination/$1"

  [[ -e "$candidate" ]] || return 0

  echo "The backup destination already exist"
  echo "This means that there is less than a minute since the last backup"
  unlock_destination
  exit 1
}
# Create one new snapshot: seed the staging directory from the last
# archive, rsync every source into it, purge what no longer belongs there
# and move the result into the archive.
# Globals:   backup_dirs, rsync_options, port, tmpdir, dryrun (read)
#            this_backup (written) - YYYY/MM/DD/HH:MM of this run
#            first (read) - set by make_hard-link_copy_of_last_backup
#            exit_status (written) - set to "1" when a source fails twice
# Outputs:   progress and rsync diagnostics on stdout
make_backup()
{
  local dir dir_wo_user host_prefix target rsync_log rsync_rc

  # One date call, so that the parts cannot come from different minutes.
  this_backup=$(date +%Y/%m/%d/%H:%M)
  do_the_backup_exist "$this_backup"
  handle_last_unfinished
  make_hard-link_copy_of_last_backup

  # Apply changes to the hard-link copy
  echo
  if [[ "$first" == "yes" ]]; then
    echo "Backing up"
  else
    echo "Backing up by modifying the replication"
  fi

  # rsync's output is kept in a scratch file so its tail can be shown on
  # failure; fall back to the historical name when mktemp is unavailable.
  rsync_log=$(mktemp /tmp/bontmia_rsync_output.XXXXXX 2>/dev/null) \
    || rsync_log="/tmp/bontmia_rsync_output.$$"

  # backup_dirs is a space-separated list and is split on purpose.
  # shellcheck disable=SC2086
  for dir in ${backup_dirs}; do
    dir_wo_user="${dir#*@}"
    echo " ${dir_wo_user}"
    host_prefix="${dir_wo_user%%:*}:"
    if [[ "${host_prefix}" == "$(hostname):" ]]; then
      # remove hostname from dir to speed up local backup; everything
      # after the first ':' is the path, which may itself contain ':'
      dir="${dir#*:}"
    fi

    [[ "$dryrun" == "no" ]] || continue

    target="${tmpdir}/unfinished_backup/${this_backup}/${host_prefix}"
    mkdir -p "${target}"

    # stdout and stderr both go to the log ('>file 2>&1', in that order).
    # The status is saved at once, before any echo can overwrite $?.
    rsync_rc=0
    # shellcheck disable=SC2086
    rsync ${rsync_options} -e "ssh -p $port" "${dir}" "${target}" \
      >"${rsync_log}" 2>&1 || rsync_rc=$?
    [[ "${rsync_rc}" -ne 0 ]] || continue

    echo
    echo " Caught an error doing rsync (return code ${rsync_rc})"
    echo " The last 10 lines of output from rsync:"
    tail -n 10 "${rsync_log}"
    echo
    echo " Retrying rsync..."

    rsync_rc=0
    # shellcheck disable=SC2086
    rsync ${rsync_options} -e "ssh -p $port" "${dir}" "${target}" \
      >"${rsync_log}" 2>&1 || rsync_rc=$?
    [[ "${rsync_rc}" -ne 0 ]] || continue

    echo
    echo " Still no luck. Rsync failed with returncode ${rsync_rc}"
    echo " $dir"
    echo " Please check that there is room for all the data"
    echo " The last 10 lines of output from rsync:"
    tail -n 10 "${rsync_log}"
    echo
    echo
    echo " Continuing with the next backup source"
    echo
    exit_status="1"
  done
  rm -f "${rsync_log}"

  delete_outside_sync "${tmpdir}/unfinished_backup/$this_backup/"
  moving_complete_backup_into_archive
  if [[ "$dryrun" == "no" ]]; then
    chmod -R u+w "${tmpdir}/unfinished_backup"
    rm -rf "${tmpdir}/unfinished_backup"
  fi
}
# Apply the rotation filters: every archive that no filter wants to keep
# is removed (or only listed when dryrun is not "no").
# Globals:   filter_minutes, filter_hours, filter_days, filter_weeks,
#            filter_monthly, filter_years, backup_destination, dryrun (read)
# Outputs:   one "Removing"/"Saving" line per archive on stdout
# Returns:   0, or 1 when the destination cannot be entered
delete_old_backup()
{
  local count archives_with_filter archives_to_save

  echo
  echo "Calculates which backups to save"
  echo "($filter_minutes minutes, $filter_hours hours, $filter_days days, $filter_weeks weeks, $filter_monthly months, $filter_years years)"

  # Check the counts here, in the main shell: print_usage exits, and from
  # inside the command substitution below that would only end the subshell
  # and feed the manual text into the list of archives.
  for count in "$filter_minutes" "$filter_hours" "$filter_days" \
               "$filter_weeks" "$filter_monthly" "$filter_years"; do
    if ! test "0${count}" -ge 0 2>/dev/null; then
      unlock_destination
      print_usage
    fi
  done

  # by saving the x last, the backups will not be deleted even if no
  # new backups is created
  archives_with_filter=$(
    filter_template "" "" "$filter_minutes" "-u" "minutes"
    filter_template "" "" "$filter_hours" "-u -k 1,1 -t :" "hours"
    filter_template "" "" "$filter_days" "-u -k 1,3 -t /" "days"
    filter_template "%Y-%V" "" "$filter_weeks" "-u -k 3 -t @" "weeks"
    filter_template "%B" "" "$filter_monthly" "-u -k 1,2 -t /" "month"
    filter_template "%Y" "" "$filter_years" "-u -k 1,1 -t /" "years"
  )
  archives_to_save=$(echo "$archives_with_filter" | cut -f1 -d'@' | sort -u)

  (
    # Everything below removes paths relative to the current directory, so
    # never go on when the destination could not be entered.
    if [[ -z "${backup_destination}" ]] || ! cd "${backup_destination}"; then
      echo "Could not enter the backup destination, nothing is removed" >&2
      exit 1
    fi
    for archive in */*/*/*; do
      # An unmatched glob stays literal; there is nothing to rotate then.
      [[ -e "${archive}" || -L "${archive}" ]] || continue
      # -x -F: the archive name must equal a whole line, taken literally.
      if ! grep -qxF -- "${archive}" <<<"${archives_to_save}"; then
        echo " Removing ${backup_destination}/$archive"
        if [[ "$dryrun" == "no" ]]; then
          chmod -R u+w -- "${archive}"
          rm -rf -- "${archive}"
          # Drop the day/month/year directories that became empty.
          rmdir -p -- "${archive%/*}" 2>/dev/null
        fi
      else
        printf '%s' " Saving ${backup_destination}/$archive by filters: "
        filters=$(grep -F -- "${archive}@" <<<"${archives_with_filter}" | cut -f2 -d'@')
        # Unquoted on purpose: joins the filter names on one line.
        # shellcheck disable=SC2086
        echo ${filters}
      fi
    done
    exit 0
  )
}
# Remove from a snapshot everything that lies outside the backed-up
# directories. The snapshot starts as a hard-link copy of the previous one
# (cp -l), so entries of sources that are no longer backed up linger there.
# Globals:   dryrun (read)
#            bdirmatch (read) - extended regex; paths (as printed by
#            'find .') that match it are kept
# Arguments: $1 - root directory of the snapshot
# Outputs:   a progress message on stdout
# Returns:   0, or 1 when the snapshot directory cannot be entered
delete_outside_sync()
{
  local entry

  echo
  echo "Deletes files that should not be in the latest snapshot"
  [[ "$dryrun" == "no" ]] || return 0

  (
    # The paths below are relative; deleting them from any other directory
    # than the snapshot would destroy unrelated files.
    if [[ -z "$1" ]] || ! cd "$1"; then
      echo "Could not enter the snapshot directory, nothing is removed" >&2
      exit 1
    fi
    # -depth lists children before their directory, so a directory is
    # already empty when its turn comes. Reading line by line avoids the
    # glob expansion a 'for f in $(find ...)' loop applies to each name.
    find . -mindepth 1 -depth \
      | grep -Ev -- "$bdirmatch" \
      | while IFS= read -r entry; do
          if [[ -e "$entry" || -h "$entry" ]]; then
            # rmdir only succeeds on empty directories, which keeps the
            # parents of the backed-up directories in place.
            rmdir "$entry" 2>/dev/null
            rm -f "$entry" 2>/dev/null
          fi
        done
    exit 0
  )
}
# Normalize a source given on the command line.
# One trailing '/' is dropped. A source that holds no ':' and does not
# start with '/' is taken as a local relative path: a leading './' is
# removed and the path is anchored at current_dir.
# Globals:   current_dir (read)
# Arguments: $1 - source path, either local or [user@]host:path
# Outputs:   the normalized source on stdout
knead_source_path() {
  local path="$1"

  # remove trailing '/'
  path="${path%/}"

  # make the path absolute
  if [[ "$path" != *:* && "$path" != /* ]]; then
    path="$current_dir/${path#./}"
  fi

  echo "$path"
}
# Normalize the destination directory given with --dest.
# A destination holding ':' would be remote, which is not supported, so
# the manual is shown instead (print_usage exits). Otherwise one trailing
# '/' is dropped and a relative path is anchored at current_dir, after
# removing a leading './'.
# Globals:   current_dir (read)
# Arguments: $1 - destination directory
# Outputs:   the normalized destination on stdout
knead_dest_path() {
  local target="$1"

  # The destination can not be remote
  if [[ "$target" == *:* ]]; then
    print_usage
  fi

  # remove trailing '/'
  target="${target%/}"

  # make the path absolute
  [[ "$target" == /* ]] || target="$current_dir/${target#./}"

  echo "$target"
}
# Make sure a program the script depends on can be found.
# Arguments: $1 - name of the program
# Outputs:   an explanation on stdout when the program is missing
# Returns:   0 when the program is found; otherwise calls
#            unlock_destination and exits the script with status 1
check_program()
{
  local program="$1"

  # The former test passed a stray '}' to 'type' as a second name, which
  # only worked because '}' happens to be a shell keyword.
  if ! command -v "$program" >/dev/null 2>&1; then
    echo "You need $program installed and in the path"
    echo "Aborting"
    unlock_destination
    exit 1
  fi
}
# Verify that the external tools are present and offer the options the
# script relies on (find -maxdepth/-mindepth, date -d, cp -l).
# Outputs:   an explanation on stdout for the first failing check
# Returns:   0 when every check passes; otherwise calls unlock_destination
#            and exits the script with status 1
check_for_programs()
{
  local probe_dir

  check_program rsync
  # NOTE(review): 'host' is only used by full_name, whose single call is
  # commented out in the argument parser - confirm whether this requirement
  # is still wanted.
  check_program host
  check_program date

  find /tmp -maxdepth 1 -mindepth 1 >/dev/null 2>&1 || {
    echo "'find' on your system does not support the '-maxdepth' and"
    echo "'-mindepth' options"
    echo "You might not have the GNU version of 'find'"
    echo "Aborting"
    unlock_destination
    exit 1
  }

  date -d 20030303 >/dev/null 2>&1 || {
    echo "'date' on your system does not support the '--date' option"
    echo "You might not have the GNU version of 'date'"
    echo "Aborting"
    unlock_destination
    exit 1
  }

  check_program cp

  # Probe 'cp -l' inside a private scratch directory instead of using
  # predictable file names directly in /tmp.
  if ! probe_dir=$(mktemp -d /tmp/bontmia_test_cp.XXXXXX 2>/dev/null) \
    || ! touch "${probe_dir}/file"; then
    [[ -z "${probe_dir}" ]] || rm -rf "${probe_dir}"
    echo "Could not create a temporary file on /tmp"
    echo "Please make sure that /tmp is writable"
    echo "Aborting"
    unlock_destination
    exit 1
  fi

  if ! cp -l "${probe_dir}/file" "${probe_dir}/file.2" >/dev/null 2>&1; then
    # Clean up on this path too; the probe used to be left behind here.
    rm -rf "${probe_dir}"
    echo "cp on your system does not support the -l option"
    echo "You might not have the GNU version of 'cp'"
    echo "Aborting"
    unlock_destination
    exit 1
  fi

  rm -rf "${probe_dir}"
}
#################################################################
# Called without any argument: show the manual (print_usage exits).
if [[ -z "$*" ]]; then
  print_usage
fi

# Relative paths from the command line are anchored at the start directory.
current_dir=$(pwd)

# Rotation defaults: how many backups each filter keeps.
filter_minutes="3"
filter_hours="24"
filter_days="7"
filter_weeks="4"
filter_monthly="12"
filter_years="0"

# Option defaults; the argument parser overrides them.
port="22"
dryrun="no"
do_del_old="no"
bwlimit=""
compression=""
rotation=""

# Run state.
backup_dirs=""
exit_status="0"
# Parse the command line. Options come first; the first argument that does
# not start with '-' and everything after it are taken as backup sources.
while test "x$*" != x; do
  case "$1" in
    --dryrun)
      shift
      dryrun="yes"
      ;;
    --rotation)
      shift
      # Same grammar as documented in the manual; matched by bash itself
      # so that no perl is needed.
      rotation_re='^([0-9]*)minutes([0-9]*)hours([0-9]*)days([0-9]*)weeks([0-9]*)month([0-9]*)years$'
      if [[ "$1" =~ $rotation_re ]]; then
        rotation="$1"
        # A count that was left out means "keep none".
        filter_minutes="${BASH_REMATCH[1]:-0}"
        filter_hours="${BASH_REMATCH[2]:-0}"
        filter_days="${BASH_REMATCH[3]:-0}"
        filter_weeks="${BASH_REMATCH[4]:-0}"
        filter_monthly="${BASH_REMATCH[5]:-0}"
        filter_years="${BASH_REMATCH[6]:-0}"
      else
        echo "the option to rotation were not right!"
        print_usage
      fi
      do_del_old="yes"
      shift
      ;;
    --port)
      shift
      port="$1"
      shift
      ;;
    --bwlimit)
      shift
      bwlimit="--bwlimit=$1"
      shift
      ;;
    --temp)
      shift
      tmpdir="$1"
      shift
      ;;
    --compression)
      compression="-z"
      shift
      ;;
    --dest)
      shift
      # The destination can not be remote. This has to be decided here:
      # knead_dest_path runs in a command substitution, where print_usage
      # could only end the subshell and its text would become the path.
      if [[ "$1" == *:* ]]; then
        print_usage
      fi
      backup_destination="$(knead_dest_path "$1")"
      if ! test -d "$1"; then
        echo "Destination dir $backup_destination does not exist or is not a directory"
        exit 1
      fi
      shift
      ;;
    [^-]*) # The rest is sources
      backup_dirs=""
      while test "x$*" != x; do
        dir="$1"
        if [[ "$dir" != *:* ]]; then
          # Local source: prefix it with this host's name.
          remotehost="$(hostname)"
          dir="$remotehost:$(knead_source_path "$dir")"
        else
          backuppath="${dir#*:}"
          userhost="${dir%%:*}"
          # Only an '@' in front of the first ':' separates user and host;
          # one inside the path belongs to the path.
          if [[ "$userhost" == *@* ]]; then
            remoteuser="${userhost%%@*}"
            remotehost="${userhost#*@}"
          else
            remotehost="$userhost"
            remoteuser="$(whoami)"
          fi
          #remotehost=$(full_name $remotehost)
          if [[ "$backuppath" == /* ]]; then
            # Reset for every source, otherwise the prefix found for an
            # earlier relative source would be glued onto this one.
            absolutepart=""
          else
            # A relative remote path is relative to the directory the
            # remote login starts in; ask for it as the same user and on
            # the same port that rsync will use.
            absolutepart=$(ssh -p "$port" "$remoteuser@$remotehost" pwd) || {
              echo "Could not find the start directory of $remoteuser@$remotehost" >&2
              exit 1
            }
            absolutepart="$absolutepart/"
          fi
          dir="$remoteuser@$remotehost:$absolutepart$backuppath"
        fi
        backup_dirs="$backup_dirs $(knead_source_path "$dir")"
        shift
      done
      ;;
    *)
      print_usage
      exit 1
      ;;
  esac
done
# --dest is mandatory. With an empty destination the later steps would
# change into the wrong directory and remove files from there.
if test "x$backup_destination" == x; then
  echo "Missing destination directory (--dest)" >&2
  print_usage
  exit 1
fi
check_for_programs
# Temporary files live in the destination unless --temp says otherwise.
tmpdir=${tmpdir:-"$backup_destination"}
# NOTE(review): the value set by --compression is never used here; -z is
# already part of the fixed options below - confirm this is intended.
rsync_options="-azv -T $tmpdir --force --relative --hard-links --delete $bwlimit" # --stats"
# Print an extended regular expression that matches every path (written
# relative to a snapshot root, the way 'find .' prints it) lying inside one
# of the given backup sources.
# Arguments: the sources, each as [user@]host:/path or as /path
# Outputs:   the expression on stdout, without a trailing newline
_build_bdirmatch()
{
  local entry rel
  local separator=""

  printf '%s' "^("
  for entry in "$@"; do
    if [[ "$entry" == *:* ]]; then
      # In a snapshot the directory is named after the host, without user.
      rel="./${entry#*@}/"
    else
      rel=".${entry}/"
    fi
    # The path is literal text: escape what an extended regex would read
    # as an operator, otherwise a source such as /usr/lib/c++ would never
    # match its own files.
    rel=$(printf '%s' "$rel" | sed -e 's/[][\.^$*+?(){}|]/\\&/g')
    printf '%s%s' "$separator" "$rel"
    separator="|"
  done
  printf '%s' ")"
}
# to speed up checking for files outside the backup areas
# backup_dirs is a space-separated list and is split on purpose.
# shellcheck disable=SC2086
bdirmatch=$(_build_bdirmatch $backup_dirs)
echo "bdirmatch: $bdirmatch"
# Main sequence: take the lock, back up when sources were given, rotate
# when --rotation was given, release the lock and report the status that
# was collected on the way.
lock_destination
if [[ -n "$backup_dirs" ]]; then
  make_backup
fi
if [[ "$do_del_old" == "yes" ]]; then
  delete_old_backup
fi
unlock_destination
exit $exit_status