#!/bin/bash
#
# bontmia (Backup Over Network To Multiple Incremental Archives)
#
# This was written to get the functionality of glastree (Jeremy Wohl)
# available to use towards remote hosts and having a selective long
# term storage.
#
# Written by John Enok Vollestad in April 2003 and have later
# undergone some bugfixes and enhancements.
#
# Requirements: bash, rsync, mktemp, ssh (for remote sources) and the GNU
# versions of find, date and cp.  Run without arguments for the manual.
#
# Globals shared between the functions below (all set by main):
#   backup_destination  absolute path of the archive root (--dest)
#   tmpdir              staging directory (--temp, defaults to the destination)
#   backup_dirs         array of normalised sources, "[user@]host:/abs/path"
#   bdirmatch           ERE matching everything that belongs in a snapshot
#   rsync_options       array of options handed to every rsync invocation
#   dryrun              "yes" or "no"
#   filter_*            number of backups to keep per rotation category
#   this_backup         "YYYY/MM/DD/HH:MM" of the snapshot being created
#   exit_status         becomes 1 when at least one source failed
#   lock_held           "yes" while this process owns the lock file
#   rsync_output_file   temporary file receiving rsync's standard output

#######################################
# Show the manual through the pager and terminate.
# Outputs: the manual on stdout
# Returns: never returns; exits with status 1
#######################################
print_usage() {
  # Quoted delimiter: the text is literal, nothing in it is expanded.
  # ${PAGER:-more} is left unquoted so that a PAGER with options works.
  cat <<'EOF' | ${PAGER:-more}
NAME
     Bontmia - Backup Over Network To Multiple Incremental Archives
     Version 0.14

SYNOPSIS
     bontmia --dest <dest. dir> [options] [source dir [source dir ...]]

DESCRIPTION
     Bontmia is a network-based backup tool that saves configurable
     numbers of last month, week, day, hour, and minute backups. Each
     backup is a complete snapshot of the original directories. Only
     new and changed files are copied over the network when generating
     a snapshot. Remote access is implemented securely using ssh.
     Unchanged files are stored as hard links in the archive and
     therefore takes virtually no space.

     The backups is stored in a directory structure in the format
     YYYY/MM/DD/HH:MM. Each directory contains a snapshot of the backed
     up directories. This is stored incrementally by letting every file
     not changed between backups be a hard link to the same file in the
     previous backup.

     The actual copying is done with rsync. To avoid typing inn a
     password you do as usual with ssh by generating a public key on
     the host where the backup is stored, read the manual page for ssh,
     and adding this to the list of authorized hosts on the remote
     computer, read the manual page for sshd.

     Which backup to store for how long is configurable, see below.

     The return value is 0 on success and 1 if backing up one of the
     sources failed.

     The destination can not be remote. If you want to place the
     backup on a remote server, then run Bontmia as a command with ssh
     like "ssh user@host.domain bontmia <options>".

     Use at your own risk.

ARGUMENTS
     --dest <dir>
          Sets the destination directories where the backups is placed.
          WARNING! existing files in this directory will be DELETED!

OPTIONS
     --rotation <spec>
          Specifies the number of backups in each category to save.
          The 'spec' is of the form

          [0-9]*minutes[0-9]*hours[0-9]*days[0-9]*weeks[0-9]*month[0-9]*years

          so

          0minutes0hours7days4weeks12month2years

          is an example for daily backup. Bontmia should be run in cron
          or manually the numbers given here only specifies how many
          backups to save.

          The different categories (minutes, hours ...) is just
          overlapping filters. More than one filter might save a backup
          and a backup is removed when no filter saves the backup any
          more.

          The number of days given here will apply like this: If the
          number of days to save is 3 then the last backup within each
          of the last 3 days when backup were done willl be saved. This
          means that if you do backup like this:

          day:    0 1 2 3 4 5
          backup: y - y - y y

          Where y means a backup were done and - means it were not then
          after the backup done on day 5 is done, then the backups on
          day 2, 4 and 5 is saved when the last 3 day backups is saved.

          Use the dryrun option below to experiment and learn more
          about this option.

     --dryrun
          When this option is used, the backup is not performed, but
          the backups to be removed is listed. This is handy when you
          want to ensure that you do not remove all the precious
          backups when used in combination with the rotation option.

     --bwlimit <number>
          Specifies a maximum transfer rate in kilobytes per second.
          This option is most effective with large files (several
          megabytes and up). Due to the nature of rsync transfers,
          blocks of data are sent, then if rsync determines the
          transfer was too fast, it will wait before sending the next
          data block. The result is an average transfer rate equalling
          the specified limit. A value of zero specifies no limit. The
          default is no limit.

     --temp <temp dir>
          Specifies the directory used for temporary files during
          transfer. NB! This MUST be on the same disk volume as
          destination or incremental copy will not work.

     --port <number>
          Specifies the port number to connect to on the remote host
          when using ssh, which is the only option.

EXAMPLES
     bontmia --dest ./backup --rotation \
          5minutes0hours0days0weeks0month0years \
          foo@bar:/baz/zoot

     Here there is made a copy of foo@bar:/baz/zoot in the directory
     backup on the local host. If this command is run every minute,
     there is store one backup for every minute for the last 5 minutes.
     If it is run once every day. There is still stored the last 5
     backups done at unique minutes so the last 5 backups is stored
     which means 5 last days. Hint: set the values for times shorter
     than the interval used when doing backup to 0.

     bontmia --dest ./backup --rotation \
          0minutes0hours7days4weeks12month0years \
          foo@bar:/baz/zoot

     Here there will at the most be stored 7 + 4 + 12 backups (minus
     overlap between the backups). This can be a good command to run
     nightly.

     bontmia --dest ./backup --rotation \
          0minutes24hours7days4weeks12month0years \
          foo@bar:/baz/zoot

     This is for running every hour. Remember that only the changes is
     transferred and running more often not necessarily will mean
     copying more data over the network.

     bontmia --dryrun --dest ./backup --rotation \
          0minutes24hours7days4weeks12month0years \
          /home/bar/baz foo.no:/var/db

     With the dryrun option it show what would happen if the backup
     were run. No action on the file system done.

     When bontmia runs it sends output to standard output. If you do
     not want this you can redirect it to /dev/null.

CONTACT
     Bontmia was written in april 2003 by John Enok Vollestad
     <john.enok@vollestad.no> to merge the functionality of glastree
     and rsync in one application with a more flexible selection of
     long term storage. It has later undergone some bugfixes and
     enhancements.

     http://folk.uio.no/johnen/bontmia/
EOF
  exit 1
}

# "yes" only while this process has created the lock file itself.
lock_held="no"
# Path of the temporary rsync output file, empty when none exists.
rsync_output_file=""

#######################################
# Release the destination lock, but only if this process owns it.
# An error exit that happens before the lock was taken must never
# delete the lock of another running bontmia.
# Globals: tmpdir (read), lock_held (read/written)
#######################################
unlock_destination() {
  if test "x${lock_held}" == "xyes"; then
    rm -f "${tmpdir}/is_running.lock"
    lock_held="no"
  fi
}

#######################################
# Exit handler: drop the temporary rsync output file and the lock.
# Globals: rsync_output_file (read)
#######################################
cleanup_on_exit() {
  if test -n "${rsync_output_file}"; then
    rm -f "${rsync_output_file}"
  fi
  unlock_destination
}

#######################################
# Ensure that there is no other bontmia running against the same tmpdir.
# The lock file is created with noclobber, which makes the shell refuse to
# overwrite an existing file, so two instances cannot both get the lock.
# Globals: tmpdir (read), lock_held (written)
# Returns: 0 when the lock was taken; exits 1 otherwise
#######################################
lock_destination() {
  local lockfile="${tmpdir}/is_running.lock"

  # Subshell so that noclobber does not leak into the rest of the script.
  if (
    set -o noclobber
    : >"${lockfile}"
  ) 2>/dev/null; then
    lock_held="yes"
    return 0
  fi

  if test -e "${lockfile}"; then
    echo "Bontmia is already doing a backup on this tmpdir:"
    echo " ${tmpdir}"
    echo
    echo " If you are sure this is false you can manually remove the lockfile:"
    echo " ${lockfile}"
    echo
    exit 1
  fi

  echo "Could not create the lockfile:"
  echo " ${lockfile}"
  exit 1
}

#######################################
# Print the fully qualified name of a host using the 'host' program.
# Currently unused (the call in main is commented out), which is why
# 'host' is not among the required programs.
# Arguments: $1 - host name or IP address
# Outputs:   the canonical name, or nothing when it cannot be resolved
#######################################
full_name() {
  local name="${1}"
  local lookup

  lookup=$(host "${name}")
  if grep -q "domain name pointer" <<<"${lookup}"; then
    grep "domain name pointer" <<<"${lookup}" |
      head -n 1 | cut -f5 -d' ' | sed -e 's/\.$//'
  elif grep -q "has address" <<<"${lookup}"; then
    # Resolve the first address back to its name.
    full_name "$(grep "has address" <<<"${lookup}" | head -n 1 | cut -f4 -d' ')"
  fi
}

#######################################
# List the archives selected by one rotation filter.
# Every archive is printed as "YYYY/MM/DD/HH:MM@<name>@<attribute>", the
# list is sorted newest first, reduced with 'sort <sort options>' so that
# only the newest archive per period survives, and the last <count> lines
# are kept.
# Globals:   backup_destination (read)
# Arguments: $1 - date(1) format appended as the third '@' field
#            $2 - extra regex the third field has to match
#            $3 - number of archives to keep (empty means 0)
#            $4 - options for the reducing sort, e.g. "-u -k 1,1 -t :"
#            $5 - name of the filter
# Outputs:   the selected archives on stdout, diagnostics on stderr
# Returns:   1 when the date of an archive could not be determined
#######################################
filter_template() {
  local attribute="${1}"
  local grep_value="${2}"
  # An empty count used to become "tail -", which keeps 10 lines.
  local last_number="${3:-0}"
  local sort_options="${4}"
  local template_name="${5}"
  local -a sort_args=()
  local -a sort_cmd=(cat)
  local stamps

  if test -n "${sort_options}"; then
    read -r -a sort_args <<<"${sort_options}"
    sort_cmd=(sort "${sort_args[@]}")
  fi

  # The stamps are collected before they are filtered so that a failing
  # 'date' is noticed here.  Inside a pipeline the failure would be lost
  # and the truncated list would make the caller delete good backups.
  stamps=$(
    cd "${backup_destination}" || exit 1
    for archive in */*/*/*; do
      # An unmatched glob stays literal; there is nothing to list then.
      test -e "${archive}" || continue
      case "${archive}" in
        # Staging area of an interrupted run, not a backup.
        unfinished_backup/*) continue ;;
      esac
      IFS=/ read -r year month day the_clock <<<"${archive}"
      date -d "${year}-${month}-${day}" \
        "+%Y/%m/%d/${the_clock}@${template_name}@${attribute}" || exit 1
    done
  ) || {
    echo "Could not read the time of a previous backup" >&2
    echo "Please email the author" >&2
    return 1
  }

  printf '%s\n' "${stamps}" |
    grep -E "@.*@${grep_value}" |
    sort -r |
    "${sort_cmd[@]}" |
    tail -n "${last_number}"
}

#######################################
# Remove the staging directory left behind by an interrupted run.
# NOTE: this also happens when --dryrun is given.
# Globals: tmpdir (read)
#######################################
handle_last_unfinished() {
  local unfinished="${tmpdir}/unfinished_backup"

  # 'test -e dir/*' fails as soon as the glob expands to several entries,
  # so look for a first entry with find instead.
  if test -d "${unfinished}" &&
    test -n "$(find "${unfinished}" -mindepth 1 -maxdepth 1 | head -n 1)"; then
    echo
    echo "WARNING: Last backup did not complete"
    echo " This unfinished backup is now removed"
    echo
    chmod -R u+w "${unfinished}"
    rm -rf "${unfinished}"
  fi
}

#######################################
# Seed the staging directory with hard links to the newest snapshot.
# Globals: backup_destination, tmpdir, this_backup, dryrun (read)
#          last_backup (written)
#          first (written: "yes" when no previous snapshot exists)
#######################################
make_hard-link_copy_of_last_backup() {
  last_backup=$(find "${backup_destination}" -maxdepth 4 -mindepth 4 |
    sort | tail -n 1)

  if test -n "${last_backup}"; then
    echo "Making a hard-link replication of the last backup"
    echo " (${last_backup})"
    if test "x${dryrun}" == "xno"; then
      mkdir -p "${tmpdir}/unfinished_backup/${this_backup}" || {
        unlock_destination
        exit 1
      }
      cp -lR "${last_backup}/"* \
        "${tmpdir}/unfinished_backup/${this_backup}" >/dev/null 2>&1 || {
        echo " WARNING: The last backup had NO files!"
        echo " Or tmpdir is not on the same volume as destination"
      }
    fi
    first="no"
  else
    echo "No previous backup detected, will start with an empty replication"
    if test "x${dryrun}" == "xno"; then
      mkdir -p "${tmpdir}/unfinished_backup/${this_backup}" || {
        unlock_destination
        exit 1
      }
    fi
    first="yes"
  fi
}

#######################################
# Move the finished snapshot from the staging directory to the archive.
# Globals: tmpdir, backup_destination, this_backup, dryrun (read)
#######################################
moving_complete_backup_into_archive() {
  echo
  echo "Moving the complete backup into the backup archive"
  echo " (${tmpdir}/unfinished_backup -> ${backup_destination}/)"
  if test "x${dryrun}" == "xno"; then
    mkdir -p "${backup_destination}/${this_backup}" || {
      unlock_destination
      exit 1
    }
    mv "${tmpdir}/unfinished_backup/${this_backup}/"* \
      "${backup_destination}/${this_backup}/" || {
      unlock_destination
      exit 1
    }
  fi
}

#######################################
# Abort when a snapshot for the current minute already exists.
# Globals:   backup_destination (read)
# Arguments: $1 - snapshot path relative to the destination
#######################################
do_the_backup_exist() {
  if test -e "${backup_destination}/$1"; then
    echo "The backup destination already exist"
    echo "This means that there is less than a minute since the last backup"
    unlock_destination
    exit 1
  fi
}

#######################################
# Run one rsync transfer.
# Standard output (the verbose file list) goes to rsync_output_file while
# standard error stays on our standard output, exactly as before; this
# order of the redirections is intentional.
# Globals:   rsync_options, port, rsync_output_file (read)
# Arguments: $1 - source, $2 - target directory
# Returns:   the exit status of rsync
#######################################
run_rsync() {
  # shellcheck disable=SC2069
  rsync "${rsync_options[@]}" -e "ssh -p ${port}" "$1" "$2" \
    2>&1 >"${rsync_output_file}"
}

#######################################
# Create a new snapshot of all sources.
# Every source is tried twice; a source that fails both times sets
# exit_status to 1 and the backup continues with the next source.
# Globals: backup_dirs, tmpdir, dryrun, first (read)
#          this_backup, exit_status, rsync_output_file (written)
#######################################
make_backup() {
  local dir dir_wo_user host_dir target local_host rsync_status

  # One date call: four separate calls could straddle a minute or day
  # boundary and produce a path that never existed as a point in time.
  this_backup=$(date +%Y/%m/%d/%H:%M)
  do_the_backup_exist "${this_backup}"
  handle_last_unfinished
  make_hard-link_copy_of_last_backup

  # Apply changes to the hard-link copy
  echo
  if test "x${first}" == "xyes"; then
    echo "Backing up"
  else
    echo "Backing up by modifying the replication"
  fi

  if test "x${dryrun}" == "xno"; then
    rsync_output_file=$(mktemp /tmp/bontmia_rsync_output.XXXXXX) || {
      echo "Could not create a temporary file on /tmp"
      unlock_destination
      exit 1
    }
  fi

  local_host=$(hostname)
  for dir in "${backup_dirs[@]}"; do
    dir_wo_user="${dir#*@}"
    echo " ${dir_wo_user}"
    # Every source is stored below a directory named "<host>:".
    host_dir="${dir_wo_user%%:*}:"
    if test "x${host_dir}" == "x${local_host}:"; then
      # remove hostname from dir to speed up local backup
      dir="${dir#*:}"
    fi
    if test "x${dryrun}" == "xno"; then
      target="${tmpdir}/unfinished_backup/${this_backup}/${host_dir}"
      mkdir -p "${target}"

      # The status is stored at once; any command in between (even an
      # echo) would reset $? to 0.
      rsync_status=0
      run_rsync "${dir}" "${target}" || rsync_status=$?
      if test "${rsync_status}" -ne 0; then
        echo
        echo " Caught an error doing rsync (return code ${rsync_status})"
        echo " The last 10 lines of output from rsync:"
        tail -n 10 "${rsync_output_file}"
        echo
        echo " Retrying rsync..."
        rsync_status=0
        run_rsync "${dir}" "${target}" || rsync_status=$?
        if test "${rsync_status}" -ne 0; then
          echo
          echo " Still no luck. Rsync failed with returncode ${rsync_status}"
          echo " ${dir}"
          echo " Please check that there is room for all the data"
          echo " The last 10 lines of output from rsync:"
          tail -n 10 "${rsync_output_file}"
          echo
          echo
          echo " Continuing with the next backup source"
          echo
          exit_status="1"
        fi
      fi
    fi
  done

  if test -n "${rsync_output_file}"; then
    rm -f "${rsync_output_file}"
    rsync_output_file=""
  fi

  delete_outside_sync "${tmpdir}/unfinished_backup/${this_backup}/"
  moving_complete_backup_into_archive
  if test "x${dryrun}" == "xno"; then
    chmod -R u+w "${tmpdir}/unfinished_backup"
    rm -rf "${tmpdir}/unfinished_backup"
  fi
}

#######################################
# Apply the rotation: delete every archive that no filter wants to keep.
# By saving the x last, the backups will not be deleted even if no new
# backups is created.  With --dryrun the decisions are only printed.
# Nothing is deleted when the list of archives to save cannot be built.
# Globals: backup_destination, dryrun, filter_* (read)
#######################################
delete_old_backup() {
  local count archives_with_filter archives_to_save archive filters

  echo
  echo "Calculates which backups to save"
  echo "(${filter_minutes} minutes, ${filter_hours} hours, ${filter_days} days, ${filter_weeks} weeks, ${filter_monthly} months, ${filter_years} years)"

  # Validate before any command substitution so that print_usage really
  # terminates the script.  An empty count is accepted and means 0.
  for count in "${filter_minutes}" "${filter_hours}" "${filter_days}" \
    "${filter_weeks}" "${filter_monthly}" "${filter_years}"; do
    if ! test "0${count}" -ge 0 2>/dev/null; then
      unlock_destination
      print_usage
    fi
  done

  archives_with_filter=$(
    filter_template "" "" "${filter_minutes}" "-u" "minutes" &&
      filter_template "" "" "${filter_hours}" "-u -k 1,1 -t :" "hours" &&
      filter_template "" "" "${filter_days}" "-u -k 1,3 -t /" "days" &&
      filter_template "%Y-%V" "" "${filter_weeks}" "-u -k 3 -t @" "weeks" &&
      filter_template "%B" "" "${filter_monthly}" "-u -k 1,2 -t /" "month" &&
      filter_template "%Y" "" "${filter_years}" "-u -k 1,1 -t /" "years"
  ) || {
    # An incomplete save list would delete backups that must be kept.
    unlock_destination
    exit 1
  }
  archives_to_save=$(printf '%s\n' "${archives_with_filter}" |
    cut -f1 -d'@' | sort -u)

  (
    cd "${backup_destination}" || exit 1
    for archive in */*/*/*; do
      # An unmatched glob stays literal; there is nothing to rotate then.
      test -e "${archive}" || continue
      if ! grep -Fxq -- "${archive}" <<<"${archives_to_save}"; then
        echo " Removing ${backup_destination}/${archive}"
        if test "x${dryrun}" == "xno"; then
          chmod -R u+w "${archive}"
          rm -rf "${archive}"
          # Remove the day/month/year directories that became empty.
          rmdir -p "${archive%/*}" 2>/dev/null
        fi
      else
        echo -n " Saving ${backup_destination}/${archive} by filters: "
        filters=$(grep -F -- "${archive}@" <<<"${archives_with_filter}" |
          cut -f2 -d'@')
        # Unquoted on purpose: joins the filter names on one line.
        # shellcheck disable=SC2086
        echo ${filters}
      fi
    done
  )
}

#######################################
# Since we do a copy from last sync with cp -l, we have to remove the
# extras here: everything in the new snapshot that is outside the sources
# of this run.
# Globals:   dryrun, bdirmatch (read)
# Arguments: $1 - root directory of the staged snapshot
#######################################
delete_outside_sync() {
  local extras f

  echo
  echo "Deletes files that should not be in the latest snapshot"
  if test "x${dryrun}" == "xno"; then
    (
      # Without this check a failing cd would make the loop below delete
      # files relative to whatever directory we happen to be in.
      cd "$1" || exit 1
      # The list is complete before the first entry is deleted; -depth
      # lists the content of a directory before the directory itself.
      extras=$(find . -mindepth 1 -depth | grep -E -v -- "${bdirmatch}")
      while IFS= read -r f; do
        if test -e "${f}" || test -h "${f}"; then
          rmdir "${f}" 2>/dev/null
          rm -f "${f}" 2>/dev/null
        fi
      done <<<"${extras}"
    )
  fi
}

#######################################
# Normalise a source: drop one trailing '/', make local paths absolute.
# Globals:   current_dir (read)
# Arguments: $1 - source, either a local path or [user@]host:path
# Outputs:   the normalised source on stdout
#######################################
knead_source_path() {
  local path="$1"

  # remove trailing '/', unless that slash is the whole path (the root)
  case "${path}" in
    / | *:/) ;;
    */) path="${path%/}" ;;
  esac

  # make the path absolute
  if [[ "${path}" != *:* && "${path}" != /* ]]; then
    if test "x${path}" == "x."; then
      # "<dir>/." would act as a cut point for rsync --relative.
      path="${current_dir}"
    else
      path="${current_dir}/${path#./}"
    fi
  fi
  printf '%s\n' "${path}"
}

#######################################
# Normalise the destination: drop one trailing '/', make it absolute.
# Globals:   current_dir (read)
# Arguments: $1 - destination directory
# Outputs:   the normalised destination on stdout
# Returns:   1 when the destination is remote (contains ':'); the caller
#            shows the manual, which cannot be done from in here because
#            this function runs inside a command substitution
#######################################
knead_dest_path() {
  local dest="$1"

  if [[ "${dest}" == *:* ]]; then
    # The destination can not be remote
    return 1
  fi

  # remove trailing '/', unless that slash is the whole path
  if test "x${dest}" != "x/"; then
    dest="${dest%/}"
  fi

  # make the path absolute
  if [[ "${dest}" != /* ]]; then
    dest="${current_dir}/${dest#./}"
  fi
  printf '%s\n' "${dest}"
}

#######################################
# Abort unless a program can be found in the path.
# Arguments: $1 - name of the program
#######################################
check_program() {
  local program="$1"

  command -v "${program}" >/dev/null 2>&1 || {
    echo "You need ${program} installed and in the path"
    echo "Aborting"
    unlock_destination
    exit 1
  }
}

#######################################
# Verify that the required programs exist and are the GNU versions.
#######################################
check_for_programs() {
  local probe_dir="" cp_status

  check_program rsync
  check_program date

  find /tmp -maxdepth 1 -mindepth 1 >/dev/null 2>/dev/null || {
    echo "'find' on your system does not support the '--maxdepth' and"
    echo "'--mindepth' options"
    echo "You might not have the GNU version of 'find'"
    echo "Aborting"
    unlock_destination
    exit 1
  }

  date -d 20030303 >/dev/null 2>/dev/null || {
    echo "'date' on your system does not support the '--date' option"
    echo "You might not have the GNU version of 'date'"
    echo "Aborting"
    unlock_destination
    exit 1
  }

  check_program cp

  # Probe for 'cp -l' in a private directory that is always removed.
  if ! probe_dir=$(mktemp -d /tmp/bontmia_test_cp.XXXXXX) ||
    ! touch "${probe_dir}/file"; then
    if test -n "${probe_dir}"; then
      rm -rf "${probe_dir}"
    fi
    echo "Could not create a temporary file on /tmp"
    echo "Please make sure that /tmp is writable"
    echo "Aborting"
    unlock_destination
    exit 1
  fi
  cp_status=0
  cp -l "${probe_dir}/file" "${probe_dir}/file.2" >/dev/null 2>&1 ||
    cp_status=$?
  rm -rf "${probe_dir}"
  if test "${cp_status}" -ne 0; then
    echo "cp on your system does not support the -l option"
    echo "You might not have the GNU version of 'cp'"
    echo "Aborting"
    unlock_destination
    exit 1
  fi
}

#######################################
# Build the regular expression used to speed up checking for files outside
# the backup areas.  It matches, relative to the snapshot root, everything
# below one of the sources.  The paths are escaped because characters such
# as '+', '(' or '.' would otherwise be read as regex operators, and the
# mismatch would make delete_outside_sync remove the fresh copy.
# Globals: backup_dirs (read)
# Outputs: the expression, e.g. ^(\./host:/home/a/|\./host:/etc/)
#######################################
build_bdirmatch() {
  local d entry escaped pattern=""

  for d in "${backup_dirs[@]}"; do
    if [[ "${d}" == *:* ]]; then
      entry="./${d#*@}"
    else
      entry=".${d}"
    fi
    # Exactly one trailing slash, also for a source that is the root.
    entry="${entry%/}/"
    escaped=$(printf '%s' "${entry}" | sed -e 's/[][\.*^$+?(){}|]/\\&/g')
    pattern="${pattern:+${pattern}|}${escaped}"
  done
  printf '^(%s)' "${pattern}"
}

#################################################################

#######################################
# Parse the command line, then back up and/or rotate.
# Arguments: the command line of the script
#######################################
main() {
  local dir backuppath userhost remotehost remoteuser absolutepart remote_cwd
  local rotation_re='^([0-9]*)minutes([0-9]*)hours([0-9]*)days([0-9]*)weeks([0-9]*)month([0-9]*)years$'

  if test $# -eq 0; then
    print_usage
  fi

  # No 'local' on the settings below: they are the script-wide globals.
  current_dir=$(pwd)
  filter_minutes="3"
  filter_hours="24"
  filter_days="7"
  filter_weeks="4"
  filter_monthly="12"
  filter_years="0"
  bwlimit=""
  backup_dirs=()
  backup_destination=""
  exit_status="0"
  do_del_old="no"
  port="22"
  # Accepted for compatibility; rsync is always run with -z.
  compression=""
  dryrun="no"

  trap cleanup_on_exit EXIT
  trap 'exit 130' INT
  trap 'exit 143' TERM

  while test $# -gt 0; do
    case "$1" in
      --dryrun)
        shift
        dryrun="yes"
        ;;
      --rotation)
        if [[ "${2-}" =~ ${rotation_re} ]]; then
          filter_minutes="${BASH_REMATCH[1]}"
          filter_hours="${BASH_REMATCH[2]}"
          filter_days="${BASH_REMATCH[3]}"
          filter_weeks="${BASH_REMATCH[4]}"
          filter_monthly="${BASH_REMATCH[5]}"
          filter_years="${BASH_REMATCH[6]}"
        else
          echo "the option to rotation were not right!"
          print_usage
        fi
        do_del_old="yes"
        shift 2
        ;;
      --port)
        # The port ends up inside the command line given to 'rsync -e'.
        if ! [[ "${2-}" =~ ^[0-9]+$ ]]; then
          print_usage
        fi
        port="$2"
        shift 2
        ;;
      --bwlimit)
        test $# -ge 2 || print_usage
        bwlimit="--bwlimit=$2"
        shift 2
        ;;
      --temp)
        test $# -ge 2 || print_usage
        tmpdir="$2"
        shift 2
        ;;
      --compression)
        compression="-z"
        shift
        ;;
      --dest)
        test $# -ge 2 || print_usage
        backup_destination=$(knead_dest_path "$2") || print_usage
        if ! test -d "${backup_destination}"; then
          echo "Destination dir ${backup_destination} does not exist or is not a directory"
          exit 1
        fi
        shift 2
        ;;
      [^-]*)
        # The rest is sources
        backup_dirs=()
        while test $# -gt 0; do
          dir="$1"
          if [[ "${dir}" != *:* ]]; then
            dir="$(hostname):$(knead_source_path "${dir}")"
          else
            backuppath="${dir#*:}"
            userhost="${dir%%:*}"
            # Only an '@' before the first ':' separates user and host.
            if [[ "${userhost}" == *@* ]]; then
              remotehost="${userhost#*@}"
              remoteuser="${userhost%%@*}"
            else
              remotehost="${userhost}"
              remoteuser="$(whoami)"
            fi
            #remotehost=$(full_name $remotehost)
            if [[ "${backuppath}" == /* ]]; then
              absolutepart=""
            else
              # A relative remote path is relative to the directory the
              # remote user lands in, so ask as that user on that port.
              remote_cwd=$(ssh -p "${port}" \
                "${remoteuser}@${remotehost}" pwd) || {
                echo "Could not determine the working directory on ${remotehost}"
                exit 1
              }
              absolutepart="${remote_cwd}/"
            fi
            dir="${remoteuser}@${remotehost}:${absolutepart}${backuppath}"
          fi
          backup_dirs+=("$(knead_source_path "${dir}")")
          shift
        done
        ;;
      *)
        print_usage
        exit 1
        ;;
    esac
  done

  # Without a destination everything below would operate on "/".
  if test -z "${backup_destination}"; then
    echo "Missing destination directory (--dest)"
    print_usage
  fi

  check_for_programs

  tmpdir=${tmpdir:-"${backup_destination}"}
  rsync_options=(-azv -T "${tmpdir}" --force --relative --hard-links --delete)
  if test -n "${bwlimit}"; then
    rsync_options+=("${bwlimit}")
  fi

  bdirmatch=$(build_bdirmatch)
  echo "bdirmatch: ${bdirmatch}"

  lock_destination

  if test "${#backup_dirs[@]}" -gt 0; then
    make_backup
  fi

  if test "x${do_del_old}" == "xyes"; then
    delete_old_backup
  fi

  unlock_destination

  exit "${exit_status}"
}

main "$@"