{{tag>ssh rsync backup script}}

  #!/bin/bash
  #
  # bontmia (Backup Over Network To Multiple Incremental Archives)
  #
  # This was written to get the functionality of glastree (Jeremy Wohl)
  # available to use towards remote hosts and having a selective long
  # term storage.
  #
  # Written by John Enok Vollestad in April 2003; it has since
  # undergone some bugfixes and enhancements.
  
  # Show the manual-page style help text and terminate the script.
  #
  # The text is piped through the program named in $PAGER, falling back
  # to "more" when PAGER is unset or empty.  The function never returns:
  # it always ends with "exit 1", so callers use it as "print help and
  # abort".  When it is called inside a command substitution the exit
  # only terminates that subshell.
  #
  # The here-document delimiter is unquoted, so the doubled backslashes
  # in the EXAMPLES section are printed as single line-continuation
  # backslashes.
  #
  # NOTE(review): the option parser of this script also accepts
  # --compression, which is not described in the text below - confirm
  # whether it should be documented.
  print_usage()
  {
      cat <<EOF | ${PAGER:-more}
  
  NAME
          Bontmia - Backup Over Network To Multiple Incremental Archives
          Version 0.14
  
  SYNOPSIS
          bontmia --dest <dest. dir> [options] [source dir [source dir ...]]
  
  DESCRIPTION
          Bontmia is a network-based backup tool that saves configurable
          numbers of last month, week, day, hour, and minute backups.
          Each backup is a complete snapshot of the original
          directories.  Only new and changed files are copied over the
          network when generating a snapshot.  Remote access is
          implemented securely using ssh.  Unchanged files are stored as
          hard links in the archive and therefore takes virtually no
          space.
  
          The backups is stored in a directory structure in the format
          YYYY/MM/DD/HH:MM.  Each directory contains a snapshot of the
          backed up directories.  This is stored incrementally by
          letting every file not changed between backups be a hard link
          to the same file in the previous backup.  The actual copying
          is done with rsync.  To avoid typing inn a password you do as
          usual with ssh by generating a public key on the host where
          the backup is stored, read the manual page for ssh, and adding
          this to the list of authorized hosts on the remote computer,
          read the manual page for sshd.
  
          Which backup to store for how long is configurable, see below.
  
          The return value is 0 on success and 1 if backing up one of
          the sources failed.
  
          The destination can not be remote.  If you want to place the
          backup on a remote server, then run Bontmia as a command with
          ssh like "ssh user@host.domain bontmia <options>".
  
          Use at your own risk.
  
  ARGUMENTS
          --dest <dir> 
                  Sets the destination directories where the backups is
                  placed.  WARNING! existing files in this directory
                  will be DELETED!
  
  
  OPTIONS
          --rotation <spec>
  
                  Specifies the number of backups in each category to
                  save.  The 'spec' is of the form
  
            [0-9]*minutes[0-9]*hours[0-9]*days[0-9]*weeks[0-9]*month[0-9]*years
  
                  so 0minutes0hours7days4weeks12month2years is an
                  example for daily backup.  Bontmia should be run in
                  cron or manually the numbers given here only specifies
                  how many backups to save.
  
                  The different categories (minutes, hours ...) is just
                  overlapping filters.  More than one filter might save
                  a backup and a backup is removed when no filter saves
                  the backup any more.
  
                  The number of days given here will apply like this:
  
                  If the number of days to save is 3 then the last
                  backup within each of the last 3 days when backup were
                  done willl be saved.  This means that if you do backup
                  like this:
  
                  day:    0 1 2 3 4 5
                  backup: y - y - y y
  
                  Where y means a backup were done and - means it were
                  not then after the backup done on day 5 is done, then
                  the backups on day 2, 4 and 5 is saved when the last 3
                  day backups is saved.  Use the dryrun option below to
                  experiment and learn more about this option.
  
          --dryrun
                  When this option is used, the backup is not performed,
                  but the backups to be removed is listed.  This is
                  handy when you want to ensure that you do not remove
                  all the precious backups when used in combination with
                  the rotation option.
  
          --bwlimit <number>
                  Specifies a maximum transfer rate in kilobytes per
                  second. This option is most effective with large files
                  (several megabytes and up). Due to the nature of rsync
                  transfers, blocks of data are sent, then if rsync
                  determines the transfer was too fast, it will wait
                  before sending the next data block. The result is an
                  average transfer rate equalling the specified limit. A
                  value of zero specifies no limit.  The default is no
                  limit.
  
          --temp <temp dir>
                  Specifies the directory used for temporary files during
                  transfer.  NB! This MUST be on the same disk volume as
                  destination or incremental copy will not work.
  
          --port <number>
                  Specifies the port number to connect to on the remote
                  host when using ssh, which is the only option.
  
  
  EXAMPLES
          bontmia --dest ./backup  --rotation \\
                  5minutes0hours0days0weeks0month0years \\
                  foo@bar:/baz/zoot
  
          Here there is made a copy of foo@bar:/baz/zoot in the
          directory backup on the local host.  If this command is run
          every minute, there is store one backup for every minute for
          the last 5 minutes.  If it is run once every day.  There is
          still stored the last 5 backups done at unique minutes so the
          last 5 backups is stored which means 5 last days.
  
          Hint: set the values for times shorter than the interval used
                when doing backup to 0.
  
          bontmia --dest ./backup  --rotation \\
                  0minutes0hours7days4weeks12month0years \\
                  foo@bar:/baz/zoot
  
          Here there will at the most be stored 7 + 4 + 12 backups
          (minus overlap between the backups).  This can be a good
          command to run nightly.
  
  
          bontmia --dest ./backup  --rotation \\
                  0minutes24hours7days4weeks12month0years \\
                  foo@bar:/baz/zoot
  
          This is for running every hour.  Remember that only the
          changes is transferred and running more often not necessarily
          will mean copying more data over the network.
  
  
          bontmia --dryrun --dest ./backup  --rotation \\
                  0minutes24hours7days4weeks12month0years \\
                  /home/bar/baz foo.no:/var/db
  
          With the dryrun option it show what would happen if the backup
          were run.  No action on the file system done.
  
  
          When bontmia runs it sends output to standard output.  If you
          do not want this you can redirect it to /dev/null.
  
  CONTACT
          Bontmia was written in april 2003 by John Enok Vollestad
          <john.enok@vollestad.no> to merge the functionality of
          glastree and rsync in one application with a more flexible
          selection of long term storage.  It has later undergone some
          bugfixes and enhancements. http://folk.uio.no/johnen/bontmia/
  
  EOF
      exit 1
  }
  
  
  # Release the lock taken by lock_destination by deleting the lockfile
  # that lives in ${tmpdir}.  A lockfile that is already gone is not an
  # error (rm -f).
  unlock_destination()
  {
      local lock_path="${tmpdir}/is_running.lock"

      rm -f "${lock_path}"
  }
  
  
  # Take the lock that keeps two bontmia runs from working in the same
  # ${tmpdir} at once.
  #
  # The lock is the file ${tmpdir}/is_running.lock.  It is created with
  # the shell's noclobber option set, so the creation itself fails when
  # the file already exists; no prompt text has to be parsed and no
  # helper file is left behind in /tmp.
  #
  # Globals:  tmpdir (read)
  # Outputs:  an explanation on stdout when the lock cannot be taken
  # Exits:    1 when another run holds the lock or the lockfile cannot
  #           be created; returns 0 once the lock is held
  lock_destination()
  {
      local lockfile="${tmpdir}/is_running.lock"
  
      # The subshell keeps noclobber from leaking into the rest of the
      # script.
      if ( set -o noclobber; : > "${lockfile}" ) 2>/dev/null; then
          return 0
      fi
  
      if test -e "${lockfile}"; then
          echo "Bontmia is already doing a backup on this tmpdir:"
          echo "    ${tmpdir}"
          echo
          echo "    If you are sure this is false you can manually remove the lockfile:"
          echo "        ${tmpdir}/is_running.lock"
          echo
          exit 1
      fi
  
      # Running without a lock would allow concurrent runs to corrupt
      # each other, so refuse to continue.
      echo "Could not create the lockfile:"
      echo "    ${lockfile}"
      echo
      exit 1
  }
  
  
  # Print the fully qualified name of a host, as reported by host(1).
  #
  # If the lookup of $1 yields "domain name pointer" lines, the fifth
  # space-separated field of the output is printed with the trailing dot
  # removed.  If it yields "has address" lines instead, the first
  # address is looked up in turn.  Anything else prints nothing.
  #
  # Arguments: $1 - host name or address
  # Outputs:   the resolved name(s) on stdout
  full_name() {
      local name="${1}"
      local lookup address
  
      # One lookup per name; the answer is reused by every test below.
      lookup=$(host "${name}")
  
      if echo "${lookup}" | grep "domain name pointer" >/dev/null; then
          echo "${lookup}" | cut -f5 -d' ' | sed -e 's/\.$//'
      elif echo "${lookup}" | grep "has address" >/dev/null; then
          address=$(echo "${lookup}" | grep "has address" | head -n 1 | cut -f4 -d' ')
          full_name "${address}"
      fi
  }
  
  
  # List the archives one rotation filter wants to keep.
  #
  # Every archive directory YYYY/MM/DD/HH:MM below ${backup_destination}
  # is printed as "YYYY/MM/DD/HH:MM@<template_name>@<attribute>", where
  # <attribute> is the date(1) format given in $1 expanded for the
  # archive's day.  The lines are filtered, sorted newest first, reduced
  # by the given sort options and the last $3 of them are printed.
  #
  # Arguments:
  #   $1 - date(1) format appended after the second '@' (may be empty)
  #   $2 - extended regex that must match after the second '@'
  #   $3 - number of lines to keep; empty counts as 0
  #   $4 - options for the second sort(1) pass, as one string; when
  #        empty the second pass is skipped
  #   $5 - filter name placed between the two '@'
  # Globals:  backup_destination (read)
  # Outputs:  the selected lines on stdout, diagnostics on stderr
  # Returns:  0 on success, non-zero when the destination cannot be
  #           entered or an entry could not be parsed as a date
  filter_template()
  {
      local attribute="${1}"
      local grep_value="${2}"
      local last_number="${3:-0}"
      local sort_options="${4}"
      local template_name="${5}"
      local -a sorter
  
      if test "x${sort_options}" != x; then
          # The option string is deliberately split into separate words.
          IFS=' ' read -r -a sorter <<<"sort ${sort_options}"
      else
          sorter=(cat)
      fi
  
      (
          cd "${backup_destination}" || exit 1
          {
              failed=0
              for archive in */*/*/*; do
                  # An unmatched glob stays literal; there is nothing to list.
                  test -e "${archive}" || continue
  
                  IFS=/ read -r year month day the_clock <<<"${archive}"
                  the_date=${year}-${month}-${day}
                  date -d "${the_date}" "+%Y/%m/%d/${the_clock}@${template_name}@${attribute}" || {
                      # Report on stderr (stdout is the filter pipeline) and
                      # keep going: stopping here would drop every later,
                      # newer archive from the list of backups to keep.
                      echo "Could not read the time of a previous backup" >&2
                      echo "Please email the author" >&2
                      failed=1
                  }
              done
              exit "${failed}"
          } | grep -E "@.*@${grep_value}" | sort -r | "${sorter[@]}" | tail -n "${last_number}"
          # Propagate only the status of the listing stage.
          exit "${PIPESTATUS[0]}"
      )
  }
  
  
  # Remove what an interrupted run left behind in
  # ${tmpdir}/unfinished_backup.
  #
  # Nothing is done (and nothing is printed) when that directory is
  # missing or holds no visible entries.  Otherwise a warning is printed
  # and the whole directory is made writable and deleted.
  #
  # Globals:  tmpdir (read)
  handle_last_unfinished()
  {
      local unfinished="${tmpdir}/unfinished_backup"
      local -a leftovers
  
      # Expanding the glob into an array copes with any number of
      # entries and with blanks in ${tmpdir}; an unmatched glob stays
      # literal and fails both tests below.
      leftovers=("${unfinished}"/*)
  
      if test -e "${leftovers[0]}" || test -h "${leftovers[0]}"; then
          echo
          echo "WARNING: Last backup did not complete"
          echo "  This unfinished backup is now removed"
          echo
          chmod -R u+w "${unfinished}"
          rm -rf "${unfinished}"
      fi
  }
  
  
  # Seed the staging directory for the new snapshot.
  #
  # The newest existing archive is the entry at depth 4
  # (YYYY/MM/DD/HH:MM) below ${backup_destination} that sorts last.  If
  # there is one, its content is replicated with hard links into
  # ${tmpdir}/unfinished_backup/${this_backup}; otherwise an empty
  # staging directory is created.  With dryrun other than "no" only the
  # messages are printed.
  #
  # Globals:  backup_destination, tmpdir, this_backup, dryrun (read)
  #           last_backup (set to the newest archive, or empty)
  #           first       (set to "no" if an archive existed, else "yes")
  # Exits:    1, after releasing the lock, when the staging directory
  #           cannot be created
  make_hard-link_copy_of_last_backup()
  {
      local staging="${tmpdir}/unfinished_backup/${this_backup}"
  
      # Quoted so that a destination containing blanks is searched as a
      # single path.
      last_backup=$(find "${backup_destination}" -maxdepth 4 -mindepth 4 | sort | tail -n 1)
  
      if test "x${last_backup}" != x; then
          echo "Making a hard-link replication of the last backup"
          echo "  (${last_backup})"
          if test "x${dryrun}" == "xno"; then
              mkdir -p "${staging}" || {
                  unlock_destination
                  exit 1
              }
              # A failing copy is only warned about: rsync will then
              # transfer everything again.
              cp -lR "${last_backup}/"* "${staging}" >/dev/null 2>&1 || {
                  echo "    WARNING: The last backup had NO files!"
                  echo "             Or tmpdir is not on the same volume as destination"
              }
          fi
          first="no"
      else
          echo "No previous backup detected, will start with an empty replication"
          if test "x${dryrun}" == "xno"; then
              mkdir -p "${staging}" || {
                  unlock_destination
                  exit 1
              }
          fi
          first="yes"
      fi
  }
  
  
  # Publish the finished snapshot: everything staged below
  # $tmpdir/unfinished_backup/$this_backup is moved into
  # $backup_destination/$this_backup.  The announcement is always
  # printed; files are only touched when dryrun is "no".  If the target
  # directory cannot be created or the move fails, the lock is released
  # and the script exits with status 1.
  moving_complete_backup_into_archive()
  {
      local staged="$tmpdir/unfinished_backup/$this_backup"
      local archived="$backup_destination/$this_backup"
  
      echo
      echo "Moving the complete backup into the backup archive"
      echo "  ($tmpdir/unfinished_backup -> $backup_destination/)"
  
      # Dry runs stop after the announcement.
      [[ "x$dryrun" == "xno" ]] || return 0
  
      if ! mkdir -p "$archived"; then
          unlock_destination
          exit 1
      fi
  
      if ! mv "$staged/"* "$archived/"; then
          unlock_destination
          exit 1
      fi
  }
  
  
  # Abort when the archive directory for this run is already there.
  #
  # $1 is the archive path relative to $backup_destination.  If that
  # path exists, two explanatory lines are printed, the lock is released
  # and the script exits with status 1; otherwise the function returns
  # quietly.
  do_the_backup_exist()
  {
      [[ -e "$backup_destination/$1" ]] || return 0
  
      echo "The backup destination already exist"
      echo "This means that there is less than a minute since the last backup"
      unlock_destination
      exit 1
  }
  
  
  # Create one new snapshot of every source in ${backup_dirs}.
  #
  # Steps: pick the archive name from the current time, refuse to reuse
  # an existing archive, clean up after an interrupted run, seed the
  # staging directory with hard links to the newest archive, rsync each
  # source on top of it (with one retry per source), purge files that do
  # not belong to any source, and finally move the result into the
  # archive.  With dryrun other than "no" nothing is transferred.
  #
  # Globals:  backup_dirs, rsync_options, port, tmpdir, dryrun (read)
  #           first (read; set by make_hard-link_copy_of_last_backup)
  #           this_backup (set to YYYY/MM/DD/HH:MM of this run)
  #           exit_status (set to "1" when a source failed twice)
  make_backup()
  {
      local dir dir_wo_user hostname target rsync_log rsync_status
  
      # A single date call, so the path cannot mix two different minutes
      # (or days) when the clock ticks over while it is being built.
      this_backup=$(date +%Y/%m/%d/%H:%M)
  
      do_the_backup_exist "$this_backup"
  
      handle_last_unfinished
      make_hard-link_copy_of_last_backup
  
      # Apply changes to the hard-link copy
      echo
      if test "x$first" == "xyes"; then
          echo "Backing up"
      else
          echo "Backing up by modifying the replication"
      fi
  
      # Private log for rsync's file listing; only needed for real runs.
      rsync_log=""
      if test "x$dryrun" == "xno"; then
          rsync_log=$(mktemp /tmp/bontmia_rsync_output.XXXXXX) || {
              echo "Could not create a temporary file on /tmp"
              unlock_destination
              exit 1
          }
      fi
  
      # backup_dirs is a blank-separated list and is split on purpose.
      for dir in ${backup_dirs} ; do
          dir_wo_user=$(echo "$dir" | cut -f2- -d'@')
          echo "  ${dir_wo_user}"
          hostname="${dir_wo_user%%:*}:"
  
          if test "x$hostname" == "x$(hostname):"; then
              # remove hostname from dir to speed up local backup; only
              # the first ':' separates host and path
              dir="${dir#*:}"
          fi
  
          if test "x$dryrun" == "xno"; then
              target="${tmpdir}/unfinished_backup/$this_backup/${hostname}"
              mkdir -p "${target}"
  
              # NOTE(review): as before, rsync's stderr is sent to this
              # script's stdout and only its stdout goes to the log -
              # confirm that this split is intended.
              rsync ${rsync_options} -e "ssh -p $port" "${dir}" "${target}" 2>&1 >"${rsync_log}"
              rsync_status=$?
              if test "${rsync_status}" -ne 0; then
                  echo
                  echo "  Caught an error doing rsync (return code ${rsync_status})"
                  echo "  The last 10 lines of output from rsync:"
                  tail -n 10 "${rsync_log}"
                  echo
                  echo "  Retrying rsync..."
                  rsync ${rsync_options} -e "ssh -p $port" "${dir}" "${target}" 2>&1 >"${rsync_log}"
                  rsync_status=$?
                  if test "${rsync_status}" -ne 0; then
                      echo
                      echo "  Still no luck.  Rsync failed with returncode ${rsync_status}"
                      echo "  $dir"
                      echo "  Please check that there is room for all the data"
                      echo "  The last 10 lines of output from rsync:"
                      tail -n 10 "${rsync_log}"
                      echo
                      echo
                      echo "  Continuing with the next backup source"
                      echo
                      exit_status="1"
                  fi
              fi
          fi
      done
  
      if test "x${rsync_log}" != x; then
          rm -f "${rsync_log}"
      fi
  
      delete_outside_sync "${tmpdir}/unfinished_backup/$this_backup/"
  
      moving_complete_backup_into_archive
  
      if test "x$dryrun" == "xno"; then
          chmod -R u+w "${tmpdir}/unfinished_backup"
          rm -rf "$tmpdir/unfinished_backup"
      fi
  }
  
  
  # Apply the rotation: keep every archive selected by at least one of
  # the six filters (minutes, hours, days, weeks, month, years) and
  # remove the rest.
  #
  # Each filter is evaluated by filter_template; an archive is kept when
  # its path appears in the output of any of them.  For every archive a
  # "Removing ..." or "Saving ... by filters: ..." line is printed.
  # Archives are only deleted when dryrun is "no".
  #
  # Globals:  filter_minutes, filter_hours, filter_days, filter_weeks,
  #           filter_monthly, filter_years, backup_destination, dryrun
  #           (all read)
  # Exits:    via print_usage, after releasing the lock, when one of the
  #           filter counts is not a number
  delete_old_backup()
  {
      local value archives_with_filter archives_to_save
  
      echo
      echo "Calculates which backups to save"
      echo "($filter_minutes minutes, $filter_hours hours, $filter_days days, $filter_weeks weeks, $filter_monthly months, $filter_years years)"
  
      # Validate before anything is computed.  Doing this outside of the
      # command substitution below lets print_usage really abort the
      # script and keeps the help text out of the list of archives.
      # The leading 0 makes an empty count valid (it means 0).
      for value in "$filter_minutes" "$filter_hours" "$filter_days" \
                   "$filter_weeks" "$filter_monthly" "$filter_years"; do
          if ! test "0${value}" -ge 0 2>/dev/null; then
              unlock_destination
              print_usage
              exit 1
          fi
      done
  
      # by saving the x last, the backups will not be deleted even if no
      # new backups is created
      archives_with_filter=$(
          filter_template "" "" "$filter_minutes" "-u" "minutes"
          filter_template "" "" "$filter_hours" "-u -k 1,1 -t :" "hours"
          filter_template "" "" "$filter_days" "-u -k 1,3 -t /" "days"
          filter_template "%Y-%V" "" "$filter_weeks" "-u -k 3 -t @" "weeks"
          filter_template "%B" "" "$filter_monthly" "-u -k 1,2 -t /" "month"
          filter_template "%Y" "" "$filter_years" "-u -k 1,1 -t /" "years"
      )
  
      archives_to_save=$(echo "$archives_with_filter" | cut -f1 -d'@' | sort -u)
  
      (
          # Never continue in the caller's directory: the loop below
          # removes whatever the glob finds.
          cd "${backup_destination}" || exit 1
          for archive in */*/*/*; do
              # An unmatched glob stays literal; nothing to do then.
              test -e "${archive}" || continue
  
              # Archive names are compared as fixed whole lines, not as
              # regular expressions.
              if ! echo "${archives_to_save}" | grep -Fxq -- "${archive}"; then
                  echo "  Removing ${backup_destination}/$archive"
                  if test "x$dryrun" == "xno"; then
                      chmod -R u+w "${archive}"
                      rm -rf "${archive}"
                      # Drop the day/month/year directories that became empty.
                      rmdir -p "${archive%/*}" 2>/dev/null
                  fi
              else
                  echo -n "  Saving ${backup_destination}/$archive by filters:  "
                  filters=$(echo "$archives_with_filter" | grep -F -- "${archive}@" | cut -f2 -d'@')
                  # Unquoted on purpose: joins the filter names on one line.
                  echo ${filters}
              fi
          done
      )
  }
  
  
  # Purge everything from the staged snapshot that is not part of a
  # backup source.
  #
  # Since we do a copy from last sync with cp -l, we have to remove the
  # extras here: every path below $1 that does not match the extended
  # regex ${bdirmatch} is removed, deepest entries first.  Directories
  # that still hold kept files survive because rmdir and rm -f both
  # fail on them.  Nothing is touched unless dryrun is "no".
  #
  # Arguments: $1 - root of the staged snapshot
  # Globals:   bdirmatch, dryrun (read)
  # Returns:   non-zero when $1 cannot be entered
  #
  # File names containing a newline are not handled (the list is
  # processed line by line).
  delete_outside_sync()
  {
      echo
      echo "Deletes files that should not be in the latest snapshot"
      if test "x$dryrun" == "xno"; then
          (
              # Never fall back to the caller's directory: the loop
              # below deletes whatever find reports.
              cd "$1" || exit 1
  
              # Reading line by line keeps blanks intact and prevents
              # the shell from glob-expanding names such as "k*p".
              find . -mindepth 1 -depth | grep -E -v -- "$bdirmatch" |
                  while IFS= read -r f; do
                      if test -e "$f" ||
                          test -h "$f"; then
                          rmdir "$f" 2>/dev/null
                          rm -f "$f" 2>/dev/null
                      fi
                  done
  
              # Individual removals are best effort.
              true
          )
      fi
  }
  
  
  # Normalise one backup source given on the command line and print it.
  #
  # One trailing '/' is dropped.  A source that has no ':' (so no host
  # part) and does not start with '/' is taken relative to
  # $current_dir; a leading "./" is dropped before the two are joined.
  # Everything else is printed unchanged.
  knead_source_path()
  {
      source="$1"
  
      # remove trailing '/'
      source="${source%/}"
  
      # make the path absolute
      if [[ "$source" != *:* && "$source" != /* ]]; then
          source="$current_dir/${source#./}"
      fi
  
      echo "$source"
  }
  
  
  # Normalise the destination directory given with --dest and print it.
  #
  # A destination containing ':' would be remote, which is not
  # supported, so print_usage is called for it.  One trailing '/' is
  # dropped, and a path that does not start with '/' is taken relative
  # to $current_dir (a leading "./" is dropped first).
  knead_dest_path()
  {
      dest="$1"
  
      case "$dest" in
          *:*)
              # The destination can not be remote
              print_usage
              ;;
      esac
  
      # remove trailing '/'
      dest="${dest%/}"
  
      # make the path absolute
      case "$dest" in
          /*)
              ;;
          *)
              dest="$current_dir/${dest#./}"
              ;;
      esac
  
      echo "$dest"
  }
  
  
  # Make sure the command named in $1 can be found by the shell.
  #
  # Prints two lines and exits with status 1 when it cannot; returns
  # quietly otherwise.
  #
  # The lock is deliberately left alone here: this check runs before
  # lock_destination, so an existing lockfile would belong to a
  # different run.
  check_program()
  {
      local program="$1"
  
      if ! type -a "$program" >/dev/null 2>&1; then
          echo "You need $program installed and in the path"
          echo "Aborting"
          exit 1
      fi
  }
  
  
  # Verify that the external tools the script relies on are present and
  # support the options it uses: rsync, host, date (with -d), find (with
  # -maxdepth/-mindepth) and cp (with -l, tried on a scratch file).
  #
  # Each failing check prints an explanation and exits with status 1.
  # The scratch files live in a private mktemp directory (below
  # ${TMPDIR}, or /tmp when that is unset) which is removed on every
  # path out of the function.
  #
  # The lock is deliberately left alone here: this check runs before
  # lock_destination, so an existing lockfile would belong to a
  # different run.
  check_for_programs()
  {
      local probe_dir
  
      check_program rsync
      check_program host
      check_program date
      find /tmp -maxdepth 1 -mindepth 1 >/dev/null 2>/dev/null || {
          echo "'find' on your system does not support the '--maxdepth' and"
          echo "'--mindepth' options"
          echo "You might not have the GNU version of 'find'"
          echo "Aborting"
          exit 1
      }
      date -d 20030303 >/dev/null 2>/dev/null || {
          echo "'date' on your system does not support the '--date' option"
          echo "You might not have the GNU version of 'date'"
          echo "Aborting"
          exit 1
      }
      check_program cp
  
      probe_dir=$(mktemp -d "${TMPDIR:-/tmp}/bontmia_test_cp.XXXXXX") || {
          echo "Could not create a temporary file on /tmp"
          echo "Please make sure that /tmp is writable"
          echo "Aborting"
          exit 1
      }
      if ! touch "${probe_dir}/file"; then
          rm -rf "${probe_dir}"
          echo "Could not create a temporary file on /tmp"
          echo "Please make sure that /tmp is writable"
          echo "Aborting"
          exit 1
      fi
      if ! cp -l "${probe_dir}/file" "${probe_dir}/file.2" >/dev/null 2>&1; then
          rm -rf "${probe_dir}"
          echo "cp on your system does not support the -l option"
          echo "You might not have the GNU version of 'cp'"
          echo "Aborting"
          exit 1
      fi
      rm -rf "${probe_dir}"
  }
  
  
  #################################################################
  
  
  # Called without any argument: show the help text (print_usage exits).
  if [[ "x$*" == x ]]; then
      print_usage
  fi
  
  # Relative source and destination paths are resolved against this.
  current_dir=$(pwd)
  
  # Rotation defaults; replaced as a set by --rotation.
  filter_minutes="3"
  filter_hours="24"
  filter_days="7"
  filter_weeks="4"
  filter_monthly="12"
  filter_years="0"
  rotation=""
  do_del_old="no"
  
  # Transfer settings; overridden by --bwlimit, --port and --compression.
  bwlimit=""
  port="22"
  compression=""
  
  # Run state.
  backup_dirs=""
  exit_status="0"
  dryrun="no"
  
  # Parse the command line.
  #
  # Options are read up to the first argument that does not start with
  # '-'; that argument and everything after it are backup sources.
  #
  # Globals written: dryrun, rotation, filter_minutes, filter_hours,
  #   filter_days, filter_weeks, filter_monthly, filter_years,
  #   do_del_old, port, bwlimit, tmpdir, compression,
  #   backup_destination, backup_dirs
  #
  # Every source is stored in backup_dirs as "[user@]host:/path": local
  # paths get the local host name as prefix, and a relative remote path
  # is made absolute by asking the remote side for its working
  # directory.  Invalid input ends in print_usage.
  parse_arguments()
  {
      local rotation_re='^([0-9]*)minutes([0-9]*)hours([0-9]*)days([0-9]*)weeks([0-9]*)month([0-9]*)years$'
      local dir backuppath userhost remotehost remoteuser absolutepart
  
      while test "x$*" != x; do
          case "$1" in
              ( "--dryrun" )
                  shift
                  dryrun="yes";;
              ( "--rotation" )
                  test $# -ge 2 || print_usage
                  shift
                  # Matched in the shell itself, so no perl is required.
                  if ! [[ "$1" =~ ${rotation_re} ]]; then
                      echo "the option to rotation were not right!"
                      print_usage
                  fi
                  rotation="$1"
                  filter_minutes="${BASH_REMATCH[1]}"
                  filter_hours="${BASH_REMATCH[2]}"
                  filter_days="${BASH_REMATCH[3]}"
                  filter_weeks="${BASH_REMATCH[4]}"
                  filter_monthly="${BASH_REMATCH[5]}"
                  filter_years="${BASH_REMATCH[6]}"
                  do_del_old="yes"
                  shift;;
              ( "--port" )
                  test $# -ge 2 || print_usage
                  shift
                  port="$1"
                  shift;;
              ( "--bwlimit" )
                  test $# -ge 2 || print_usage
                  shift
                  bwlimit="--bwlimit=$1"
                  shift;;
              ( "--temp" )
                  test $# -ge 2 || print_usage
                  shift
                  tmpdir="$1"
                  shift;;
              ( "--compression" )
                  compression="-z"
                  shift;;
              ( "--dest" )
                  test $# -ge 2 || print_usage
                  shift
                  # The destination can not be remote.  Checked here
                  # because print_usage can not stop the script from
                  # inside the command substitution below.
                  if [[ "$1" == *:* ]]; then
                      print_usage
                  fi
                  backup_destination="$(knead_dest_path "$1")"
                  if ! test -d "$1"; then
                      echo "Destination dir $backup_destination does not exist or is not a directory"
                      exit 1
                  fi
                  shift;;
              ( [^-]* ) # The rest is sources
                  backup_dirs=""
                  while test "x$*" != x; do
                      dir="$1"
                      if [[ "$dir" != *:* ]]; then
                          remotehost="$(hostname)"
                          dir="$remotehost:$(knead_source_path "$dir")"
                      else
                          backuppath="${dir#*:}"
                          # Only the part in front of the first ':' can
                          # name a user; an '@' in the path is data.
                          userhost="${dir%%:*}"
                          if [[ "$userhost" == *@* ]]; then
                              remoteuser="${userhost%%@*}"
                              remotehost="${userhost#*@}"
                          else
                              remotehost="$userhost"
                              remoteuser="$(whoami)"
                          fi
  
                          #remotehost=$(full_name $remotehost)
  
                          # Set on both branches, so nothing is carried
                          # over from the previous source.
                          if [[ "$backuppath" == /* ]]; then
                              absolutepart=""
                          else
                              # Ask as the same user and on the same port
                              # that the transfer itself will use.
                              absolutepart="$(ssh -p "$port" "$remoteuser@$remotehost" pwd)/"
                          fi
  
                          dir="$remoteuser@$remotehost:$absolutepart$backuppath"
                      fi
                      backup_dirs="$backup_dirs $(knead_source_path "$dir")"
                      shift
                  done;;
              ( * )
                  print_usage
                  exit 1;;
          esac
      done
  }
  
  parse_arguments "$@"
  
  # --dest is mandatory.  Without it the lockfile, the staging directory
  # and the archive paths would all be built from an empty prefix, i.e.
  # directly below "/".
  if test "x${backup_destination}" == x; then
      echo "Missing destination directory (--dest <dir>)"
      print_usage
      exit 1
  fi
  
  check_for_programs
  
  # Staging happens inside the destination unless --temp names another
  # directory.
  tmpdir=${tmpdir:-"$backup_destination"}
  rsync_options="-azv -T $tmpdir --force --relative --hard-links --delete $bwlimit" # --stats"
  
  # to speed up checking for files outside the backup areas
  #
  # Print an extended regular expression that matches, inside a staged
  # snapshot, every path below one of the backup sources.  For the
  # sources "u@h:/a" and "u@h:/b" it has the form "^(\./h:/a/|\./h:/b/)".
  #
  # The paths are escaped before they are joined: they are literal
  # text, and an unescaped "c++" or "www.x" would otherwise fail to
  # match the very directory it names, so its files would be purged
  # from the snapshot.
  #
  # Globals:  backup_dirs (read; blank-separated, split on purpose)
  # Outputs:  the regular expression on stdout, without a newline
  build_bdirmatch()
  {
      local d dir_wo_user pattern
      local separator=""
  
      printf '%s' "^("
      for d in $backup_dirs; do
          if [[ "$d" == *:* ]]; then
              dir_wo_user="${d#*@}"
              pattern="./${dir_wo_user}/"
          else
              pattern=".${d}/"
          fi
          printf '%s' "${separator}"
          printf '%s' "${pattern}" | sed -e 's/[][\.^$*+?(){}|]/\\&/g'
          separator="|"
      done
      printf '%s' ")"
  }
  
  bdirmatch=$(build_bdirmatch)
  
  echo "bdirmatch: $bdirmatch"
  
  # Main sequence.  The lock is held from here until the end, so that
  # only one run works on a given tmpdir at a time.
  lock_destination
  
  # Take a new snapshot when sources were given on the command line.
  if [[ "x$backup_dirs" != x ]]; then
      make_backup
  fi
  
  # Thin out old snapshots when --rotation was given.
  if [[ "x$do_del_old" == xyes ]]; then
      delete_old_backup
  fi
  
  unlock_destination
  exit $exit_status
