#!/usr/bin/perl

#do NOT run this program setuid. It would have the power to copy
#basically any file with any permissions.

=head1 NAME

d2dbackup (1) -- disk to disk backup system

=head1 SYNOPSIS

d2dbackup {options} { -s | <sourcedir> }

=head1 OPTIONS

 -s                    read list of files to backup from stdin
 <sourcedir>           recursively back up that directory

 -g|--garbagecollect   Look through all backup disks and enforce
                         that all files obey current retention
                         policies for specified sourcedir

 --restore             Restore a directory recursively
 --verify              Check each last version against live file for diff
 --rdeleted            When restoring, restore deleted files too
                         (otherwise they are skipped).
 --clobber             Overwrite files found when restoring

 --report              print a report on disk usage instead of running
 --profile             show performance profiling information
 -n|-l                 list files that would be backed up; do not
                          actually do anything
 -d=<level>|--debug <level>  set debugging output to specified level.
                          see HOWTO.debug_levels
 -v|--verbose          verbose
 -q|--quiet            no output will be printed to STDOUT, instead mail
                          will be sent to ERRORMAIL, but only on errors
 -h|--help             print this help message

 -c|--config  <config file>   specify config file
 -f|--fsconfig <config file>  specify source-fs-specific config file

 --bdiskstem '/disk'          fixed portion of path to bdisks,
                                such that unix glob "/disk*" covers
                                all and only bdisks.

 --preference-factor <bdisk>:<integer>
                               set the preference factor for that
                                  BDisk. This option may be repeated
                                  for multiple BDisks.
 --blocksize 1024              specify size of df blocks
 --mincompress 4096            smaller than this wont be compressed
 --diskcheckparam .001         see full docs
 --maxdiskcheckcount 200       see full docs
 --maxversions 60              max number of versions to save of a file
 --maxsize 900100100           max total size of all versions of a file
                                  (see also full docs)
 --maxfilesize 5000000000      max size of a file to be backed up at all
 --eightbitfilenames           include files with names that have eight-bit
                                  characters in them (default is on)
 --excludedisks 2              number of consecutive versions required
                                   to be on disparate target disks
 --errormail root@your.dom     email address for errors
 --logfile d2dbackup.log       log file
 --include <regexp>            perl regular expressions to include or exclude
 --exclude <regexp>              matching files from backup  
 --dirinclude <regexp>         perl regular expressions to include or exclude
 --direxclude <regexp>           matching directories 
 --mapsrc2backup <sourcepath>:<backuppath>
                    add a mapping of source path to backup path.
                       Multiple such args are allowed on the
                       command line. Order matters.
 --nochecklastv     Do not bother to compare the modification time
                       of the last backup version of each file to
                       the mod time of the source.
                       see L<HOWTO.improve_performance>
 --diffremove       Cycle through the list of versions. Remove
                       the earlier of pairs of versions that
                       are identical.
 --slow 0.5         Sleep this number of seconds before processing each
                       directory in the source, to avoid overloading the
                       source system with IO requests
 -t|--sinceepoch <epoch time>     use this timestamp as the date
                                     after which to back up, rather than
                                     the date of the timestamp read file
 --timestampreadfile <filename>   file whose mod time is used as the
                                     date after which to back up
 --timestampwritefile <filename>  file to touch at the beginning
                                     of the backup run
 --timestampfile <filename>       sets both ...read and ...write
                                     to this value
 -A|--stampreadAtime              use the Access time rather than
                                    Modification time of the
                                    timestamp read file
                                    see L<HOWTO.snapshots>
 -M|--stampreadMtime (default)    use the Modification time of the
                                    timestamp read file

=head1 DESCRIPTION

This backup system operates by copying (compressed versions of) files
from a live or source directory into several target directories, each
of which is assumed to live on a different cheap disk drive. Each time
a backup is done, any files modified since the time of the last backup
are copied again, onto new drives if possible. As new versions are
written to the backup drives, old versions of those files are deleted,
according to configuration parameters for the max number of versions
stored for a given file or the max amount of file space the backups of
any single file may take up. These parameters may vary adaptively and
dynamically as the total amount of free space in the backup drive pool
varies.

See separate documentation in L<README.pod> for complete information.

=head1 CONFIGURATION

d2dbackup looks for its configuration file, d2dbackup.conf, in the following places, in order:

 /etc/d2dbackup.conf
 /usr/local/etc/d2dbackup.conf
 ./d2dbackup.conf

All other paths and variables are set in the configuration file.

If no source-filesystem-specific configuration file is specified with
the -f option on the command line, but a source file system is
specified, d2dbackup will look in the top level of the source file
system for a file named ".d2dbackup.fs.conf", and if found use that
for the source-filesystem-specific configuration.

=head1 SEE ALSO

d2dbackup.conf(5)

=head1 AUTHOR

Alex Aminoff, alex_aminoff@alum.mit.edu

=head1 COPYRIGHT

Copyright 2002-2004, shared by the National Bureau of Economic Research
and Alexander Aminoff

Permission is granted to copy, modify, and use this software 
under the Gnu General Public License, found in the file LICENSE.

=cut

sub usage {
    my $msg = shift;
    
    # This prints the perl pod docs from SYNOPSIS to DESCRIPTION.
    # It is ugly but it works.
    
    my $perlcmd = q[perl -ne '/SYNOPSIS/ and $show=1; /DESCRIPTION/ and exit 0; /^\s*$/ and next; $show and print $_;'];
    system("perldoc -t $0 | $perlcmd");
    die $msg;
}

%preference_factor = ();

# Not really configuration variables as such, just programmer-convenience
# globals, like #DEFINEs:
#$VERSIONGLOB = '\;*'; # shell pattern to pick up version number
#$VERSIONPAT = ';(\d+)'; # regexp pattern to pick up version number
#$VEXT = '\.gz|\.dirinfo|\.deleted'; # pattern for legal extensions on versions

# Defaults for configuration variables
$MINCOMPRESS = 4096; # size in bytes below which its not worth compressing
$NOCOMPRESSPAT = '\.gz|\.zip';
$DISKCHECKPARAM = .001; # Re-df the disk when accumulated writes gets around this factor of the remaining free space
$MAXDISKCHECKCOUNT = 200; # max number of disk writes/deletes before re-dfing

$MAXVERSIONS = 30; # max number of versions for a single file
$megabyte = 1024 * 1024;
$MAXSIZE = 2000 * $megabyte; # max size of all vs of a file on empty disk
$MAXFILESIZE = undef; # max size of a file to back up at all
$OLDTIME = 0;
$EXCLUDEDISKS = 1;
$LEAVEFREE = 4 * $megabyte;  # space to leave free on disk when its very full
$LOCKDIR = "/tmp";
$ERRORMAIL = 'root@localhost';
$LOGFILE = '/var/log/d2dbackup.log';
@DAYS_BUCKETS = (7,30,365); # buckets to count up files ages into
$NODIRS = 0;
$NOLOCKFILE=0; # use lock files
$PROFILE=0; # do not print a profiling report
$EIGHTBITFILENAMES=1; # skip nothing

# Initializing globals
@DIR_KEEP_RE = ();
@PRE_RE = ();
@POST_RE = ();
@FS_RE = ();
@SRC2BACKUP_SUBST = ();

$SOURCE = "";

# END CONFIG

use Data::Dumper;
use Sys::Hostname;
use Getopt::Mixed "nextOption";
use File::Find 1.04; # NOTE: only recent ver of File::Find allows preprocess
use Benchmark;
use Time::HiRes qw(gettimeofday tv_interval);
use File::Compare;
use Fcntl ':mode';

use NBER::D2Dbackup::BFile;
use NBER::D2Dbackup::BDisk;
use NBER::D2Dbackup::BDisks;
use NBER::D2Dbackup::Stoplist;

$|=1; #pipes are hot

my $config;
if ( -r "/etc/d2dbackup.conf" ) {
    $config = "/etc/d2dbackup.conf";
} elsif ( -r "/usr/local/etc/d2dbackup.conf" ) {
    $config = "/usr/local/etc/d2dbackup.conf";
} elsif ( -r "./d2dbackup.conf" ) {
    $config = "./d2dbackup.conf";
} else {
    # no default config file
    warn "No default config file found in /etc /usr/local/etc or current directory. Unless one is supplied to --config, d2dbackup will die.";
}

@pre_re_cmdline = ();          # this is an array because order matters
@dir_keep_re_cmdline = ();          # this is an array because order matters
@mapsrc2backup_cmdline=(); # this is an array because order matters
@errors=();
%preference_factor_cmdline=();
%opts=();

Getopt::Mixed::init("c=s config>c f=s altconfig>f fsconfig>f s
d=i debug>d v verbose>v q quiet>q
bdiskstem=s blocksize=i mincompress=i
preference-factor=s
diskcheckparam=f maxdiskcheckcount=i maxversions=i maxsize=i
maxfilesize=i
eightbitfilenames
errormail=s mapsrc2backup=s
logfile=s
t=i sinceepoch>t sincepoch>t 
timestampfile=s
timestampreadfile=s
timestampwritefile=s
excludedisks=i
include=s
exclude=s
dirinclude=s
direxclude=s
A stampreadAtime>A
M stampreadMtime>M
g garbagecollect>g
slow=s
restore rdeleted clobber
verify
nochecklastv diffremove report profile l n>l h help>h");

while (my($option, $value, $pretty) = nextOption()) {
    $option eq "d" and $debug = $value || 1, next;
    $option eq "h" and usage("help requested");
    $option eq "v" and $verbose=1, next;
    $option eq "q" and $quiet=1, next;
    if($option eq "c") {
	$config = $value;
	-r $config or die("cant read specified config file: $config");
	next;
    }
    $option eq "f" and $sourceconfig = $value, next;
    if($option eq "preference-factor") {
	my($disk,$num)=split(/:/,$value); # NOTE this may break under DOS/WINDOWS where a disk could be a drive letter
	$preference_factor_cmdline{$disk}=$num;
	next;
    }
    if ($option =~ /^(dir)?(include|exclude)$/ ) {
      logmsg(" processing option $option $value") if $debug == 4;
	my $keep;
	if ($2 eq "include") {
	    $keep = 1;
	} else {
	    $keep = 0;
	}
	if ($1 eq "dir") {
	    push (@dir_keep_re_cmdline, [ $value, $keep ]);
	} else {
	    push (@pre_re_cmdline, [ $value, $keep ]);
	}
    }
    if($option eq "mapsrc2backup") {
	push(@mapsrc2backup_cmdline,$value);
	next;
    }
    $opts{$option}=$value || 1;
}
 Getopt::Mixed::cleanup();

require($config);
if (! $sourceconfig && $SOURCE) {
    $sourceconfig = $SOURCE . "/.d2dbackup.fs.conf";
    -r $sourceconfig or $sourceconfig = undef;
}
if ($sourceconfig) {
    -r $sourceconfig or die("cant read source specific config file: $sourceconfig");
    require($sourceconfig);
}

$LOGFILE = $opts{logfile} || $LOGFILE;
openlog();

$SOURCE = $ARGV[0] if $ARGV[0];

logmsg("debug level: $debug") if $debug;
@ARGV > 1 and usage("can only archive one source file system at a time");

opt2configvar('bdiskstem','blocksize','mincompress',
	      'restore','verify','clobber','rdeleted',
	      'diskcheckparam','maxdiskcheckcount','maxversions','maxsize',
	      'maxfilesize','eightbitfilenames',
	      'errormail','timestampreadfile','timestampwritefile',
	      'nochecklastv','diffremove','source','excludedisks','profile'
	      );

$BDISKS = NBER::D2Dbackup::BDisks->new('preferences' => \%preference_factor); # will pick up stem etc from globals

$OLDTIME = $opts{'t'} || 0;
if ( $opts{'timestampfile'} ) {
    ($opts{'timestampreadfile'} || $opts{'timestampwritefile'}) and
	usage("cannot specify both --timestampfile and --timestampread/writefile");
    $TIMESTAMPREADFILE = $opts{'timestampfile'};
    $TIMESTAMPWRITEFILE = $opts{'timestampfile'};
}
my $srcforstamp = $SOURCE;
$srcforstamp =~ s!/!_!g;
$TIMESTAMPWRITEFILE =~ s/=SOURCE=/$srcforstamp/g;
logmsg( "TIMESTAMPREADFILE: $TIMESTAMPREADFILE") if $debug == 3;
logmsg( "TIMESTAMPWRITEFILE: $TIMESTAMPWRITEFILE") if $debug == 3;

if($opts{'A'}) {
    $opts{'M'} and usage('cannot specify both -A and -M (Atime/Mtime for read timestamp)');
    $opts{'t'} and usage('nonsensical to specify both -t and -A');
    $WHICHTIME="A";
} elsif($opts{'M'}) {
    $opts{'t'} and usage('nonsensical to specify both -t and -M');
    $WHICHTIME="M";
}
logmsg("WHICHTIME: $WHICHTIME") if $debug == 3;

foreach my $bdisk (keys %preference_factor_cmdline) {
    $preference_factor{$bdisk}=$preference_factor_cmdline{$bdisk};
}

$BDISKS->init();

if ($debug == 1) {
    # list bdisks
    my $msg = "bdisks: " . join(", ",
				map { $_->{name} }
				@{ $BDISKS->{bdisks} }
				);
    logmsg( $msg );
}

$LIST=$opts{'l'};
if ($opts{'report'}) {
    print diskreport();
    exit 0;
}
$SLOW=$opts{'slow'};

$WHAT = "Backup"; # Whether we are doing backup or garbaga collection
if ($opts{'g'}) {
    $GARBAGECOLLECT = 1;
    $WHAT = "Garbage Collection";
    $opts{'t'} and die "Nonsensical to specify a timestamp with garbage collection";
    $NOCHECKLASTV and die "Nonsensical to check last version against source which may not exist with garbage collection";
    $RESTORE and die "Nonsensical to specify both --garbagecollect and --restore";
}
if ($RESTORE) {
    $WHAT = "Restore";
} else {
    $CLOBBER and die "Nonsensical to specify --clobber unless you are restoring";
    $RDELETED and die "Nonsensical to specify --rdeleted unless you are restoring";
}
if ($VERIFY) {
    $WHAT = "Verify";
}

$SOURCE or usage("must specify one source file system either on the command line or in a config file");

logmsg("d2dbackup doing $WHAT on $SOURCE");

# reverse is becase mapsrc2backup with priority 1 uses unshift, so they
# would get entered backwards
foreach my $opt (reverse @mapsrc2backup_cmdline) {
    my($src,$backup)=split(/:/,$opt); # NOTE this may break under DOS/WINDOWS where a disk could be a drive letter with a colon
    mapsrc2backup($src,$backup,1);
}

sub mapsrc2backup {
    my($src,$backup,$priority) = @_;
    $src or die "mapsrc2backup: src arg missing";
    $backup or die "mapsrc2backup: backup arg missing";
    my $aryref = [ $src,$backup ];
    if ($priority) {
	# put it first
	unshift(@SRC2BACKUP_SUBST,$aryref);
    } else {
	push(@SRC2BACKUP_SUBST,$aryref);
    }
    $aryref;
}

# we replace each regexp by its compiled version
# replacement is done in place as a side-effect, nothing is returned.

my $stoplist = NBER::D2Dbackup::Stoplist->new
    ('FILE_RE' => [ @PRE_RE, @pre_re_cmdline ] ,
     'DIR_RE'  => [ @DIR_KEEP_RE, @dir_keep_re_cmdline ] ,
     debug => $debug == 3 ? 1 : 0,
     );

$stoplist->init();

%processed = ();
%deleted = ();
$starttime=time();
$lastcheckpoint=0;
$lockfile="";
$now = 1;
##$now = time(); # by system clock
$topdirnow = 1; # time by top directory max

# global variables for statistics collection:
$filecount = 0;
$errcount = 0;
$copycount = 0;
$copytime = 0;
$delcount = 0;
$copybytes = 0;
$curfile = "";
$curtarget = "";
%starttime = ();
%totaltime = ();
$bfile_count=0;
$maxsize_limited_count=0;
$maxvers_limited_count=0;
$versions_count=0;
$oneversion_notmaxsize_count=0;
$nosrc_count=0;
%older_count = ();

$SIG{INT} = \&interrupt_handler;

if ($opts{'s'}) {
    # read in files to be backed up from STDIN
    while(<>) {

	chomp($_);
	next if /^\s*$/; # blank line
	next if /^\s*#/; # comment
	-d $_ or dofile($_);
    }
} else {
    $RESTORE or -d $SOURCE or usage "Argument is not a directory: $SOURCE";
    
    unless($NOLOCKFILE) {
	$lockfile = $SOURCE;
	$lockfile =~ s|/|_|g;
	$lockfile .= ".garbagecollect" if $GARBAGECOLLECT;
	$lockfile = $LOCKDIR . "/" . $lockfile . ".lock";
	-e $lockfile and error("lock file found: $lockfile. A previous run of d2dbackup on directory $SOURCE probably failed or is still running. Aborting");
	system("touch $lockfile");
    }
    
    my @source_lstat = lstat($SOURCE);
    maximize_now( $source_lstat[8] );
    maximize_now( $source_lstat[9] );
    maximize_now( $source_lstat[10] );
    $topdirnow = $now;
    
    my @finddirs;
    if ($GARBAGECOLLECT || $RESTORE) {
	# Search through directory corresponding to $SOURCE on this host
	# on all backup disks
	my $globpat = $BDISKS->{glob} . NBER::D2Dbackup::BFile::src2backup($SOURCE);
	@finddirs = glob($globpat);
	@finddirs or error("No backup directories found for garbagecollect or restore for $SOURCE. Aborting.");
	logmsg("$WHAT on backups for $SOURCE");
    } else {
	# back up the source directory
	@finddirs = ($SOURCE);

	if($TIMESTAMPREADFILE) {
	    # Read the time from the timestamp read file
	    my $f = $TIMESTAMPREADFILE;
	    $f = $SOURCE . '/' . $f if $f !~ m|^/|;
	    my @lstat = lstat($f);
	    scalar(@lstat) or error("Could not stat read time stamp file $f");
	    if( $WHICHTIME eq "M") {
		$OLDTIME=$lstat[10];
	    } elsif( $WHICHTIME eq "A") {
		$OLDTIME=$lstat[8];
	    } else {
		error("WHICHTIME must be set to A or M");
	    }
	} # else OLDTIME was already set

	if($TIMESTAMPWRITEFILE) {
	    my $f = $TIMESTAMPWRITEFILE;
	    $f or error("no time stamp file set for writing");
	    $f = $SOURCE . '/' . $f if $f !~ m|^/|;
	    system("touch $f") == 0 or error("Could not update write stamp file $f");
	    if ( $f =~ m|$SOURCE| ) {
		# only use timestamps on the source file system to avoid
		# dangerous clock skew
		maximize_now( (lstat($f))[9] );   
	    }
	}
	logmsg("Backing up sourcedir $SOURCE since time $OLDTIME");
    }
    
    find({ wanted => \&wanted,
	   preprocess => \&preprocess,
	   no_chdir => 1 },
	 @finddirs);

    unlink $lockfile;

    endsummary("completed");
    exit 0; # always exit 0 if we got this far. exiting with a non-0
    # return code is reserved for when we get fatal errors.
}

sub preprocess {
    my(@dirlist)=@_;
    startprof('preprocess');
    my @return = ();
    for(@dirlist) {
	# NOTE: we have not done a stat on the items in this dir yet.
	# so we dont know if the things are a dir or a file.
	my $skip = 0;
	if ( $_ eq '.' || $_ eq '..' ) {
	    $skip = 1;
	} else {
	    unless( $GARBAGECOLLECT || $RESTORE ) {
		my $fn = join("/", $File::Find::dir, $_);
		if ( ! $EIGHTBITFILENAMES && /[^\000-\177]/ ) {
		    error("  File name has eight bit character: $fn",1);
		    $skip = 1;
		} elsif (! $stoplist->dir_keep_check($fn) ) {
		    $skip = 1;
		    logmsg("  PRUNING $fn") if $verbose || $debug == 4;
		}
	    }
	}
	unless($skip) {
	    push (@return,$_);
	}
    }
    if ($SLOW) {
	startprof('delay due to SLOW');

	if ($SLOW < 1) {
	    sleep 1 if rand(1) < $SLOW;
	} else {
	    sleep $SLOW;
	}
	endprof('delay due to SLOW');
    }

    endprof('preprocess');
    @return;
}

sub wanted {
    startprof('wanted');

    if ( $GARBAGECOLLECT && $deleted{$_} ) {
	# we deleted this version previously
	endprof('wanted');
	return(0);
    }
    
    $filecount++;
    
    if ( $verbose && time() > $lastcheckpoint + 300 ) {
	# Log a time checkpoint message with the current file
	logmsg("checkpoint: ", scalar(localtime()), " ", $_);
	$lastcheckpoint=time();
    }
    startprof('wanted.includecheck');
    if (! $GARBAGECOLLECT && ! $stoplist->includecheck($_) ) {
	logmsg( " $_ skipped by configured exclude regexp")
	    if $debug == 4;
	endprof('wanted.includecheck','wanted');
	return(0);
    }
    endprof('wanted.includecheck');

    startprof('wanted.stat');
    # THIS SHOULD BE THE ONE AND ONLY STAT EVER!
    my @lstat = lstat( $_ );
    endprof('wanted.stat');
    unless(scalar(@lstat)) {
	my $info = "";
	if ( -e $_ ) {
	    $info .= " file exists";
	} else {
	    $info .= " file does not exist";
	}
	if ( -l $_ ) {
	    $info .= " is a link";
	} else {
	    $info .= " is not a link";
	}
	if ( $deleted{$_} ) {
	    $info .= " was previously deleted by this run of d2dbackup ";
	}
	error("wanted: Could not stat ($info) $_",1);
	endprof('wanted');
	return(0);
    }
    my $isdir = 0;
    my $mode = $lstat[2];
    if ( S_ISDIR($mode) ) {
	$isdir = 1;
    }

    unless ( $NODIRS && $isdir ) {
	# when digging through backups, directories are files,
	# so skip actual directories
	if ( ( $GARBAGECOLLECT || $RESTORE || $VERIFY ) && $isdir ) {
	    endprof('wanted');
	    return(0);
	}
	# check against date. This happens very early which is why
	# using a read timestamp saves execution time. (we hope)
	unless ( $lstat[10] > $OLDTIME )  {
	    endprof('wanted');
	    return(0);
	}
	# check against size
	if ( $MAXFILESIZE && $lstat[7] > $MAXFILESIZE ) {
	    error("File larger than max absolute file size: $_\n",1);
	    endprof('wanted');
	    return(0);
	}
	if ($LIST) {
	    print $_, "\n";
	} else {
	    dofile($_, @lstat);
	}
    }
    endprof('wanted');
}

sub dofile {
    my($file,@lstat)=@_;
    startprof('dofile');
    startprof('dofile.1');
    logmsg( $file ) if $debug == 5 || $debug == 7 || $debug == 10;
    
    my $src;

    if( $GARBAGECOLLECT || $RESTORE) {
	$src = NBER::D2Dbackup::BFile::backup2src($file);
	logmsg( " src from backup in GC: $src" ) if $debug == 5;
	# During GC skip backup files for which the source has been processed.
	# Preprocess should handle this for us but it does not
	if ( $processed{$src} ) {
	    # eliminated
	    logmsg(" $src already processed") if $debug == 4;
	    endprof('dofile.1','dofile');
	    return(0);
	}
    } else {
	$src = $file;
    }
    $curfile = $file; # global
    endprof('dofile.1');

    startprof('dofile.2');
    startprof('dofile.2.constructor');
    my $bf = NBER::D2Dbackup::BFile->new( src => $src );
    
    endprof('dofile.2.constructor');
    
    if ( ! $GARBAGECOLLECT && ! $RESTORE ) {
	# stat is what we already have always when NOT GC
	$bf->stat(@lstat);
	$bf->copyfrom($src);
	$bf->srcexists(1); # since we found not during GC the source obviously exists
	startprof('dofile.2.typeok');
	my $answer = $bf->typeok();
	endprof('dofile.2.typeok');
	if( ! $answer ) {
	    # When garbage collecting, look at all files. Even if
	    # typeok rejects the file today, at some point in the past it was
	    # backed up. So we need to garbage collect it.
	    logmsg(" SKIPPING: $src was not a plain file") if $debug == 5;
	    endprof('dofile.2');
	    endprof('dofile');
	    return;
	}
	maximize_now($lstat[9]);
    }
    endprof('dofile.2');

    if($RESTORE) {
	if ( $bf->restore() ) {
	    $copycount++;
	    $copybytes += $bf->size();
	    if( -f $src ) {
		# do NOT memoize directories, otherwise they may get
		# purged from the File::Find.
		logmsg(" restored $src, memoizing") if $debug == 5;
		$processed{ $src } = 1;
	    }
	}
	endprof('dofile');	
	return;
    }
    
    # get all versions from file glob against backup disks
    startprof('dofile.3.versions');
    my @vs = $bf->versions();
    endprof('dofile.3.versions');

    my $skip = 0;
    if ($VERIFY) {
	unless ($bf->verify) {
	    # NOT EQUAL. Danger Will Robinson.
	    my $lastv = $bf->lastversion;
	    logmsg("WARNING. Source $src and last backup $lastv differ although mod times are the same.");
	}
	endprof('dofile');
	return(1);
    }
    
    startprof('dofile.4');

    if ($GARBAGECOLLECT) {
	# create a new duplicate version only if 
	# there is only 1 version now
	# Note: there is no good way of eliminating -e here.
	# However once the extra copy has been written we will not ever
	# check -e again because of the short-circuit &&.
	my $lastv = $bf->lastversion();
	unless($lastv) {
	    error("dofile: no last version found for " . $file , 1);
	    endprof('dofile');
	    return(0);
	}
	startprof('dofile.4.copyfromstat');
	# STAT THE LAST BACKUP VERSION HERE
	my @cfstat = lstat($lastv);
	unless(scalar(@cfstat)) {
	    error("dofile: Could not stat last version $lastv",1);
	    endprof('dofile.4','dofile');
	    return(0);
	}
	endprof('dofile.4.copyfromstat');
	$bf->stat( @cfstat );

	logmsg(" last version: $lastv\n") if $debug == 5;
	if ( $bf->srcexists() ) {
	    logmsg(" skipping because the src exists under GC") if $debug == 5;
	    # Note that under garbage collection we NEVER have to
	    # stat the source. This saves time.
	    $skip = 1;
	} else { # it was deleted on the src
	    if ( NBER::D2Dbackup::BFile::visdeleted($lastv) && @vs > 1 ) {	
		logmsg(" skipping because it is deleted and already marked and we already have at least 2 versions") if $debug == 5;
		$skip = 1;
	    } else {
		# copying from last version
		$bf->copyfrom($lastv);
	    }
	}
	$processed{$src}=1;
    } elsif (! $NOCHECKLASTV) {
	startprof('dofile.4.lastvstale');
	if (! $bf->lastvstale() ) {
	    logmsg(" last backup version is up to date") if $debug == 5;
	    $skip = 1;
	}
	endprof('dofile.4.lastvstale');
    }
    endprof('dofile.4');

    startprof('dofile.5');
    if ($debug == 5) {
	if ($GARBAGECOLLECT) {
	    # size and mtime do NOT get logged under GC because we
	    # have not stated the source.
	    logmsg(" versions: ", scalar(@vs) );
	} else {
	    logmsg(" size: ",$bf->size(),
		   " mtime: ",$bf->mtime(),
		   " num versions: ", scalar(@vs)
		   );
	}
    }
    
    unless($skip) {
	startprof('dofile.5.newversion');
	$bf->newversion(); # Copy a new version
	endprof('dofile.5.newversion');
	$copycount++;
	$copybytes += $bf->size;
	logmsg(" newversion: ", $bf->newversion() ) if $debug == 5;
    }
    $bf->count() if $GARBAGECOLLECT;
    endprof('dofile.5');
    startprof('dofile.6.purgeold');
    $bf->purgeold(); # purge old versions
    endprof('dofile.6.purgeold','dofile');
}

sub opt2configvar {
    # Based on the results of Getopt::Mixed, sets global variables
    # in all caps
    my(@vars)=@_;
    foreach my $var (@vars) {
	my $vcap = $var;
	$vcap =~ tr/a-z/A-Z/;
	${$vcap} = $opts{$var} if defined( $opts{$var} );
        logmsg( "$vcap: ${$vcap}") if $debug == 3;
    }
}

sub diskreport {
    my $txt = "DISK STATUS REPORT";
    $txt .= " on " . `/bin/date`;
    $txt .= " Disks:       pref     free        total    %free\n";
    foreach my $o (@{ $BDISKS->{bdisks} }) {
	my $free = int($o->bytesfree/1000);
	my $blocks = int($o->bytestotal/1000) || 1; 
	my $foo = sprintf "%12s %4d %12d %12d %2.0f%\n", $o->{name}, $o->{preference}, $free, $blocks, int(100*$free/$blocks);
	$txt .= $foo;
    }
    $txt .= " ADAPTIVE_MAXSIZE: $BDISKS->{adaptive_maxsize}\n";
    $txt;
}

sub pp {
    my $in = shift;

    if ($in > .02) {
	int($in * 100) . '%';
    } elsif ($in == 0) {
	'0%';
    } else {
	my $smallzeros = - int( log($in) / log(10) );
	sprintf("%.${smallzeros}f%",$in * 100);
    }
}

sub gc_statistics {

    my $txt = "BACKUP STORAGE STATISTICS REPORT";
    $txt .= "\n total number of files ever backed up: " .
	$bfile_count;

    $bfile_count ||= 1;
    $txt .= "\n average number of versions: " . 
	sprintf('%.1f', $versions_count / $bfile_count );
    $txt .= "\n files limited by MAXVERSIONS: " .
	pp( $maxvers_limited_count / $bfile_count );
    $txt .= "\n files limited by MAXSIZE: " .
        pp( $maxsize_limited_count / $bfile_count );
    $txt .= "\n files deleted from source fs: " .
	pp( $nosrc_count / $bfile_count );
    $txt .= "\n files never changed since beginning of backups: " .
	pp( $oneversion_notmaxsize_count / $bfile_count );

    $txt .= "\n last backup version age in days: ";
    
    my $previous_total = $bfile_count;
    my $previous_age = 0;
    foreach my $days (@DAYS_BUCKETS) {
	my $inbucket = $previous_total - $older_count{$days};
	$previous_total = $older_count{$days};
	$txt .=  "\n   $previous_age to $days days old: " .
	    pp( $inbucket / $bfile_count );
	$previous_age = $days;
    }
    $txt .=  "\n   more than $previous_age days old: " .
	pp( $older_count{$previous_age} / $bfile_count );
    $txt .= "\n";
    $txt;
}

sub endsummary {
    my $how = shift;
    
    my $txt = "";
    $txt .= diskreport() if $verbose;
    $txt .= gc_statistics() if $GARBAGECOLLECT && $how eq "completed";

    my $totaltime = time() - $starttime;
    my $pcopytime = sprintf('%.2f',$copytime);
    my $mbytes = sprintf('%.2f',$copybytes / 1048576);

    $txt .= "$WHAT of $SOURCE $how\n";
    $txt .= " system time: ". scalar localtime(time) . "\n";
    $txt .= " latest file timestamp: ". scalar localtime($now) . "\n";
    $txt .= " files checked: $filecount\n";
    my $action = 'copied'; # GC
    $action = 'restored' if $WHAT eq 'Restore';
    $action = 'backed up' if $WHAT eq 'Backup';
    $txt .= " files $action: $copycount\n";
    $txt .= " megabytes $action: $mbytes\n";
    $txt .= " old versions deleted: $delcount\n";
    $txt .= " non-fatal errors: $errcount\n";
    $txt .= " Total time elapsed (secs): $totaltime\n";
    $txt .= " Time spent copying (secs): $pcopytime\n";
    $txt .= profile_report() if $PROFILE;

    if ( $quiet ) {
	$ERRORMAIL and @errors and mail_errors($txt,$how);
    }
    logmsg($txt);
}

sub mail_errors {
    my($lastmsg,$how) = @_;
    my $errortxt = join("\n",@errors);
    
    open(MAIL,
	 qq[| mail -s "Errors in d2dbackup run which $how" $ERRORMAIL] )
	or die "Could not run /bin/mail for write";
    print MAIL "ERRORS:\n\n";
    print MAIL $errortxt;
    print MAIL "\n\nSUMMARY:\n";
    print MAIL $lastmsg;
    close(MAIL);
}

sub startprof {
    my $starttime = [ gettimeofday ];
    for (@_) {
	$starttime{$_} = $starttime;
    }
}

sub endprof {
    my $endtime = [ gettimeofday ];
    for (@_) {
	$totaltime{$_} += tv_interval($starttime{$_},$endtime);
    }
    return 1; # so that you can say test or endprof() && return
}

sub profile_report {
    my $t = "Profile of time spent by category:\n";
    for (sort keys %totaltime) {
	$_ =~ /\w+\.\w+/ and $t .= "  ";
	$t .= $_ .  "\t" . sprintf('%.3f',$totaltime{$_}) . "\n";
    }
    $t;
}

sub maximize_now {
    my($newval)=@_;
    return if $RESTORE;
    if ($newval > $now) {
	if ( $curfile && $newval > $topdirnow + 3600 * 24 * 3 ) {
#We used to have this error message in here, but we got complains of
# lots of error messages. In the end it is not
# d2dbackup's job to tell you that your time stamps are messed up.
# Its sort of like "dontblamesendmail".
#	    error( "the source or a backup version of $curfile has date more than 3 days in the future relative to top level directory. Not updating \$now.",1);
	    return;
	}
	$now = $newval;
	#logmsg( "\$now updated to $now\n" ) if $debug;
    }
}

sub interrupt_handler {
    $SIG{INT}='DEFAULT';
    logmsg("TERMINATED by interrupt signal");
    print "\n\n\n\nInterrupt signal caught in d2dbackup\n";
    unlink($curtarget) if $curtarget; # get rid of last version which we
    # killed in the process of writing
    if ( -f $lockfile && -t STDIN ) {
	print "Lock file $lockfile found. Delete? [n]:";
	my $ans = <STDIN>;
	$ans =~ /^y/i and unlink($lockfile) or die "Could not delete $lockfile";
    }
    endsummary("ABORTED");
    exit 1;
}

=head1 FUNCTIONS

These functions appear in the main d2dbackup script and are documented
here due to their importance and criticality.

=over

=item function error(<message>,<warningflag>)

Die with an error message. Might send email instead or in addition.
warningflag, if set, causes function to return rather than die.

=cut

sub error {
    my($msg,$warning) = @_;
    if ($warning) {
	$msg = "Warning: $msg";
    } else {
	$msg = "FATAL ERROR: $msg";
    }
    logmsg($msg);
    push(@errors,$msg);
    if ($warning) {
	$errcount++;
    } else {
	unlink $lockfile;
	endsummary('DIED');
	die $msg;
    }
    1;
}

sub logmsg {
    my($sec,$min,$hr,$mday,$mon)=localtime;
    my $month = $mon + 1;
    my $time = sprintf("%02d:%02d:%02d",$hr,$min,$sec);
    my $gc = "gc" if $GARBAGECOLLECT;
    my $ds = "$month-$mday $time [$$]$gc";
    print LOG $ds, ": ", @_, "\n";
    print STDOUT @_, "\n" unless $quiet;
}

sub openlog {
    if ( $LOGFILE =~ /^syslog/ ) {
	die "syslog logging not yet supported. Please specify a log file";
    } else {
	open(LOG,">>$LOGFILE") or die "could not open log file $LOGFILE for append";
	$SIG{'__WARN__'} = sub { error($_[0],1); };
	$SIG{'__DIE__'}  = sub { error($_[0],0); };
    }
}

