#!/usr/bin/env perl
#### !/usr/bin/perl -T
#
# $Id:$
#
# Wrapper script for dmget, to control excessive dmgetting of files.
# Break dmgets up into groups, checking to see that a group will not
# fill the free space.
#
# Copyright CSIRO 2003-
#
# Robert C. Bell
# Deputy Manager, Bureau of Meteorology / CSIRO
# High Performance Computing and Communications Centre (HPCCC)
# Street: HPCCC 24th Floor, 150 Lonsdale Street, Melbourne Vic 3000, Australia
# Postal: HPCCC 24th Floor, GPO Box 1289K, Melbourne Vic 3001, Australia
# Phone +61 3 9669 8102, fax +61 3 9669 8112, mobile 0428 108 333
# CSIRO Mathematical and Information Sciences, Supercomputing Support Group
# e-mail: official: Robert.Bell@csiro.au
#           direct: csrcb@hpc.csiro.au
# mobile telephone: 0428 108 333   + 61 428 108 333
#
# 2003-03-06 12:37:49+11 Thu
# Initial version.
#
# 2003-03-28 10:04:14+11 Fri
#	Earlier - fix from Gareth, to avoid calling dmget with zero arguments.
#	Changed threshold, to avoid recalling more than 50% rather
#	than 90% of free space.
#
# 2003-03-28 12:46:04+11 Fri
#	Performance improvement: removed awks, and used C shell
#	arrays.  Test case down from 103s to 51 s (compared with 1.5 s
#	for native dmget.
#	Then reduced ls to one invocation.
#	 - test down to 28 s.
#
# Mon Jul 28 12:33:25 AEST 2003
#	csgjw perl version, "ls -ld" takes most non-dmget time
#
# Mon Oct 11 13:21:13 AEST 2004
#       gareth, modified for Altix/linux, dmls, df -kP, comments, debug=0,1,2
#
# 2007-10-19 09:29:31+1000 Fri
#	Gareth and Rob - altered the script to work out free space on
#	/cs/datastore rather than /home !!
#
# Fri May 16 09:30:22 AEST 2008
#	psde - syslog calls
#
# Fri Jul 18 10:46:00 AEST 2008
#	psde - set maximum number of files per group; follow symlinks
#
# Thu Aug 21 14:41:11 AEST 2008
#	psde - adapt to new output from dmls from DMF 3.7.0; sort by time
#
# Thu Sep  4 11:41:34 AEST 2008
#	psde - dmget binary is now /usr/bin/real-dmget and /usr/bin/dmget
#	is a symlink to this script.
#
# Mon Sep  8 12:46:03 AEST 2008
#	psde - don't recall REG and DUL files; code tidyup
#
# Mon Oct  6 13:49:54 AEST 2008
#	psde - don't let dmf-compat cause this script to use sgi_dmls
#
# Sat Feb  7 08:55:17 AEDT 2009
#	psde - increase the size limit for Stuart Matthews - see Req# 10441
#
# Tue Mar  3 17:24:49 AEDT 2009
#	psde - Removed Stuart's special treatment
#
# Tue May 26 16:46:39 AEST 2009
#	psde
#
#	Introduce batching recalls by tape VSN.  That is, batches
#	are now normally completed only at the transition between one tape
#	and another.  Where one file starts on one tape and ends on another,
#	other files from those tapes are recalled in succession, hoping
#	to avoid an unnecessary dismount/remount of a tape.  The
#	filesystem filling is the one case where a batch is completed
#	partway through a tape.  If the files being reloaded are small
#	and/or few, then a batch may use more than one tape.  (These
#	definitions are tuneable.)
#
#	For interactive users only, a little feedback is given to help set
#	their expectations
#
#	Fixed the existing bug where a large number of pathnames presented
#	on stdin makes it fail with "Argument list too long".  (ASCsystems#120)
#
#	Support dmget's "-h" parameter (root user only)
#
#	Support $debug values 0 - 4 and let it be invoked by a "-d" param.
#
# Tue Jun 23 10:47:12 AEST 2009
#	psde - add extra logging and tweak the parameters
#
# Wed Jun 24 14:28:38 AEST 2009
#	psde - add "-l" to output an ordered list of filenames/fhandles
#	instead of actually recalling them
#
# August 2009
#	psde - add support of the "-a" option added in DMF 4.1
#
# Thu Sep 10 11:40:59 AEST 2009
#	psde
#
#	Take over the upward movement of recalled files to the DCM, to
#	avoid double processing from tape (once for the recall and again for
#	the dmmove initialed by dmtrace.pl)
#
#	reinstate recall of DUL files - the user may have specified the "-a"
#	option which should not be ignored by this wrapper.  But such a
#	file is eligible for a move to the DCM - users would expect a dmget of
#	a DUL file to be instant, and moving a copy to the DCM could involve a
#	tape mount.
#
# Thu Sep 24 10:42:42 AEST 2009
#	psde
#
#	Add a -C option to disable caching (undocumented).
#
#	Abort with Usage message if bad parameters are given.
#
# Thu Nov  5 15:47:55 AEDT 2009
#	psde
#
#	Remove upward movement code.  It moves by BFID which ignores the disk
#	copy of a DUL file, and it uses a setuid program that we'd rather do
#	without.
#
#	"dmget -l" was ignoring DUL files again.  Fixed.
#
# Fri Nov 13 13:31:07 AEDT 2009
#	psde
#
#	For the "-a" option, do the touches inside the wrapper instead of
#	inside DMF.
#

# Comments now in SVN repository
#  $HeadURL:$

use strict;
use Getopt::Long;
use Sys::Syslog;
use Data::Dumper;
use FileHandle;

use vars qw($debug);

$debug = 0;

use vars qw(
	$per_file_charge $per_GB_charge $batch_threshold
	@defer_vgs $dmget $dmbfid2vsn
	$get_oldest_recall
	$max_arg_count $log @options @filegroup
	%req_file %req_space %req_size %req_state %req_vg @req_vsn1 @req_vsn2
	$vg_tier
	$kiB_free $quarter_MB_free $dmbfid2vsn_cmd $bfid2vsn_count $file_count
	$recall_count
	$tot_MB $tot_files $nbatches
	$cur_batch_MB $cur_batch_files);

# Turn on syslog - 1: just totals  2: each batch as well
$log = 2;

# VGs for second copies:
#@defer_vgs = qw(ter te2);
@defer_vgs = qw(te2);

# Tunables for triggering the end of a batch.
# This is only tested at VSN transitions, quite deliberately.
$per_file_charge = 2.5;
$per_GB_charge = 10;
$batch_threshold = 100;


$dmget = '/usr/bin/real-dmget';
$dmbfid2vsn = '/usr/local/sbin/dmbfid2vsn';
$get_oldest_recall = '/usr/local/sbin/get_oldest_recall';
$max_arg_count = 20000;

undef %ENV;

@options = ();
@filegroup = ();
$tot_MB = $tot_files = $nbatches = 0;
$cur_batch_MB = $cur_batch_files = 0;


my ($a_opt, $n_opt, $l_opt, $h_opt, $v_opt, $B_opt);
GetOptions('d=i' =>\$debug, 'l' =>\$l_opt,
	'a' =>\$a_opt, 'h' =>\$h_opt, 'n' =>\$n_opt, 'v' =>\$v_opt,
	'B=s' =>\$B_opt) or
    die "Usage: $0 [-h | -a] [-l] [-n] [-v] [-B byterange_list] [filelist]\n";
if ($h_opt) {
    # The following wording comes from the genuine dmget program
    $> and die "You must be root to access an object by handle\n";
    push @options, "-h";
}
push @options, "-B $B_opt" if $B_opt;
push @options, "-a" if $a_opt;
push @options, "-v" if $v_opt;
$log = 0 if $l_opt;

STDOUT->autoflush(1);
STDERR->autoflush(1);

print "\$debug set to $debug\n" if $debug;
print "Recalls and syslog messages disabled\n" if $debug;
print "Debug messages enabled\n" if $debug;

print "Generated commands shown\n" if $debug > 1;

print "Verbose debug messages enabled\n" if $debug > 2;
print "Using small batch limits\n" if $debug > 2;

print "\n" if $debug;

if ($log && !$debug) {
    openlog('dmget-wr', 'pid', 'DMF');
}

# Measure free space in MB (hard coded to /cs/datastore)
#
# linux df -kP
#Filesystem         1024-blocks      Used Available Capacity Mounted on
#/dev/xscsi/pci01.03.0-1/target2/lun0/part3  26102900  23279860  2823040  90% /
open(DF, "/bin/df -kP /cs/datastore |");
<DF>; # ignore header
($kiB_free) = <DF> =~ /^\s*\S+\s+\d+\s+\d+\s+(\d+)\s+\S+\s+\S+\s*$/;
close DF;
$quarter_MB_free = int($kiB_free / 1024 / 4);
print "quarter_MB_free = $quarter_MB_free MB\n" if $debug;



sub get_oldest_recall {
    my $op = `$get_oldest_recall`;
    chomp $op;
    return ($? ? '' : $op);
} # get_oldest_recall

sub do_recall_batch {
    my $batch_time = time;
# print ("\n?? Inside do_recall_batch\n?? filegroup:\n");	print (Dumper(@filegroup));
# print ("\n");
    if (scalar(@filegroup)) {
	if ($l_opt) {
	    foreach my $f (@filegroup) {
		print "$f\n";
	    }
	} elsif ($debug) {
	    print "Cmd: '$dmget @options @filegroup'\n" if $debug > 1;
	} else {
	    system($dmget, @options, @filegroup) == 0 or exit $? >> 8;
	}
	@filegroup = ();
	$nbatches++;
	$tot_MB += $cur_batch_MB;
	$tot_files += $cur_batch_files;
	if ($log > 1 || $debug) {
	    if ($debug) {
		printf(
		    "Syslog: User: %s, Secs: %u, Batch#:  %u, Files: %u, MB: %.0f\n",
		    getpwuid($>).'', (time - $batch_time),
		    $nbatches, $cur_batch_files, $cur_batch_MB);
	    } else {
		syslog('debug',
		    'User: %s, Secs: %u, Batch#:  %u, Files: %u, MB: %.0f',
		    getpwuid($>).'', (time - $batch_time),
		    $nbatches, $cur_batch_files, $cur_batch_MB);
	    }
	}
	print "\n" if $debug > 1;
	$cur_batch_MB = $cur_batch_files = 0;
    }
} # do_recall_batch

sub queue_bfid {
    my $bfid = $_[0];
    my $vg = $req_vg{$bfid};
    my $state = $req_state{$bfid};
    my $size = $req_size{$bfid};
    my $space = $req_space{$bfid};
    my $space_diff = ($size > $space ? $size - $space : 0);
    if ($state =~ /OFL|PAR/) { # only charge for OFL & PAR files
	$cur_batch_files++;
	# NB: following ignores the possibility of sparse files
	$cur_batch_MB += $space_diff / 1024 / 1024;
    }
    if ($state ne "REG") {
	push @filegroup, $req_file{$bfid};
	print "Queued for recall: BFID: $bfid\tfile: $req_file{$bfid}\n" if $debug > 2;
    }
    delete $req_file{$bfid};

    # If we are threatening the filesystem, do the batch prematurely
    # without waiting for a VSN transition.  Ditto if we risk exceeding the
    # maximum command length.
    if (    $cur_batch_MB >= $quarter_MB_free ||
	    $cur_batch_files >= $max_arg_count) {
	printf("\nFlushing prematurely - quarter_MB_free = %.3f MB; " .
		"cur_batch_MB = %.3f MB; cur_batch_files = %d\n",
		$quarter_MB_free, $cur_batch_MB, $cur_batch_files)
	    if $debug;
	do_recall_batch();
    }
} # queue_bfid

$dmbfid2vsn_cmd = $dmbfid2vsn;
$bfid2vsn_count = 0;

sub do_bfid2vsn_batch {
    if ($bfid2vsn_count) {
	print "Cmd: '$dmbfid2vsn_cmd'\n" if $debug > 1;
	open(DMBFID2VSN, "$dmbfid2vsn_cmd|") or die "dmbfid2vsn failed: $!";
	while (<DMBFID2VSN>) {
	    chomp;
	    print "\ndmbfid2vsn o/p: $_\n" if $debug > 2;
	    my ($bfid, $vg, $vsn) = split;
	    print "BFID $bfid: vg $vg, vsn $vsn\n" if $debug > 2;
	    $req_vg{$bfid} = $vg;
	    $vg_tier = grep /^$vg$/, @defer_vgs;
	    if (!$req_vsn1[$vg_tier]{$bfid}) {
		$req_vsn1[$vg_tier]{$bfid} = $vsn;
	    } else {
		$req_vsn2[$vg_tier]{$bfid} = $vsn;
		# Yes, we're deliberately ignoring the possibility of > 2 VSNs
	    }
	}
	close DMBFID2VSN;
	$dmbfid2vsn_cmd = $dmbfid2vsn;
	$bfid2vsn_count = 0;
    }
} # do_bfid2vsn_batch


# Warn interactive users
if (!scalar(@ARGV) && -t STDIN) {
    print STDERR "\nPlease enter filenames interactively, one per line:\n";
}

my @dmattr_options = ();
if ($h_opt) {
    push @dmattr_options, "-h";
    push @dmattr_options, "-abfid,size,space,state,fhandle";
} else {
    push @dmattr_options, "-abfid,size,space,state,path";
}
push @dmattr_options, "-d:";
# This dmattr command may have files specified as arguments or from stdin, as
# chosen by the user.
print("Cmd: '/usr/bin/dmattr @dmattr_options @ARGV'\n")
    if $debug > 1;
# force no shell to allow for strange filenames
open(DMATTR, "-|", "/usr/bin/dmattr", @dmattr_options, @ARGV)
	or die "dmattr failed: $!";
# User-provided filenames not used past this point.  Instead, we use the
# details sanitised by dmattr.  A useful side-effect is that this means we
# only make a single pass over stdin, removing the need to make a copy of it.
undef @ARGV;
# STDIN from /dev/null for subsequent commands
close STDIN; open STDIN, "</dev/null";
$file_count = $recall_count = 0;
my $now = time;
print ("\nOnline files\n\n") if $debug;
while (<DMATTR>) {
    chomp;
    print "\ndmattr o/p: $_\n" if $debug > 2;

    my ($bfid, $size, $space, $dmstate, $file_or_hdl) = split /:/, $_, 5;
    next if !$file_or_hdl;
    $file_count++;

    # As an optimisation, if a specified file is (or will soon be)
    # on disk and "-a" was specified, do the "touch -a" here rather
    # than have DMF do it.  (This refers to references by pathname only;
    # you shouldn't touch an fhandle!)
    #
    # If this is a "-l" run, then any file which would not be sent to
    # the daemon while recalling should have its path/handle output
    # here.  (In particular, REG files should have this done because
    # they will not make it past the "sort by VSN" phase because they
    # have no tapes associated with them.  For safety, do the same for
    # MIG although there's a race condition if it's being migrated by
    # "dmput -r".)
    #
    # Then ignore such a file at this point, not sending it to DMF.

    if ($h_opt) {
	if ($dmstate =~ /REG|MIG/ || (!$a_opt && $dmstate =~ /DUL|MIG/)) {
	    print "$file_or_hdl\n" if $l_opt;
	    next;
	}
    } else {
	#??    if (    $dmstate =~ /REG|DUL|UNM|MIG/ ||
	#??	    $bfid eq "dmattr information not available") {
	if ($dmstate =~ /REG|DUL|UNM|MIG/) {
	    print "$file_or_hdl\n" if $l_opt;
	    if ($a_opt) {
		utime $now, (stat($file_or_hdl))[9], $file_or_hdl;
		print "Touched $file_or_hdl\n" if $debug > 0;
	    }
	    next;
	}
    }

    $recall_count++;
    print "BFID $bfid: size $size, space $space, dmstate $dmstate, " .
	    ($h_opt ? "fhdl" : "file") . " $file_or_hdl\n"
	if $debug > 2;

    $req_file{$bfid} = $file_or_hdl;
    $req_space{$bfid} = $space;
    $req_size{$bfid} = $size;
    $req_state{$bfid} = $dmstate;
    $req_vsn1[0]{$bfid} = $req_vsn2[0]{$bfid} = "";
    $req_vsn1[1]{$bfid} = $req_vsn2[1]{$bfid} = "";

    if ($bfid ne "0") {
	$dmbfid2vsn_cmd .= " $bfid";
	# Batch dmbfid2vsn requests to avoid problems due to large numbers of
	# files being specified via stdin.
	if (++$bfid2vsn_count >= $max_arg_count) {
	    do_bfid2vsn_batch();
	}
    }
}
close DMATTR;
do_bfid2vsn_batch();

if ($recall_count && -t STDERR) {
    if ($file_count == 1) {
	print STDERR "You " . ($l_opt ? "would be" : "are") .
		" recalling the only file specified.\n";
    } else {
	print STDERR "You " . ($l_opt ? "would be" : "are") .
		" recalling $recall_count of the $file_count files specified.\n";
    }
    my $oldest_recall = get_oldest_recall();
    if ($oldest_recall && !$l_opt) {
	print STDERR "The oldest currently queued recall request has been waiting for $oldest_recall.\n";
    }
}

if ($debug > 3) {
    print ("\n??????????????????????????????????????????????????\n");
    print ("\n%req_file:\n");	print (Dumper(%req_file));
    print ("\n%req_vsn1:\n");	print (Dumper(@req_vsn1));
    print ("\n%req_vsn2:\n");	print (Dumper(@req_vsn2));
    print ("\n%req_space:\n");	print (Dumper(%req_space));
    print ("\n%req_size:\n");	print (Dumper(%req_size));
    print ("\n??????????????????????????????????????????????????\n");
}

# 0. Dualstate files by fhandle (only if "-a" and "-h" specified)

if ($a_opt && $h_opt) {
    print ("\nDualstate files by fhandle\n\n") if $debug;
    foreach $b (keys %req_file) {
	if ($req_state{$b} eq "DUL") {
	    print   "Queueing dualstate: BFID: $b\tfile: $req_file{$b}\n"
		if $debug > 2;
	    queue_bfid($b);
	}
    }
    # Process these now; they ought to be quite quick
    do_recall_batch();
}

# 1. Recall non-tape files (FTP/disk MSP files or MIG files)
#    Could also be a result of dmbfid2vsn failing, which is why we don't
#    do it as a single batch, which would otherwise be appealing.

print ("\nMSP files\n\n") if $debug;
foreach $b (keys %req_file) {
    if (!$req_vsn1[0]{$b} && !$req_vsn1[1]{$b}) {
	print   "Queueing non-tape: BFID: $b\tfile: $req_file{$b}\n"
	    if $debug > 2;
	queue_bfid($b);
	if (    $cur_batch_MB / 1024 * $per_GB_charge +
		$cur_batch_files * $per_file_charge >= $batch_threshold) {
	    do_recall_batch();
	}
    }
}
# Process these now; they ought to be quite quick
do_recall_batch();

# 2. Recall tape files, non-deferred VGs before (if nec) deferred ones

foreach $vg_tier (0, 1) {	# first primary VGs, then secondary
    print ($vg_tier ? "\nSecondary VG copies\n\n" : "\nPrimary VG copies\n\n")
	if $debug;
    my (%vsn1, %vsn2, $v1, $v2, $vsn, @vsn_list);

    # Find all tapes required
    foreach $b (keys %req_file) {
	if ($req_vsn1[$vg_tier]{$b} && $req_state{$b} ne "DUL") {
	    print   "Tape" . ($req_vsn2[$vg_tier]{$b} ? "s" : "") .
		    " found: BFID: $b\tVSN: $req_vsn1[$vg_tier]{$b} " .
		    "$req_vsn2[$vg_tier]{$b}\tfile: $req_file{$b}\n"
		if $debug > 2;
	    $vsn1{$req_vsn1[$vg_tier]{$b}}++;
	    $vsn2{$req_vsn2[$vg_tier]{$b}}++;
	} else {
	    print   "Tape not found: BFID: $b\tfile: $req_file{$b}\n"
		if $vg_tier == 0 && $debug > 2;
	}
    }
    delete $vsn2{""};

    # Find all stradlers, and adjacently record their two VSNs.  We'll wear
    # the inefficiency of where a file straddles > 2 tapes, due to its rarity.
    foreach $v2 (keys %vsn2) {
	if (!grep /^$v2$/, @vsn_list) { push @vsn_list, $v2; }
	delete $vsn1{$v2};

	foreach $b (keys %req_file) {
	    if ($req_vsn2[$vg_tier]{$b} eq $v2) {
		$v1 = $req_vsn1[$vg_tier]{$b};
		if (!grep /^$v1$/, @vsn_list) { push @vsn_list, $v1; }
		delete $vsn1{$v1};
	    }
	}
    }

    # Add the tapes used by non-stradlers
    foreach $v1 (keys %vsn1) {
	if (!grep /^$v1$/, @vsn_list) {
	    push @vsn_list, $v1;
	}
    }

    if (scalar(@vsn_list)) {
	if ($log > 1) {
	    if ($debug) {
		printf(
		    "Syslog: User: %s, Tapes: %u, Files: %u max%s\n",
		    getpwuid($>).'', scalar(@vsn_list), $recall_count);
	    } else {
		syslog('debug',
		    'User: %s, Tapes: %u, Files: %u max%s',
		    getpwuid($>).'', scalar(@vsn_list), $recall_count);
	    }
	}
	if (-t STDERR) {
	    print STDERR scalar(@vsn_list) . " tape mount" .
		    (scalar(@vsn_list) == 1 ? " " : "s ") .
		    "may be required.\n";
	}
    }

    # In order of VSNs established above, queue the files
    foreach $vsn (@vsn_list) {
	print ("\n") if $debug > 2;
	print ("VSN: $vsn\n") if $debug;
	# For a given tape, assume files are written in increasing BFID order
	foreach $b (sort keys %req_file) {
	    if ($req_vsn1[$vg_tier]{$b} eq $vsn) {
		queue_bfid($b);
	    }
	}
	# VSN transition - see if batch threshold has been reached
	if (    $cur_batch_MB / 1024 * $per_GB_charge +
		$cur_batch_files * $per_file_charge >= $batch_threshold) {
	    do_recall_batch();
	}
    }
}
# Do the final small batch
do_recall_batch();

#if ($log && $tot_files) {
if ($log) {
    if ($debug) {
	printf(
	    "Syslog: User: %s, Secs: %u, Batches: %u, Files: %u, MB: %.0f\n",
	    getpwuid($>).'', (time - $^T), $nbatches, $tot_files, $tot_MB);
    } else {
	syslog('debug',
	    'User: %s, Secs: %u, Batches: %u, Files: %u, MB: %.0f',
	    getpwuid($>).'', (time - $^T), $nbatches, $tot_files, $tot_MB);
	closelog();
    }
}
#end dmget.pl
