#!/bin/tcsh -f
# JLdL 08Dec13.
#
# Copyright (C) 2005-2013 by Jorge L. deLyra <delyra@fma.if.usp.br>.
# This program may be copied and/or distributed freely. See the
# _ terms and conditions in /usr/share/doc/<package>/copyright.
#
# This program is meant for filesystem maintenance after a package installation
# _ or upgrade in each node, for example by means of multi-apt-get-chroot; it
# _ hard-links together identical files on different nodes, acting only on
# _ the files within a given set of packages.
#
# Remember the name under which this script was invoked;
# _ it is used as the prefix of the messages printed below.
set name = `basename $0`
#
# Configuration file: the flag is raised while option -C is waiting
# _ for its argument, and the path is the default configuration file.
set conflag = 0 confile = "/etc/cluster.conf"
#
# Verbosity level (lowered by -q), full-scan flag (raised by -f)
# _ and double-scan flag (raised by -d) for the pairs of nodes.
set verbout = 1 fulflg = 0 dblflg = 0
#
# The list of packages to act on, initially empty.
set packages = ""
#
# Pausing at the end: the pause flag, the flag which is raised while
# _ option -P is waiting for its optional argument, and the pause time.
set pseflag = 0 pseargm = 0 psetime = 0
#
# Walk through the command-line arguments, one at a time.
foreach arg ( $* )
    #
    # An argument whose first character is a dash is an option.
    if ( "`echo -n $arg | cut -c 1`" == "-" ) then
	#
	# A new option showed up while a previous one was still waiting
	# _ for its argument: this is fatal in the case of -C, while in
	# _ the case of -P the pause time is taken to be missing and
	# _ falls back to the default value of 0.
	if ( $conflag == 1 ) then
	    echo "${name}: ERROR: option -C requires an argument"
	    exit 1
	else if ( $pseargm == 1 ) then
	    set pseargm = 0
	    set psetime = 0
	endif
	#
	# Dispatch on the option itself.
	switch ( $arg )
	case "-h":
	case "--help":
	    #
	    # Show the usage summary and leave.
	    echo "usage: $name [-C <config>] [-q] [-f] [-d] [-E|-P [n]] <pack> [<pack> ...]"
	    echo "       -C: use alternate configuration file <config>"
	    echo "       -q: no progress reporting, only loggable output"
	    echo "       -f: scan each pair of nodes once, in increasing order"
	    echo "       -d: scan each pair of nodes twice, once in each order"
	    echo "       -E: exit immediately after executing the task"
	    echo "       -P: pause for n seconds after execution and then exit"
	    echo "               If n=0 then wait for [Enter] after execution"
	    echo "       hard-link together identical files on diferent nodes;"
	    echo "       each '<pack>' must be the name of a package installed"
	    echo "       on the nodes (_not_ the name of a .deb package file);"
	    echo "       the program will try to act only on the files within"
	    echo "       the packages entered; to get all the details run the"
	    echo "       command 'man $name'"
	    exit 0
	    breaksw
	case "-C":
	case "--Config-file":
	    #
	    # The next argument must be the configuration file.
	    set conflag = 1
	    breaksw
	case "-q":
	case "--quiet":
	    #
	    # Turn off the progress reporting.
	    set verbout = 0
	    breaksw
	case "-f":
	case "--full-scan":
	    #
	    # Ask for a full scan of the pairs of nodes.
	    set fulflg = 1
	    breaksw
	case "-d":
	case "--double-scan":
	    #
	    # Ask for a double scan of the pairs of nodes.
	    set dblflg = 1
	    breaksw
	case "-E":
	case "--Exit":
	    #
	    # No pause at the end: zero the pause time
	    # _ and lower the pause flag.
	    set psetime = 0
	    set pseflag = 0
	    breaksw
	case "-P":
	case "--Pause":
	    #
	    # Pause at the end; the next argument
	    # _ may be the pause time.
	    set pseflag = 1 pseargm = 1
	    breaksw
	default:
	    #
	    # Anything else is not a known option.
	    echo "${name}: ERROR: unknown option $arg; try -h to get help"
	    exit 1
	    breaksw
	endsw
    #
    # A non-option is either the argument of an option or a package name.
    else
	#
	# First serve any option which is waiting for its argument.
	if ( $conflag == 1 ) then
	    #
	    # This is the configuration file given to -C.
	    set confile = $arg
	    set conflag = 0
	else if ( $pseargm == 1 ) then
	    #
	    # The pause time of -P is optional, so test whether this argument
	    # _ is made only of digits; if it is not, the pause time gets the
	    # _ default value and the argument is taken as a package name.
	    echo $arg | grep -q '^[0-9]*$'
	    if ( $status != 0 ) then
		set psetime = 0
		set packages = ( $packages $arg )
	    else
		set psetime = $arg
	    endif
	    #
	    # Either way, -P is no longer waiting for an argument.
	    set pseargm = 0
	else
	    #
	    # A plain argument is the name of a package to
	    # _ act on; add it to the list of packages.
	    set packages = ( $packages $arg )
	endif
    endif
end
#
# An option may still be waiting for its argument at the end of the
# _ command line; for -C this is an error, as the file is mandatory.
if ( $conflag == 1 ) then
    echo "${name}: ERROR: option -C requires an argument"
    exit 1
endif
#
# For -P the pause time is optional, so just assume that it
# _ is missing and give it the default value of 0.
if ( $pseargm == 1 ) then
    set pseargm = 0
    set psetime = 0
endif
#
# There must be at least one package name to act on.
if ( "$packages" == "" ) then
    echo "${name}: ERROR: argument required: package name(s)"
    exit 1
endif
#
# A double scan is also a full scan, so -d implies -f.
if ( $dblflg == 1 ) set fulflg = 1
#
# Refuse to go on without a readable configuration file.
if ( ! -r $confile ) then
    echo "${name}: ERROR: cannot read configuration file $confile"
    exit 1
endif
#
# Source the configuration file; it must define the following
# _ variables: virt_node; cluster_root; mount_points; hard_linked.
source $confile
#
# Simple error detection: each one of the required variables
# _ must now be defined, or else the configuration is unusable.
if ( $?virt_node == 0 ) then
    echo "${name}: ERROR: virt_node not defined in configuration file"
    exit 1
endif
if ( $?cluster_root == 0 ) then
    echo "${name}: ERROR: cluster_root not defined in configuration file"
    exit 1
endif
if ( $?mount_points == 0 ) then
    echo "${name}: ERROR: mount_points not defined in configuration file"
    exit 1
endif
if ( $?hard_linked == 0 ) then
    echo "${name}: ERROR: hard_linked not defined in configuration file"
    exit 1
endif
#
# A few constants first: the separator line for the output, the
# _ location of the library and a variable with the tab character.
set sep = "--------------------------------------------------------------------------------"
set libdir = /usr/lib/cluster
set tab = "`echo -n '\t'`"
#
# Count the characters in the value of virt_node; this is
# _ taken as the number of digits in the node numbers.
set ndig = `echo -n $virt_node | wc -c`
#
# Build the pattern for the node numbers: start with a single
# _ "[0-9]" and append one more for each additional digit.
set node_digs = "[0-9]"
set idig = 1
while ( $idig < $ndig )
    set node_digs = "${node_digs}[0-9]"
    @ idig++
end
#
# From the root of the cluster filesystem, list
# _ all the entries which match the pattern.
cd $cluster_root
set nds = `\ls -d $node_digs`
#
# Simple error detection: there must be at least one node
# _ directory within the current working directory.
if ( "$nds" == "" ) then
    echo "${name}: ERROR: cannot find node directories in $cwd"
    exit 1
endif
#
# Return to the directory we came from.
cd -
#
# Build the egrep target with the exclusion list; in order to be used with the
# _ Debian <package>.list files, this must be a global list, with the complete
# _ absolute paths to the files, starting from the root, including all the
# _ existing hard-linked node filesystems; start by avoiding empty lines.
#
# The result is left in the variable etarg, as a sequence of alternatives
# _ separated by "|", each one anchored at the beginning of the line.
set etarg = '^$'
#
# Loop over the hard-linked cluster filesystems.
foreach hl ( $hard_linked )
    #
    # Define the extension for the centralized exclusion files and the
    # _ anchored prefix which turns an entry of an exclusion file into
    # _ an absolute path; the filesystem "." is the root filesystem.
    if ( "$hl" == "." ) then
	set ext = root
	set pfx = "^/"
    else
	set ext = $hl
	set pfx = "^/$hl/"
    endif
    #
    # Collect the exclusion files for the current filesystem: always the
    # _ required exclusions, followed by the local file "hard-link-exclusions"
    # _ if it is readable or, otherwise, by the default exclusion list.
    set exclfiles = ( $libdir/hard-link-required-exclusions.$ext )
    if ( -r $cluster_root/$hl/hard-link-exclusions ) then
	set exclfiles = ( $exclfiles $cluster_root/$hl/hard-link-exclusions )
    else
	set exclfiles = ( $exclfiles $libdir/hard-link-exclusions.$ext )
    endif
    #
    # Add to the target the entries of each exclusion file which is readable.
    foreach exclfile ( $exclfiles )
	if ( -r $exclfile ) then
	    #
	    # Drop the comment lines, which may be indented by blanks or
	    # _ tabs; the class [[:blank:]] is used because a shell variable
	    # _ holding the tab character would not be expanded within the
	    # _ single quotes, and would be taken literally by grep.
	    foreach file ( `grep -v '^[[:blank:]]*#' $exclfile` )
		set etarg = "$etarg|$pfx$file"
	    end
	endif
    end
end
#
# Set the number of nodes to be used as templates: by default only
# _ the first (virtual) node is used as the template, but with the
# _ full-scan flag up all the nodes are used as templates, in turn.
set mnd = 1
if ( $fulflg != 0 ) set mnd = $#nds
#
# Main loop: for each template node, find the hard-link candidate files
# _ within the packages and then hard-link them to the target nodes, in
# _ each one of the hard-linked filesystems.
#
# Initialize an outer counter for the templates.
set jnd = 1
#
# Loop over the template nodes.
while ( $jnd <= $mnd )
    #
    # Define the name of the template directory.
    set tpl = $nds[$jnd]
    #
    # Report the current template node.
    echo $sep
    echo current template is: $tpl
    #
    # Start a variable for the list of <package>.list files;
    # _ start it with a temporary file for the Debian
    # _ control files related to the packages.
    #
    # Create the temporary file with mktemp, which picks an unpredictable
    # _ name and creates the file, empty, in a safe way; a name built from
    # _ the current time could be guessed and planted in advance in /tmp.
    set tfile = `mktemp /tmp/hard-link-packages-control-files.list.XXXXXX`
    #
    # Without the temporary file there is no way to go on.
    if ( "$tfile" == "" ) then
	echo "${name}: ERROR: cannot create a temporary file in /tmp"
	#
	# Print a final separator and exit.
	echo $sep
	exit 1
    endif
    #
    # Add the APT cache files to the temporary file; include any
    # _ package files stored in the archives/ subdirectory; the
    # _ sed filter strips the cluster root and the template
    # _ directory from the paths.
    find $cluster_root/var/$tpl/cache/apt/ -maxdepth 2 -type f | \
	sed -e "s|^$cluster_root/|/|g" -e "s|/$tpl/|/|g" >> $tfile
    #
    # Add the global Debian control files to the temporary file.
    find $cluster_root/var/$tpl/lib/dpkg/ -maxdepth 1 -type f | \
	sed -e "s|^$cluster_root/|/|g" -e "s|/$tpl/|/|g" >> $tfile
    #
    # Add the APT control files to the temporary file.
    find $cluster_root/var/$tpl/lib/apt/lists/ -maxdepth 1 -type f | \
	sed -e "s|^$cluster_root/|/|g" -e "s|/$tpl/|/|g" >> $tfile
    #
    # Initialize the variable for the list of list-files.
    set lsfls = $tfile
    #
    # Loop over the package arguments.
    foreach pack ( $packages )
	#
	# Check that the <package>.list file exists.
	if ( -f $cluster_root/var/$tpl/lib/dpkg/info/$pack.list ) then
	    #
	    # Get the <package>.list file within the template node.
	    set lsfls = ( $lsfls $cluster_root/var/$tpl/lib/dpkg/info/$pack.list )
	    #
	    # Add the package-specific control files to the temporary file.
	    find $cluster_root/var/$tpl/lib/dpkg/info/ -type f -name $pack.\* | \
		sed -e "s|^$cluster_root/|/|g" -e "s|/$tpl/|/|g" >> $tfile
	else
	    #
	    # If the <package>.list file is not found, just issue a warning.
	    echo "${name}: WARNING: package $pack not found in node $tpl"
	endif
    end
    #
    # Build the hard-link-candidates.<tpl> files, for this
    # _ template node, in all the filesystems.
    echo -n "finding hard-link candidate files within the package(s)... "
    $libdir/hard-link-packages.sub \
	"$lsfls" "$etarg" $cluster_root "$mount_points" $tpl "$hard_linked" $#nds
    echo "done."
    #
    # Delete the temporary file.
    \rm -f $tfile
    #
    # Define the collection of target directories, according
    # _ to the state of the double-scan flag.
    if ( $dblflg == 0 ) then
	#
	# Usually each pair of nodes must be tested only once,
	# _ so include only the node directories which come
	# _ after the template directory in the node list;
	# _ initialize an inner counter for the targets.
	set ind = $jnd
	set trg = ""
	while ( $ind < $#nds )
	    #
	    # Increment the inner (target) counter.
	    @ ind = $ind + 1
	    set trg = ( $trg $nds[$ind] )
	end
    else
	#
	# Sometimes it is useful to test each pair of nodes
	# _ twice, once in each order; in this case include
	# _ all the node directories except the template;
	# _ initialize an inner counter for the targets.
	set ind = 0
	set trg = ""
	while ( $ind < $#nds )
	    #
	    # Increment the inner (target) counter.
	    @ ind = $ind + 1
	    if ( $ind != $jnd ) set trg = ( $trg $nds[$ind] )
	end
    endif
    #
    # We must hard-link the files within each filesystem,
    # _ so loop over the hard-linked cluster filesystems.
    foreach hl ( $hard_linked )
	#
	# Go to the mount point of the hard-linked filesystem.
	cd $cluster_root/$hl
	#
	# In the case of the root filesystem, check the system mount
	# _ points of the template; if any of them has any content
	# _ except for "." and "..", exit in error.
	if ( "$cwd" == "$cluster_root" ) then
	    foreach dir ( $mount_points )
		if ( `\ls -1a $tpl/$dir | wc -l` > 2 ) then
		    echo "${name}: ERROR: spurious content detected within mount point $cwd/$tpl/$dir"
		    echo "    you must clean it up before trying to run this again"
		    #
		    # Print a final separator and exit.
		    echo $sep
		    exit 1
		endif
	    end
	endif
	#
	# Get the number of candidate files in this filesystem.
	set ncand = `cat hard-link-candidates.$tpl | wc -l`
	#
	# Print out some progress report.
	echo "working on candidate files within filesystem $cwd ( $ncand )..."
	#
	# Define a lock file relating to this filesystem; its name
	# _ is built from the path of the current directory, with
	# _ the slashes replaced by underscores.
	set lock = /var/lock/hard-linking`echo -n $cwd | tr '/' '_'`
	#
	# Check for a lock file.
	if ( -f $lock ) then
	    #
	    # Write out an error message.
	    echo "${name}: hard-linking already running on this filesystem"
	    #
	    # Skip to the next filesystem.
	    goto skiptonext
	endif
	#
	# Make a lock file.
	touch $lock
	#
	# On interruption, go to the clean exit label.
	onintr cleanexit
	#
	# Hard-link identical files which are not already links to the
	# _ same file; do this in a bash subroutine, passing to it the
	# _ necessary variables, in order to be able to deal with some
	# _ files which may have spaces (and other special characters)
	# _ within their names; with no targets there is nothing to do.
	if ( "$trg" != "" ) then
	    $libdir/hard-link-common-files.sub 0 $verbout $ncand $tpl "$trg"
	endif
	#
	# Remove the lock file; this point is reached both after a
	# _ normal run and after an interruption.
	# NOTE(review): despite the name of the label, nothing below it
	# _ exits the script; presumably the loop just goes on with the
	# _ next filesystem after an interruption - confirm the intent.
	cleanexit:
	rm -f $lock
	#
	# Restore the default interruption behavior.
	onintr
	#
	# A label to continue the loop over filesystems.
	skiptonext:
	#
	# Go back to the original directory.
	cd -
    end
    #
    # Increment the outer (template) counter.
    @ jnd = $jnd + 1
end
#
# Close the output with a final separator line.
echo $sep
#
# Pause before exiting, but only if the pause flag is up.
if ( $pseflag != 0 ) then
    #
    # With a pause time, sleep for that long; with
    # _ no pause time, wait for a line of input.
    if ( "$psetime" != 0 ) then
	sleep $psetime
    else
	echo -n "Hit [Enter] to exit: "
	set answer = $<
    endif
endif
