#!/bin/tcsh -f
# JLdL 08Dec13.
#
# Copyright (C) 2005-2013 by Jorge L. deLyra <delyra@fma.if.usp.br>.
# This program may be copied and/or distributed freely. See the
# _ terms and conditions in /usr/share/doc/<package>/copyright.
#
# This program is meant for filesystem maintenance after a package installation
# _ or upgrade in each node, for example by means of multi-apt-get-chroot;
# _ it can and should be used only for the hard-linked node filesystems.
#
# Remember the name under which this script was invoked; it is used
# _ as the prefix of the messages printed below.
set name = `basename $0`
#
# Configuration file: its default path, and the flag which marks
# _ that the argument of the option -C is still pending.
set confile = "/etc/cluster.conf"
set conflag = 0
#
# Verbosity level; the option -q lowers it to zero.
set verbout = 1
#
# Sub-directory of the filesystems to act on, and the flag which
# _ marks that the argument of the option -s is still pending.
set subdir = ""
set subflg = 0
#
# Scan-mode flags: full scan of the pairs of nodes, double scan of
# _ the pairs of nodes, and cloning of missing files and directories.
set fulflg = 0
set dblflg = 0
set clnflg = 0
#
# List of hard-linked filesystems given on the command line.
set hlfsdirs = ""
#
# Pausing at the end: the pause time, the flag which marks that the
# _ optional argument of -P is still pending, and the pause flag.
set psetime = 0
set pseargm = 0
set pseflag = 0
#
# Process the command-line arguments, one word at a time. An option
# _ which takes a value (-C, -s, -P) only raises a flag when it is seen;
# _ the value itself is picked up from the next non-option word.
foreach cla ( $* )
    #
    # Detect options: any word which starts with a dash.
    if ( "`echo -n $cla | cut -c 1`" == "-" ) then
	#
	# If we got here with an argument flag up, the previous option
	# _ did not get its value; for -C and -s this is an error.
	if ( $conflag == 1 ) then
	    echo "${name}: ERROR: option -C requires an argument"
	    exit 1
	else if ( $subflg == 1 ) then
	    echo "${name}: ERROR: option -s requires an argument"
	    exit 1
	else if ( $pseargm == 1 ) then
	    #
	    # The argument of -P is optional: in this case we just assume
	    # _ that it is missing and attribute the default value of 0.
	    set psetime = 0
	    #
	    # Lower the pause argument flag.
	    set pseargm = 0
	endif
	#
	# Now process the options.
	switch ( $cla )
	case "-h":
	case "--help":
	    #
	    # Print a usage message; the synopsis lists every option
	    # _ which is handled by this switch.
	    echo "usage: $name [-C <config>] [-q] [-s <subdir>] [-f] [-d] [-c] [-E|-P [n]] [<dir> <dir> ...|all]"
	    echo "       -C: use alternate configuration file <config>"
	    echo "       -q: no progress reporting, only loggable output"
	    echo "       -s: act only on sub-directory <subdir> of a filesystem"
	    echo "       -f: scan each pair of nodes once, in increasing order"
	    echo "       -d: scan each pair of nodes twice, once in each order"
	    echo "       -E: exit immediately after executing the task"
	    echo "       -P: pause for n seconds after execution and then exit"
	    echo "               If n=0 then wait for [Enter] after execution"
	    echo "       -c: clone files and directories found missing in a node"
	    echo "               WARNING: this option may cause some damage,"
	    echo "                        read the manual page before using."
	    echo "       hard-link together identical files on different nodes;"
	    echo "       each '<dir>' must be the mount point of a hard-linked"
	    echo "       cluster filesystem; you may use the keyword 'all' to"
	    echo "       do all of them; to get all the details run the"
	    echo "       command 'man $name'"
	    exit 0
	    breaksw
	case "-C":
	case "--Config-file":
	    #
	    # Raise the flag; the next word is the configuration file.
	    set conflag = 1
	    breaksw
	case "-q":
	case "--quiet":
	    #
	    # Lower the verbosity level.
	    set verbout = 0
	    breaksw
	case "-s":
	case "--sub-directory":
	    #
	    # Raise the subdirectory flag; the next word is the sub-directory.
	    set subflg = 1
	    breaksw
	case "-f":
	case "--full-scan":
	    #
	    # Raise the full-scan flag.
	    set fulflg = 1
	    breaksw
	case "-d":
	case "--double-scan":
	    #
	    # Raise the double-scan flag.
	    set dblflg = 1
	    breaksw
	case "-E":
	case "--Exit":
	    #
	    # Lower the pause flag.
	    set pseflag = 0
	    #
	    # Zero the pause time.
	    set psetime = 0
	    breaksw
	case "-P":
	case "--Pause":
	    #
	    # Raise the pause flag.
	    set pseflag = 1
	    #
	    # Raise the pause argument flag; the next word may be the time.
	    set pseargm = 1
	    breaksw
	case "-c":
	case "--clone-missing":
	    #
	    # Raise the clone-missing flag.
	    set clnflg = 1
	    breaksw
	default:
	    #
	    # Print an error message.
	    echo "${name}: ERROR: unknown option $cla; try -h to get help"
	    exit 1
	    breaksw
	endsw
    #
    # Process non-option arguments.
    else
	#
	# Get the arguments of options: a raised flag means that this
	# _ word is the value of the option seen just before it.
	if ( $conflag == 1 ) then
	    #
	    # Set the configuration file.
	    set confile = $cla
	    #
	    # Lower the flag.
	    set conflag = 0
	else if ( $subflg == 1 ) then
	    #
	    # Set the sub-directory to be hard-linked.
	    set subdir = $cla
	    #
	    # Lower the flag.
	    set subflg = 0
	else if ( $pseargm == 1 ) then
	    #
	    # Check whether the argument is a number.
	    echo $cla | grep -q '^[0-9]*$'
	    #
	    # If it is, then set the pause time; otherwise, set the
	    # _ time to the default value and pass on the argument,
	    # _ which is then taken to be a filesystem directory.
	    if ( $status == 0 ) then
		set psetime = $cla
	    else
		set psetime = 0
		set hlfsdirs = ( $hlfsdirs $cla )
	    endif
	    #
	    # Lower the pause argument flag.
	    set pseargm = 0
	else
	    #
	    # Get and accumulate the list of filesystems from the command line
	    # _ arguments; arguments are filesystem directories to act on.
	    set hlfsdirs = ( $hlfsdirs $cla )
	endif
    endif
end
#
# After the last word no option may still be waiting for a mandatory
# _ argument; this is fatal for the options -C and -s.
if ( $conflag == 1 ) then
    echo "${name}: ERROR: option -C requires an argument"
    exit 1
endif
if ( $subflg == 1 ) then
    echo "${name}: ERROR: option -s requires an argument"
    exit 1
endif
#
# The argument of -P is optional: if it is still pending, lower
# _ the argument flag and fall back to the default time of 0.
if ( $pseargm == 1 ) then
    set pseargm = 0
    set psetime = 0
endif
#
# An absolute path is not acceptable as the value of subdir:
# _ look at its first character and quit if it is a slash.
set sdhead = "`echo -n $subdir | cut -c 1`"
if ( "$sdhead" == "/" ) then
    echo "${name}: ERROR: argument of '-s' must be a relative path"
    exit 1
endif
#
# If subdir starts with './', keep only what follows that prefix.
set sdhead = "`echo -n $subdir | cut -c 1-2`"
if ( "$sdhead" == "./" ) then
    set subdir = `echo $subdir | cut -c 3-`
endif
#
# Handle the option --clone-missing.
if ( $clnflg == 1 ) then
    #
    # Cloning without a sub-directory to hard-link files within
    # _ requires an explicit confirmation from the user.
    if ( "$subdir" == "" ) then
	#
	# Issue a warning and provide a safety exit.
	echo "WARNING: option --clone-missing was used, but no sub-directory"
	echo "to hard-link files within was entered; this may cause problems"
	echo -n "in some cases. Are you sure you want to do this? [no]: "
	set imsure = $<
	#
	# Anything other than an explicit lowcase yes means quitting.
	if ( "$imsure" != yes ) then
	    echo "${name}: quitting: you did not enter 'yes'"
	    exit 0
	endif
    endif
    #
    # Cloning implies the double-scan mode.
    set dblflg = 1
endif
#
# The double-scan mode in turn implies the full-scan mode.
if ( $dblflg == 1 ) set fulflg = 1
#
# With no filesystem given on the command line, act
# _ on the current working directory.
if ( "$hlfsdirs" == "" ) set hlfsdirs = .
#
# The configuration file must be readable; it is sourced and is expected
# _ to define: virt_node; cluster_root; mount_points; hard_linked.
if ( ! -r $confile ) then
    echo "${name}: ERROR: cannot read configuration file $confile"
    exit 1
endif
source $confile
#
# Simple error detection: quit at the first one of the necessary
# _ variables which the configuration file failed to define.
if ( ! $?virt_node ) then
    echo "${name}: ERROR: virt_node not defined in configuration file"
    exit 1
endif
if ( ! $?cluster_root ) then
    echo "${name}: ERROR: cluster_root not defined in configuration file"
    exit 1
endif
if ( ! $?mount_points ) then
    echo "${name}: ERROR: mount_points not defined in configuration file"
    exit 1
endif
if ( ! $?hard_linked ) then
    echo "${name}: ERROR: hard_linked not defined in configuration file"
    exit 1
endif
#
# Count the digits in the node numbers, using virt_node as the model.
set ndig = `echo -n $virt_node | wc -c`
#
# Assemble the pattern for the node numbers, with one [0-9] class
# _ per digit; there is always at least one class in the pattern.
set node_digs = "[0-9]"
set left = $ndig
while ( $left > 1 )
    set node_digs = "${node_digs}[0-9]"
    @ left = $left - 1
end
#
# Collect the absolute paths to all the hard-linked filesystems;
# _ the entry "." stands for the cluster root itself.
set hls = ( )
foreach hl ( $hard_linked )
    if ( "$hl" != "." ) then
	set hls = ( $hls $cluster_root/$hl )
    else
	set hls = ( $hls $cluster_root )
    endif
end
#
# The keyword 'all', given as the only filesystem argument,
# _ selects the complete list just built.
if ( "$hlfsdirs" == "all" ) set hlfsdirs = ( $hls )
#
# A separator line for the reports.
set sep = "--------------------------------------------------------------------------------"
#
# The location of the library.
set libdir = /usr/lib/cluster
#
# A variable holding the tab character.
set tab = "`echo -n '\t'`"
#
# Loop over the filesystem arguments. For each one: check that it is
# _ the mount point of a hard-linked cluster filesystem, build the
# _ exclusion target, take a lock, and then run the hard-linking
# _ subroutine once per template node.
foreach hlfsdir ( $hlfsdirs )
    #
    # Go to the root of the filesystem.
    cd $hlfsdir
    #
    # Do some simple error detection: check that this is being executed
    # _ within the mount-point of a hard-linked cluster filesystem.
    set hlflag = 0
    foreach hlfs ( $hard_linked )
	if ( "$hlfs" == "." ) then
	    set fsmp = $cluster_root
	else
	    set fsmp = $cluster_root/$hlfs
	endif
	if ( "$cwd" == "$fsmp" ) then
	    set hlflag = 1
	endif
    end
    if ( "$hlflag" == 0 ) then
	echo "${name}: ERROR: $cwd is not a hard-linked cluster filesystem"
	exit 1
    endif
    #
    # Define the list of all node directories: the entries of the
    # _ current directory which match the node number pattern.
    set nds = `\ls -d $node_digs`
    #
    # Do some simple error detection: check that there are some node
    # _ directories within the current working directory.
    if ( "$nds" == "" ) then
	echo "${name}: ERROR: cannot find node directories in $cwd"
	exit 1
    endif
    #
    # Define the extension for the centralized exclusion files.
    if ( "$cwd" == "$cluster_root" ) then
	set ext = root
    else
	set ext = `basename $cwd`
    endif
    #
    # Build the egrep exclusion target: start by avoiding empty lines.
    set etarg = '^$'
    #
    # Add the required exclusions for the current filesystem,
    # _ if the required exclusion file is readable. The comment
    # _ filter is double-quoted so that the tab variable is really
    # _ expanded within the bracket expression; in this way comment
    # _ lines indented by spaces or by tabs are both dropped.
    set exclfile = $libdir/hard-link-required-exclusions.$ext
    if ( -r $exclfile ) then
	foreach file ( `cat $exclfile | grep -v "^[ ${tab}]*#"` )
	    set etarg = "$etarg|^$file"
	end
    endif
    #
    # Add any files or directories listed in the local file
    # _ "hard-link-exclusions", if it is readable.
    set exclfile = hard-link-exclusions
    if ( -r $exclfile ) then
	foreach file ( `cat $exclfile | grep -v "^[ ${tab}]*#"` )
	    set etarg = "$etarg|^$file"
	end
    #
    # Otherwise, use the default exclusion list, if it is readable.
    else
	set exclfile = $libdir/hard-link-exclusions.$ext
	if ( -r $exclfile ) then
	    foreach file ( `cat $exclfile | grep -v "^[ ${tab}]*#"` )
		set etarg = "$etarg|^$file"
	    end
	endif
    endif
    #
    # Report the current filesystem.
    echo working on hard-linked cluster filesystem $cwd
    #
    # Define a lock file relating to this filesystem; its name is
    # _ derived from the path, with the slashes turned to underscores.
    set lock = /var/lock/hard-linking`echo -n $cwd | tr '/' '_'`
    #
    # Check for a lock file.
    if ( -f $lock ) then
	#
	# Write out an error message.
	echo "${name}: hard-linking already running on this filesystem"
	#
	# Skip to the next filesystem, leaving the existing lock alone.
	goto skiptonext
    endif
    #
    # Make a lock file.
    touch $lock
    #
    # On interruption, go to the clean exit label,
    # _ so that the lock file gets removed.
    onintr cleanexit
    #
    # Decide whether to loop over all nodes as templates, or to
    # _ use just the first (virtual) node as the template,
    # _ depending on the state of the full-scan flag.
    if ( $fulflg == 0 ) then
	#
	# Use the first node as the template,
	set mnd = 1
    else
	#
	# Loop over all nodes as templates.
	set mnd = $#nds
    endif
    #
    # Initialize an outer counter for the templates.
    set jnd = 1
    #
    # Loop over the template nodes.
    while ( $jnd <= $mnd )
	#
	# Define the name of the template directory.
	set tpl = $nds[$jnd]
	#
	# Report the current template node.
	echo $sep
	echo current template is: $tpl
	#
	# In the case of the root filesystem, check the system mount
	# _ points of the template; if any of them has any content
	# _ except for "." and "..", stop working on this filesystem.
	# NOTE(review): the jump to cleanexit removes the lock but does
	# _ not set a failure exit status - confirm that this is intended.
	if ( "$cwd" == "$cluster_root" ) then
	    foreach dir ( $mount_points )
		if ( `\ls -1a $tpl/$dir | wc -l` > 2 ) then
		    echo "${name}: ERROR: spurious content detected within mount point $cwd/$tpl/$dir"
		    echo "    you must clean it up before trying to run this again"
		    #
		    # Print a final separator and exit.
		    echo $sep
		    goto cleanexit
		endif
	    end
	endif
	#
	# Define the list of template sub-directories; the var/
	# _ filesystem requires special treatment, because
	# _ some of it is very node-specific.
	if ( "$cwd" == $cluster_root/var ) then
	    #
	    # The only subdirectories of var worth while trying
	    # _ to hard-link are the cache and lib directories;
	    # _ they are the two largest ones and most of their
	    # _ content can be hard-linked; the next larger one
	    # _ is the log subdirectory, but it is completely
	    # _ node-specific.
	    #
	    # Check if a sub-directory was entered.
	    if ( "$subdir" == "" ) then
		set tsd = ( $tpl/cache $tpl/lib )
	    #
	    # One may hard-link only a given subdirectory.
	    else if ( "`echo -n $subdir | cut -c 1-6`" == "cache/" || \
		      "`echo -n $subdir | cut -c 1-4`" == "lib/" ) then
		set tsd = ( $tpl/$subdir )
	    else
		echo "${name}: ERROR: bad value for the argument of '-s'"
		echo "    within the var filesystem it must start with 'cache/' or 'lib/'"
		goto cleanexit
	    endif
	else
	    #
	    # In all other cases one can try to link the whole
	    # _ filesystem, with the exception of a few files
	    # _ or directories listed in the exclusion files.
	    #
	    # Check if a sub-directory was entered.
	    if ( "$subdir" == "" ) then
		set tsd = ( $tpl )
	    else
		set tsd = ( $tpl/$subdir )
	    endif
	endif
	#
	# Define the collection of target directories, according
	# _ to the state of the double-scan flag.
	if ( $dblflg == 0 ) then
	    #
	    # Usually each pair of nodes must be tested only once,
	    # _ so include only the node directories which come
	    # _ after the template directory in the node list;
	    # _ initialize an inner counter for the targets.
	    set ind = $jnd
	    set trg = ""
	    while ( $ind < $#nds )
		#
		# Increment the inner (target) counter.
		@ ind = $ind + 1
		set trg = ( $trg $nds[$ind] )
	    end
	else
	    #
	    # Sometimes it is useful to test each pair of nodes
	    # _ twice, once in each order; in this case include
	    # _ all the node directories except the template;
	    # _ initialize an inner counter for the targets.
	    set ind = 0
	    set trg = ""
	    while ( $ind < $#nds )
		#
		# Increment the inner (target) counter.
		@ ind = $ind + 1
		if ( $ind != $jnd ) set trg = ( $trg $nds[$ind] )
	    end
	endif
	#
	# Find all common files with less than the expected number
	# _ of links in the template directory and save the list of
	# _ paths to a temporary file, but first cut off the first
	# _ directory in the path and filter out the exclusions.
	echo -n "finding hard-link candidate files in the template... "
	find $tsd -type f -and -links -$#nds | cut -d / -f 2- | \
	    egrep -v "$etarg" | sort >! hard-link-candidates.$tpl
	set ncand = `cat hard-link-candidates.$tpl | wc -l`
	echo "( $ncand )... done."
	#
	# Hard-link identical files which are not already links to the
	# _ same file; do this in a bash subroutine, passing to it the
	# _ necessary variables, in order to be able to deal with some
	# _ files which may have spaces (and other special characters)
	# _ within their names. The subroutine is skipped if there
	# _ are no target directories for the current template.
	if ( "$trg" != "" ) then
	    $libdir/hard-link-common-files.sub $clnflg $verbout $ncand $tpl "$trg"
	endif
	#
	# Increment the outer (template) counter.
	@ jnd = $jnd + 1
    end
    #
    # Remove the lock file; this label is also the target of the
    # _ interruption handler and of the error exits above.
    cleanexit:
    rm -f $lock
    #
    # Restore the default interruption behavior.
    onintr
    #
    # A label to continue the loop over filesystems.
    skiptonext:
    #
    # Print a final separator.
    echo $sep
    #
    # Go back to the original directory.
    cd -
end
#
# Pause before exiting, but only if the pause flag is up.
if ( $pseflag ) then
    #
    # With a non-zero pause time, sleep for that long;
    # _ otherwise wait until the user hits [Enter].
    if ( "$psetime" != 0 ) then
	sleep $psetime
    else
	echo -n "Hit [Enter] to exit: "
	set iwait = $<
    endif
endif
