#! /usr/bin/perl -w
# -*- cperl -*-

# use diagnostics;

# This is TVS, system for storing complete source codes for TeX documents
# Written and maintained by David Antos, xantos (at) fi.muni.cz
# Distributed free od charge but without ANY WARRANTY in hope you find
# it useful. See enclosed General Public License for details.

# Copyright (c) 2000 David Antos
# $Author: antos $
# $Id: tvs.pl,v 1.28 2000/08/16 16:56:41 antos Exp $

use strict;
use locale;
use Getopt::Long;
use Cwd qw(cwd abs_path);
use File::Basename;
use File::Copy;
use File::Path;

sub build_patterns {
# Builds patterns from specified file
# Input:  Filename --- pattern file
#         Pattern list reference
# Output: Destroys second parameter and fills it up.

  my ($pattern_file, $patts) = @_;

  if (not defined($pattern_file) or $pattern_file eq "") {
    @$patts = ();
    return;
  }
  if (open(PATTERNS, $pattern_file)) {
    print "\nOpened file \"$pattern_file\"\n" if ($::verbose);
    print "Building pattern list\n" if ($::verbose);

    my ($pat);
    while ($pat = <PATTERNS>) {
      chomp($pat);
      ($pat) = split(/[%#]/, $pat); # Erase comments --- they start with # or %
      next unless (defined($pat)); # Nothing to do
      # Delete white characters before and after non-white characters
      $pat =~ s/\s*(.*\S)\s*/$1/;
      next if ($pat eq ""); # Nothing to do
      push(@$patts, $pat);
      print "Found pattern \"$pat\"\n" if ($::verbose >= 2);
    }
    close(PATTERNS);
  }
  else {
    @$patts = ();
    warn "\nWARNING: File \"$pattern_file\" can't be opened.\n";
  }
} # build_patterns

sub parse_tex_log {
# Finds possible filenames in TeX log file, tests if they are real files.
# Omits files matching RE in @$ign_patts.
# Input:  Name of TeX logfile (with .log)
#         List of patterns to ignore (reference)
#         Hash to add matching names (ref.)
# Output: Adds names to %$filenms, keys are filenames,
#            values are filenames too

  my ($logname, $ign_patts, $filenms) = @_;

  open(TEXLOG,$logname) or die "\nCan't open logfile \"$logname\"\n";
  print "\nOpened logfile \"$logname\"\n";

  my($line, $filename, $oldfn, $pat);
  while ($line = <TEXLOG>) {
    chomp($line);
    # In log, anything between "(" or "<" and end-of-line may be a filename.
    # Parsing of it is quite stupid because there may be spaces
    # in filenames. I decided to use real brutal force.
    # So we go to the first ( or < in line, test the rest, cut the rightmost
    # character, test, and so on.
    # Even names ended with ),_,', etc. are tested to be files, but
    # --- who knows --- what if someone uses them [:-O]?
    # It's slow but it doesn't need much memory, which can be useful
    # for DOS/emTeX users. (Sorry, users of _real_ operating systems...)
    BRACKET:
    while ($line =~ /[\(\<]/) { # left "(" or "<" in line
      $line =~ s#^.*?[\(\<]+(.*)$#$1#; # cut to the first left bracket (incl.)
      $filename = $line;
      do {
        print "Testing name: \"$filename\"\n" if ($::verbose >= 2);
	if (-f $filename) { # it's a real file
	  print "> Found file: \"$filename\"\n" if ($::verbose);
	  $$filenms{$filename} = $filename;
          # If $filename matches any pattern, then delete it
	  foreach $pat (@$ign_patts) {
	    if ($filename =~ m#$pat$#) {
	      print "    ... ignored\n" if ($::verbose);
	      print "    ... matches pattern \"$pat\"\n" if ($::verbose>=2);
	      delete($$filenms{$filename});
	    }
	  }
	  # If there is only -, \w, _, \, /, : or .,
	  # it won't be a filename anymore
	  if ($filename =~ /^[-\w_\\\/\.:]*$/) { next BRACKET; }
          # (This optimalization may be omitted if you think it is unsafe.)
	}
      	$oldfn = $filename; # remember last
	$filename =~ s#^(.+).$#$1#; # cut the rightmost character
      } while($oldfn ne $filename); # while it changes
    } # while "(" or "<"
  }
  close TEXLOG;
} # parse_tex_log

sub get_format_name {
# Gets format name from log, parses first format=this_is_result in file
# If opt_fmtname was given, returns opt_fmtname
# Input:  Filename (with .log)
# Output: Format name (returns string) or "" if failed

  my ($filenm) = @_;
  my ($line);

  if ($::opt_fmtname ne "") {
    print "\nForced format \"$::opt_fmtname\". Make sure this is correct!\n";
    return $::opt_fmtname;
  }
  else {
    if (not open(TEXLOG, $filenm)) {
      print "\nCan't open logfile \"$filenm\" to get format name\n";
    }
    else {
      while ($line = <TEXLOG>) {
        chomp($line);
        if ($line =~ s#.*format=(\w*).*#$1#) { return $line; };
        # Success
      };
    };
  };
  return ""; # Failure
} # get_format_name

sub determine_storing_names {
# Reads keys from hash --- filenames to store.
# Checks for name conflicts and solves them.
# Input:  Ref. to hash, it's keys are filenames, values equal to keys
#         Ref. to list containing patterns to cut from paths
# Output: Hash, keys are keeping new names, values original ones

  my($filenms, $cuts) = @_;

  my (%storing_names); # New names => orig. (real) names
  my ($fn);

  foreach $fn (keys %$filenms) {
    my ($newfn) = $fn;
    print "Preparing name for file \"$fn\"\n" if ($::verbose);
    # Determine new storing name
    my ($pat);
    foreach $pat (@$cuts) {
      $pat =~ s#^(.*[^\\\/])[\\\/]?#$1#; # cut the rightmost / or \
      print " Checking pattern \"$pat\"\n" if ($::verbose >= 2);
      # cut filename if starts with $pat, only first pattern is used
      if ( $newfn =~ m#^$pat[\\\/](.*)# ) {
        $newfn = $1;
	print " Pattern matches.\n" if ($::verbose >= 2);
	last; # exit cycle
      };
    }; # foreach $pat
    # if new name doesn't conflict, add it
    if (not exists($storing_names{$newfn})) {
      print "Name \"$newfn\" will be used.\n" if ($::verbose);
      $storing_names{$newfn} = $fn;
    }
    else { # error, name conflicts
      print "\nError, name \"$newfn\" conflicts.\n";
      print "Cannot save files \"$fn\"\n";
      print "and \"$storing_names{$newfn}\"\n";
      print "with the same name \"$newfn\".\n";
      print "Please check setting of paths to cut. Shortenning of belonging\n";
      print "prefix should help. Consult the documentation when in trouble.\n";
      die "*** Name conflict detected, gave up ***\n";
    };
  };
  return %storing_names;
} # determine_storing_names

sub copy_files {
# Takes keys from hash and copies all files from it to working_dir/prefix
# Input:  Prefix, hash --- keys are filenames
# Output: None

  my($prefix, %filenms) = @_;
  my ($name, $prefname);

  foreach $name (keys %filenms) {
    $prefname = $name;
    # cut leading / or \
    $prefname =~ s#[\\\/]?(.*)#$1#;
    $prefname = $prefix.'/'.$prefname;
    # Prepare directories, umask may be switch in the future
    mkpath(dirname($prefname), $::verbose, 0700) unless ($::opt_dummy);
    copy($filenms{$name}, $prefname) unless ($::opt_dummy);
    print "Copying \"$filenms{$name}\"\n to \"$prefname\"\n" if ($::verbose);
  };
} #copy_files

sub main_pack {
# Main procedure for packing files
# Creates tree of source files
# Input:  Filename of TeX source, extension .log is tried
# Output: None
# Uses:   Variables $main::opt_...

  my ($source) = @_;
  my (%filenames); # Keys are filenames to store --- source
  my (@ignore_patterns); # Patterns to be ignored
  my (@path_cuts); # Paths to cut

  if ($::opt_ignore ne "") {
    print "\nBuilding ignore pattern list\n" if ($::verbose);
    &build_patterns($::opt_ignore, \@ignore_patterns);
  }
  else {
    @ignore_patterns = ();
  };

  if ($::opt_pathcuts ne "") {
    print "\nBuilding list of paths to cut\n" if ($::verbose);
    &build_patterns($::opt_pathcuts, \@path_cuts);
  }
  else {
    @path_cuts = ();
  };

  # If file $source doesn't exist try dtto.log
  if (not -f $source) {
    print "\nFile \"$source\" does not exist, trying \"${source}.log\"\n";
    $source = "${source}.log";
    if (not -f $source) { die "File \"$source\" does not exist\n"; }
  }

  %filenames=();
  # Get list of needed files from TeX log
  &parse_tex_log($source, \@ignore_patterns, \%filenames);
  %filenames = &determine_storing_names(\%filenames, \@path_cuts);
  &copy_files("$::opt_output",%filenames);

  # If format needed, get name
  if ($::opt_format ne "") {
    %filenames = (); # Delete all, it is done and copied
    my ($format_name) = &get_format_name($source);
    print "\nGot format name \"$format_name\"\n";
    # Delete last / or \
    $::opt_format =~ s/^(.*[^\\\/])[\\\/]?/$1/;
    &parse_tex_log($::opt_format."/".$format_name.'.log',
	  \@ignore_patterns, \%filenames);
    %filenames = &determine_storing_names(\%filenames, \@path_cuts);
    &copy_files("$::opt_output"."/FORMAT", %filenames);
  }; # if format wanted

  # If font sources needed, get them
  if ($::opt_dvips ne "") {
    %filenames = (); # Delete all
    print "\nGetting font source files from dvips log \"$::opt_dvips\"\n";
    &parse_tex_log($::opt_dvips, \@ignore_patterns, \%filenames);
    %filenames = &determine_storing_names(\%filenames, \@path_cuts);
    &copy_files("$::opt_output"."/FONTS", %filenames);
  }; # if dvips

} # main_pack

# -------------- Help and version info ---------
sub print_version {
# Prints version
# Input, output: none
  print "\nThis is TVS, TeX Versioning System\n";
  print 'Version 1.0 ($Id: tvs.pl,v 1.28 2000/08/16 16:56:41 antos Exp $)'.
    "\n";
  print "This is free software distributed under General Public License\n";
  print "but WITHOUT ANY WARRANTY\n";
} # print_version

sub print_usage {
# Prints usage info
# Input, output: none
  print "\nUsage:\n";
  print "tvs.pl -h | --help            print this help and exit\n\n";
  print "tvs.pl [options] filename[.log]\n\n";
  print "where options are:\n";
  print "  -v | --verbose              set verbosity level\n";
  print "  --dummy                     no real work\n";
  print "  -c | --config file          set configuration file\n";
  print "  -i | --ignore [file]        set ignore pattern file\n";
  print "  -p | --pathcuts [file]      set pathcuts patt. file\n";
  print "  -o | --output dir           output directory name\n";
  print "  -f | --format [dir]         pack format sources\n";
  print "  --fmtname [name]            forces format name\n";
  print "  -d | --dvips file           pack font sources -- experimental!\n";
  print "\n";
  print "Please consult the documentation. Note that packing source codes\n";
  print "is a delicate and sensitive operation.\n\n";
} # print_usage

# ------------------ Parsing config ------------
sub parse_config_file {
# Parses config file and sets options with omitted value on command line
# Input, output: none
# Changes: Values of %config hash (global in main)

  # Default name of config file
  if (not defined($::opt_config)) { $::opt_config = "tvsrc"; };

  if (not open(CONFIG, $::opt_config)) {
    warn "\nConfig file \"$::opt_config\" can't be opened.\n";
    warn "Config file ignored.\n";
  }
  else {
    # Parse config file
    my ($line, $name, $value);
    while ($line = <CONFIG>) {
      chomp($line);
      # Ignore anything after # or $
      ($line) = split(/[%#]/, $line); # Erase comments, they start with # or %
      next unless defined($line);
      # Delete white chars
      $line =~ s#\s*(.*\S)\s*#$1#;
      # Split on first space
      ($name, $value) = split(/\s/, $line, 2);
      next unless defined($name);
      next unless defined($value);
      # Set values
      if ($name ne "" and $value ne "") { $::config_file{$name} = $value; };
    }
  }
} # parse_config_file

# --------------- MAIN PROGRAM -----------------
# Handles parsing of command line arguments and calls actions.
# Input: see Getopt command

# Print header
&print_version();

# Set option variables
Getopt::Long::Configure("bundling","no_auto_abbrev");
# Allow write e.g. -vv, allow only full names of options

# These are global options. They are taken from commandline,
# if not set (or value omitted), config file is usually tried.
use vars qw($verbose $opt_help $opt_config $opt_dummy $opt_format);
use vars qw($opt_fmtname $opt_dvips $opt_ignore $opt_output $opt_pathcuts);

if (not GetOptions('v|verbose+',\$verbose,         # verbosity level 
                   'h|help',\$opt_help,            # print help and exit
		   'c|config=s',\$opt_config,      # config file name
		   'dummy',\$opt_dummy,            # do not really copy files

		   'f|format:s',\$opt_format,      # dir with iniTeX logs
		   'fmtname=s',\$opt_fmtname,      # format name (forced)
		   'd|dvips=s',\$opt_dvips,        # dvips log

		   'i|ignore:s',\$opt_ignore,      # file with names to ignore
		   'p|pathcuts:s',\$opt_pathcuts,  # file with paths to cut

		   'o|output=s',\$opt_output       # output directory name
                   )) { &print_usage(); die "*** Wrong options ***" };

use vars qw($filename); # File --- log of TeX or archive (if -x)
use vars qw(%config_file); # Content of config file --- global,
                           # used for setting $opt_... only
&parse_config_file();

# If $verbose undefined, try config file, else set it to 0
unless (defined($verbose)) {
  if (exists($config_file{"verbose"})) {
    $verbose = $config_file{"verbose"}; }
  else { $verbose = 0; };
};

# Print version info if -h or --version (and exit then)
if (defined($opt_help)) { &print_usage(); exit 0; };

# If dummy, warn
if (defined($opt_dummy)) {
  warn "\n*** WARNING: dummy mode selected ***\n";
  warn "Nothing will be really done!\n\n";
}
else { $opt_dummy = 0; };

# Check format option
if (defined($opt_format)){
  if ($opt_format eq "") {
    if (exists($config_file{"format"})) {
      $opt_format = $config_file{"format"}
    }
    else {
      &print_usage();
      die "*** Format option parameter is not specified ***\n";
    };
  };
}
else { $opt_format = ""; };

# Forced format name
if (not defined($opt_fmtname)) { $opt_fmtname = ""; };

# Dvips
if (not defined($opt_dvips)) { $opt_dvips = ""; };

# Ignore file
if (defined($opt_ignore)) {
  if ($opt_ignore eq "") {
    if (exists($config_file{"ignore"})) {
      $opt_ignore = $config_file{"ignore"};
    }
    else {
      &print_usage();
      die "*** Ignore parameter is not specified ***\n";
    };
  };
}
else { $opt_ignore = ""; };

# Pathcuts
if ((not defined($opt_pathcuts)) or ($opt_pathcuts eq "")) {
  if (exists($config_file{"pathcuts"})) {
    $opt_pathcuts = $config_file{"pathcuts"};
  }
  else { $opt_pathcuts = ""; };
}

# Output dir
unless (defined($opt_output)) {
  unless (exists($config_file{"output"})) { $opt_output = 'TVS-OUT'; }
  else { $opt_output = $config_file{"output"} }
};

# Set filename
unless (defined($ARGV[0])) {
  &print_usage;
  die "*** Please specify filename ***";
};
$filename = $ARGV[0];
# Allow TVS only to be run in source directory
my($working_directory) = cwd;
if ($working_directory ne abs_path(dirname($filename))) {
  &print_usage;
  die "*** Please run TVS in the source directory ***";
};

&main_pack($filename);

print "\nThank you for using TVS and supporting free software\n";

