#! /usr/bin/perl -w
# -*- perl -*-

# Author: Michael Hohmuth <hohmuth@inf.tu-dresden.de>
#
# This program is in the public domain.  No warranty, expressed or
# implied, is made by the Michael Hohmuth or TU Dresden as to the
# accuracy and functioning of the programs and related program material,
# nor shall the fact of distribution constitute any such warranty, and
# no responsibility is assumed by Michael Hohmuth or TU Dresden in
# connection therewith.

# This script repairs a PVS proof (.prf) file that is the result of a
# failed CVS merge (i.e., contains conflicts marked with
# "<<<<<<< ======= >>>>>>>").
#
# Run this script with no arguments to get a short help screen.
#
# The output of this script is a .prf file that contains everything of
# the CVS version, plus everything the conflicting private version added
# to the previous CVS version.  If the script detects a proof that
# changed incompatibly in the private version, it keeps both versions of
# the proof; the private version's proof is renamed to
# "<original_proof_name>-ALT-<number>".

no warnings 'recursion';

# Static configuration

$debug = 0;
$debug_parser = 0;

# Option parsing and dynamic configuration

use Getopt::Std;

$opt_d = 0;			# Debugging messages?
$opt_D = 0;			# Show tokenization during parsing?
$opt_i = 0;			# In-place file editing? (if input != stdin)
$opt_b = 0;			# Create backup file
$opt_h = 0;			# Get help?
$opt_s = 0;			# Prevent informative messages?
$opt_v = 0;			# Print even more informative messages?

getopts('dDihbsv');

usage() if $opt_h;
die "Cannot be both silent and verbose;" if $opt_v and $opt_s;

$debug = 1 if $opt_d;
$debug_parser = 1 if $opt_D;

# Check command line

usage() if scalar @ARGV < 1;

if ($opt_i) {			# Multifile in-place editing mode

  $backupfile = undef;

  while (scalar @ARGV) {
    $infile = $outfile = shift @ARGV;
    die "Cannot read from stdin in in-place editing mode;" if $infile eq "-";

    $backupfile = $outfile . "~" if $opt_b;

    process_file ($infile, $outfile, $backupfile);
  }

} else {			# Not using in-place editing -- process 1 file

  usage() if scalar @ARGV > 2;

  $infile = shift @ARGV;
  $infile = undef if $infile eq "-";

  $outfile = undef;
  if (scalar @ARGV) {
    $outfile = shift @ARGV;
    $outfile = undef if $outfile eq "-";
  }

  if ($opt_b)
    {
      die "No output filename specified -- cannot create backup file;"
	if !defined ($outfile);

      $backupfile = $outfile . "~";
    }

  process_file ($infile, $outfile, $backupfile);
}

exit 0;

######################################################################
#
# Debugging

sub debug_print($) {
  $debug && print STDERR $_[0] . "\n";
}

######################################################################
#
# Help screen

sub usage {
  print STDERR 
'usage: prfmerge [options] {in.prf | -} [out.prf | -]
       prfmerge -i [options] in1.prf [ in2.prf ...]
Options:  -i  In-place editing of one or multiple files; output goes input file
          -b  Back up out.prf as out.prf~ before writing, if out.prf exists
          -h  Print this help message
          -s  Silent: prevent informative messages
          -v  Verbose: print more informative messages (on stderr)
          -d  Print debug messages (on stderr)
          -D  Print parser-debugging messages (on stderr)
Reads from stdin if "-" is specified as input.
If no out.prf has been specified, output goes to stdout.
File names in.prf and out.prf can be identical.
';

  exit 0 if $opt_h;
  exit 1;
}

######################################################################
#
# Tokenizing and parsing lists
#

@read_text=();
@read_token=();

sub tokenize($) {
  my ($in) = @_;

  # We split using our tokens as the split pattern.  The remaining
  # fields should all be whitespace.
  my @infield = split /( "[^\"]*" | \( | \) | [^\)\s]+ )/xs, $in;

  while (scalar @infield > 1) {
    my $ws = shift @infield;
    my $token = shift @infield;

    die "parse error: '$ws' should be whitespace;" if $ws !~ /^\s*$/s;

    push @read_token, $token;
    push @read_text, $ws . $token;
  }
}

sub get_token() {
  return undef if @read_token == 0;

  my $t = [shift @read_token, shift @read_text];

  $debug_parser && print token($t) . "\n";

  return $t;
}

sub token($) {
  return $_[0]->[0];
}

sub text($) {
  return $_[0]->[1];
}

sub islist($) {			# Is this a sublist?
  return defined ($_[0]->[2]);	# XXX Ugly test!
}

sub istoken($) {
  return ! islist($_[0]);
}

sub unget_token($) {
  my $t = $_[0];
  unshift @read_token, token($t);
  unshift @read_text,  text ($t);
}

# (Recursively) read and return a list from the token stream produced
# by tokenize()

sub read_list();
sub read_list() {
  my @list;

  my $t = get_token();

  #print token($t) . "\n";

  return undef if ! defined($t);
  die "list does not start with '(';" if token($t) ne "(";

  push @list, $t;

  while (defined($t = get_token())) {

    #print token($t) . "\n";

    if (token($t) eq "(") {
      unget_token($t);
      $t = read_list();
    }

    push @list, $t;

    if (token($t) eq ")") {

      #print "returning\n";

      return \@list;
    }
  }

  die "unfinished list;";
}

# Read a sequence of lists (not a list itself) and return it as an
# array.
sub read_all() {
  my @list;

  while (my $l = read_list())
    {
      push @list, $l;
    }

  return @list;
}

######################################################################
#
# Comparing lists
#

sub list_equal($$);
sub list_equal($$) {
  my ($l1, $l2) = @_;

  if (istoken($l1) and istoken($l2)) {
    return token($l1) eq token($l2);
  }

  if (islist($l1) and islist($l2)) {
    my $len = scalar @$l1;

    return 0 if $len != scalar @$l2;

    for (my $i = 0; $i < $len - 1; $i++)
      {
	return 0 if ! list_equal($l1->[$i], $l2->[$i]);
      }

    return 1;
  }

  return 0;
}

######################################################################
#
# Printing lists
#

sub print_list($);
sub print_list($) {
  my ($listref) = @_;

  die "printing invalid list;" if !defined($listref);

  foreach $i (@$listref) {
    if (islist($i)) {		# Is this a sublist? 
      print_list ($i);
    } else {
      print text($i);
    }
  }
}

######################################################################
#
# Build theory and lemma index

sub build_index(@) {
  my @theories = @_;

  my %theory_index;

  foreach my $tl (@theories) {
    my $theory = token($tl->[1]);

    debug_print "found theory $theory";

    my %lemma_index;

    foreach my $l (@$tl[2 .. (scalar @$tl - 2)]) {
      my $lemma = token($l->[1]);

      debug_print "found lemma $lemma";

      my %proof_index;

      if (islist ($l->[3])) {
	foreach my $p (@$l[3 .. (scalar @$l - 2)]) {
	  my $proof = token($p->[1]);

	  debug_print "found proof $proof";

	  $proof_index{$proof} = $p;
	}
      }

      $lemma_index{$lemma} = { "def" => $l,  "proofs" => \%proof_index};
    }

    $theory_index{$theory} = { "def" => $tl, "lemmas" => \%lemma_index};
  }

  return \%theory_index;
}

######################################################################
#
# Process one file 

sub process_file {
  my ($infile, $outfile, $backupfile) = @_;

  # Read input (a PVS .prf file with CVS conflicts)
  {
    local @ARGV = ();
    push @ARGV, $infile  if defined $infile;

    local $/ = undef;
    $input = <>;
  }

  my $shortcut = 0;

  # Shortcut: Check if the input does not contain conflicts.
  if ($input !~ /\n=======\n/) {
    if (defined $infile &&
	defined $outfile &&
	$infile eq $outfile)
      {
	print STDERR "Warning: File $infile contains no conflicts -- skipping\n"
	  if $opt_v;
	return;
      }

    print STDERR "Warning: " . (defined $infile ? "File $infile" : "Input") .
      " contains no conflicts -- copying\n"
	if $opt_v;

    $shortcut = 1;

  } else {
    print STDERR "Processing $infile\n" if defined $infile and !$opt_s;

    process_input();
  }

  # Print output

  if (defined $backupfile && -f $outfile)
    {
      print STDERR "Backing up $outfile to $backupfile\n" if $opt_v;

      my $ok = rename "$outfile", "$backupfile";
      die "Cannot rename $outfile to $backupfile;" if !$ok;
    }

  if (defined $outfile)
    {
      open OUT, ">$outfile";
      *STDOUT = *OUT;
    }

  if (! $shortcut) {		# Did we process the input?
    print_list (\@new);		# Yes -- print output
    print "\n\n";
  } else {			# No -- print original input
    print $input;
  }
}

sub process_input {
  # Split into old and new version
  # Parse input into @old and @new arrays

  $oldver = $input;
  $oldver =~ s/<<<<<<< [^\n]* \n
               ( (?:.(?!=======))* \n )
               (?:.(?!>>>>>>>))* \n [^\n]+\n
              /$1/sgx;

  tokenize($oldver);
  @old = read_all();

  $newver = $input;
  $newver =~ s/<<<<<<< [^\n]* \n
               (?:.(?!=======))* \n [^\n]+\n
               ( (?:.(?!>>>>>>>))* \n ) [^\n]+\n
              /$1/sgx;

  tokenize($newver);
  @new = read_all();

  # Build theory index

  debug_print "Analyzing private version";
  $old_index = build_index(@old);

  debug_print "Analyzing CVS version";
  $new_index = build_index(@new);

  # Merging: Iterate through old version, finding content conflicting with
  # or nonexistent in the new version.  Merge into new version.
  # We always add new information at the end.  In consequence, newly added
  # proof scripts will never be made the default.

  foreach my $theory (keys %$old_index) {
    #print "found theory $theory\n";

    if (! defined $new_index->{$theory}) 
      {				# Nonexisting theory -- add
	push @new, $old_index->{$theory}->{def};
      }
    else
      {				# Theory exists -- dive deeper and merge
	my $old_lemmaindex = $old_index->{$theory}->{lemmas};
	my $new_lemmaindex = $new_index->{$theory}->{lemmas};
	
	foreach my $lemma (keys %$old_lemmaindex) {
	  #print "found lemma $lemma\n";

	  if (! defined $new_lemmaindex->{$lemma})
	    {			# Nonexisting lemma -- add
	      # Save the closing ")" before adding the new lemma.
	      my $new_lemmalist = $new_index->{$theory}->{def};
	      my $save = pop @$new_lemmalist;
	      push @$new_lemmalist, $old_lemmaindex->{$lemma}->{def};
	      push @$new_lemmalist, $save;
	    }
	  else
	    {			# Lemma exists -- dive deeper and merge
	      my $old_proofindex = $old_lemmaindex->{$lemma}->{proofs};
	      my $new_proofindex = $new_lemmaindex->{$lemma}->{proofs};

	      foreach my $proof (keys %$old_proofindex) {
		#print "found proof $proof\n";
		
		my $pname = $proof;
		my $pdef = $old_proofindex->{$proof};
		
		if (defined $new_proofindex->{$proof})
		  {		# Proof exists -- check for equivalence
		    debug_print "Proof $proof exists in both versions";

		    if (list_equal($new_proofindex->{$proof},
				   $old_proofindex->{$proof})) 
		      {
			debug_print "equal definition";
			next;
		      }

		    # The definition differs.  Check if the proof script
		    # is identical
		    my ($o_lbrace, $o_id, $o_desc, $o_created, $o_run,
			$o_script, $o_status, $o_deps, $o_rtime,
			$o_cputime, $o_interactive, $o_decproc,
			$o_rbrace) = @$pdef;

		    my ($n_lbrace, $n_id, $n_desc, $n_created, $n_run,
			$n_script, $n_status, $n_deps, $n_rtime,
			$n_cputime, $n_interactive, $n_decproc,
			$n_rbrace) = @{$new_proofindex->{$proof}};

		    if (list_equal($n_script, $o_script)) {
		      debug_print "equal proof script -- keeping CVS version";
		      next;	# XXX keep the newer version; better strategy?
		    }

		    # The definition differs.  Add the proof under a
		    # different name
		    my $ver = 1;
		    while (defined ($new_proofindex->{$proof . "-ALT-" . $ver}))
		      {
			$ver++;
		      }

		    $pname = $proof . "-ALT-" . $ver;

		    debug_print "adding private version as $pname";

		    my $desc = '"Alternative proof - kept by prfmerge"';

		    $pdef = [$o_lbrace, 
			     [$pname, $pname], [$desc, " $desc\n  "],
			     $o_created, $o_run,
			     $o_script, $o_status, $o_deps, $o_rtime,
			     $o_cputime, $o_interactive, $o_decproc, 
			     $o_rbrace];
		  }
		else
		  {
		    debug_print "Proof $proof exists only in private version";
		  }

		#print "adding as $pname\n";

		my $new_prooflist = $new_lemmaindex->{$lemma}->{def};
		my $save = pop @$new_prooflist;
		push @$new_prooflist, $pdef;
		push @$new_prooflist, $save;
	      }
	    }
	}
      }
  }
}