#! /usr/bin/perl -W
#
# Script to sync a CVS repository from a SVN repository.
# 
# I once needed that when I started some new project using SVN and then
# needed to integrate that into a CVS environment.
#
# Adam Lackorzynski <adam@os.inf.tu-dresden.de>
#
#
#
# There's no config file, everything is configured below.
#
# Note, minimum svn 1.2 required due to changes of 'svn export'!
#
# General remark:
#  this script has become a bit hacky and should be rewritten.

use strict;
use XML::Simple;


# Name of the mirrored directory inside the SVN repository.  It is also the
# name the CVS working copy directory gets below $WCdir.
my $SVNname = 'trunk';
my $SVNrepo = "svn://svn.example.com/project/$SVNname";
# CVS module name and root directory of the CVS repository.
my $CVSname = 'cvsname';
my $CVSrepo = '/cvsroot';
# Directory holding the permanent CVS working copy.
my $WCdir   = '/permanent/cvs/working/copy/of/project';

# these dirs/files will not be committed to CVS (i.e. always deleted)
# these are _NO_ regexps!
# Entries are paths relative to $WCdir, i.e. they start with "$SVNname/".
my @filter_out = ( 
     #'trunk/this/should/not/go/into/cvs',
  );

# File inside the CVS module directory that records the SVN revision the
# CVS repository was last synced to.
my $CVSrepo_statusfile = "$CVSrepo/$CVSname/.svn_rev";

umask 0007;

# get latest revision
my $out = `svn log $SVNrepo -rHEAD`;
if ($? >> 8) {
  print "Error running svn:\n$out";
  exit 1;
}
if ($out !~ /^r(\d+)\s/m) {
  print "Unknown output of svn:\n$out";
  exit 1;
}

my $latest_svn_rev = $1; # head

# start_svn_rev only valid for latest_cvs_rev == 0
#  (e.g. when .svn_rev does not exist)
my $start_svn_rev  = 388;
#$start_svn_rev  = 0;
my $latest_cvs_rev = 0;

print "Latest revision of $SVNrepo: $latest_svn_rev\n";

# A missing or unreadable status file is not an error: $latest_cvs_rev then
# stays 0.  Three-argument open with a lexical handle keeps the file name
# from being interpreted as an open mode.
if (open(my $status_fh, '<', $CVSrepo_statusfile)) {
  my $s = <$status_fh>;
  # an empty file yields undef; do not match against it
  $latest_cvs_rev = $1 if defined $s && $s =~ /^(\d+)/;
  close $status_fh;
}

print "Latest revision in $CVSrepo/$CVSname: $latest_cvs_rev\n";

# Make sure the CVS repository and the module directory exist.
#
# Runs "cvs init" when $CVSrepo/CVSROOT is missing and creates the (empty)
# module directory $CVSrepo/$CVSname when that is missing.
# Takes no arguments.  Dies when "cvs init" exits non-zero or the module
# directory cannot be created.
sub prepare_cvs_repo() {
  # create cvs repo
  if (! -d "$CVSrepo/CVSROOT") {
    # list form: no shell is involved, so $CVSrepo needs no quoting
    system('cvs', '-d', $CVSrepo, 'init');
    die "system" if $? >> 8;
  }
  # create empty module
  if (! -d "$CVSrepo/$CVSname") {
    # "or" instead of "||": with "||" the die was attached to the (always
    # true) path string and could never trigger
    mkdir "$CVSrepo/$CVSname"
      or die "Cannot mkdir $CVSrepo/$CVSname: $!";
  }
}

# Make sure a CVS working copy of the module exists below $WCdir.
#
# Nothing is done when either $WCdir/$CVSname or $WCdir/$SVNname already
# contains a CVS directory.  Otherwise the module is checked out and the
# checkout directory is renamed to the SVN name.
# Takes no arguments.  Dies when mkdir, the checkout or the rename fails.
sub prepare_wc_cvs() {
  if (! -d "$WCdir/$CVSname/CVS" &&
      ! -d "$WCdir/$SVNname/CVS") {
    # list form: no shell is involved, so $WCdir needs no quoting
    system('mkdir', '-p', '--', $WCdir);
    die "mkdir" if $? >> 8;
    # the "cd ... &&" needs a shell, so this one stays a command string
    system("cd $WCdir && cvs -d $CVSrepo co $CVSname");
    die "cvs co" if $? >> 8;
    # now $CVSname contains CVS/Repository with the right module name
    # we can now rename the dir itself to the svn one
    # ("or" instead of "||": with "||" the die was attached to the second
    # path string and could never trigger)
    rename("$WCdir/$CVSname", "$WCdir/$SVNname") or die "rename: $!";
  }
}

# Create the leading directories of a relative path below a base directory.
#
#   $base - existing directory to start from
#   $rel  - relative path; every component that is followed by a "/" is
#           created, the part after the last "/" is left alone
#
# mkdir results are not checked, so directories that already exist are
# silently skipped.
sub mkdir_p($$) {
  my ($base, $rel) = @_;

  # peel one "component/" off the front of $rel per iteration
  while ($rel =~ m{^([^/]+)/}) {
    my $component = $1;
    $rel  = substr($rel, length($component) + 1);
    $base = "$base/$component";
    mkdir $base;
  }
}

# Announce a short pause on stdout and then wait for it.
# Takes no arguments.
sub do_sleep() {
  my $pause_secs = 2;
  print "Sleeping ", $pause_secs, " secs.\n";
  sleep $pause_secs;
}

# One-time setup before any revision is synced: first create the CVS
# repository and module directory if they are missing, then make sure a CVS
# working copy of the module exists below $WCdir.
prepare_cvs_repo();
prepare_wc_cvs();

# Helper for add_rev(): remove from %$hash every key that names the filtered
# path $f or anything below it, printing a "Filter-out" line for each.
#
#   $hash - reference to one of add_rev()'s bookkeeping hashes
#   $f    - one entry of @filter_out (a literal path, not a regexp)
sub my_clean_hash($$) {
  my ($hash, $f) = @_;

  foreach my $key (keys %{$hash}) {
    # @filter_out entries are checked against "$WCdir/$f", i.e. they carry
    # the "$SVNname/" prefix, whereas add_rev() stores its keys relative to
    # $WCdir/$SVNname.  Bring the key into "/$SVNname/..." form so the two
    # can be compared; a key that already starts with "/" is compared as is.
    my $abs = ($key =~ m{^/}) ? $key : "/$SVNname/$key";
    if ($abs eq "/$f" || index($abs, "/$f/") == 0) {
      print "Filter-out: $key\n";
      delete $hash->{$key};
    }
  }
}

# Bring the CVS working copy from SVN revision $from_rev to $to_rev and
# commit the result to CVS.
#
#   $from_rev - SVN revision the working copy currently corresponds to
#   $to_rev   - SVN revision to sync to
#
# When the file-level $latest_cvs_rev is non-zero, the SVN log for
# ($from_rev + 1):$to_rev is evaluated and only the touched paths are
# exported; several SVN commits are folded into one CVS commit.  Otherwise
# the whole tree is exported as the initial CVS commit.
#
# Returns 0 on success (also when there was nothing to commit) and 1 when
# svn log, svn export or cvs ci failed.  Dies when 'find' cannot be started,
# a file cannot be deleted or the status file cannot be written.
#
# NOTE(review): file names are interpolated unquoted into the svn/cvs shell
# command lines below, so names containing whitespace or shell
# metacharacters will not be handled correctly.
sub add_rev($$) {
  my ($from_rev, $to_rev) = @_;

  print "XXXXXXXXXXXXXXXXXXXXXXXX- r$from_rev:$to_rev -XXXXXXXXXXXXXXXXXX\n";
  print "Upping $WCdir from r$from_rev to r$to_rev\n";

  # when directories are deleted we have to get rid of the CVS directories
  # in those dirs first, otherwise svn will refuse to kill it
  print " -> $WCdir/$SVNname\n";

  # All keys of the following hashes are paths relative to $WCdir/$SVNname.
  my %add_entry; # all added objects (files and dirs)
  my %del_entry; # all deleted objects (files and dirs)
  my %mod_entry; # only contains files/dirs neither added nor deleted
  my $commit_msgs = '';
  my %commit_files; # everything that is handed to "cvs ci"

  if ($latest_cvs_rev) {
    my $log_cmd = "svn log --xml -v -r ".($from_rev + 1).":$to_rev $SVNrepo 2>&1 ";
    print "log_cmd: $log_cmd\n";
    my $log_outp = `$log_cmd`;
    if ($? >> 8) {
      print "Hmm, error(", $? >> 8, "): $log_outp\n";
      print "length: ", length($log_outp), "\n";
      return 1;
    }

    #print "Log for $from_rev:$to_rev:\n$log_outp\n";

    my $xml = XMLin($log_outp, ForceArray => [ 'logentry', 'path' ]);

    if (!$xml->{logentry}) {
      print "No logentry for $from_rev:$to_rev.\n";
      return 0; # i.e. nothing changed under $SVNrepo/$SVNname
    }

    my $fmt = "\n" . "-" x 79 . "\nr%5s| %8s | %s\n\n%s\n";
    # loop over each logentry and gather everything
    # we compress multiple svn commits to one cvs commit
    foreach my $e (@{$xml->{logentry}}) {
      print "Logentry - rev: ".$e->{revision}."\n";

      # XML::Simple represents an empty element (e.g. an empty log message)
      # as an empty hash ref; treat that, and a missing element, as ''
      # so no "HASH(0x...)" ends up in the CVS commit message.
      my $msg = $e->{'msg'};
      $msg = '' if !defined $msg || ref $msg;
      my $author = $e->{'author'};
      $author = '' if !defined $author || ref $author;

      printf($fmt, $e->{'revision'}, $author, $e->{'date'}, $msg);

      $commit_msgs .= "\n".$msg;
      #$commit_msgs .= "r".$e->{revision}.", ";

      foreach my $p (@{$e->{'paths'}->{'path'}}) {
        my ($action,$path) = ($p->{'action'},$p->{'content'});
        # only paths strictly below /$SVNname are of interest; this also
        # skips an entry for /$SVNname itself
        next if $path !~ m{^/\Q$SVNname\E/};
        print ">$action $path\n";
        $path =~ s{^/\Q$SVNname\E/}{};
        if ($action =~ /D/) {
          # a delete cancels earlier adds/modifications of the path and of
          # everything below it
          delete $add_entry{$path};
          delete $mod_entry{$path};
          foreach my $added (keys %add_entry) {
            delete $add_entry{$added} if $added =~ m{^\Q$path\E/};
          }
          foreach my $modified (keys %mod_entry) {
            delete $mod_entry{$modified} if $modified =~ m{^\Q$path\E/};
          }

          $del_entry{$path} = 1;
        } elsif ($action =~ /A/) {
          delete $del_entry{$path};
          delete $mod_entry{$path};
          $add_entry{$path} = 1;
        } else {
          $mod_entry{$path} = 1 unless defined $add_entry{$path};
        }
      }
    }
    # escape single quotes: the message is later put inside '...' on a
    # shell command line
    $commit_msgs =~ s/'/'\\''/g;

    {
      # Reduce the added/modified paths to the topmost ones: a path lying
      # below the previously kept path is covered by that path's export.
      my $lp;
      my @export_entries;
      foreach my $i (sort ((keys %add_entry), (keys %mod_entry))) {
        #print "=== $i\n";
        if (!defined $lp || $i !~ m{^\Q$lp\E/}) {
          $lp = $i;
          push @export_entries, $lp;
          #print "EXP $lp\n";
        }
      }
      for my $ee (@export_entries) {
        # export only works for directories, so we can just
        # use $WCdir/$ee
        mkdir_p("$WCdir/$SVNname", $ee);
        my $svn_export_cmd = "svn export --force -r $to_rev $SVNrepo/$ee $WCdir/$SVNname/$ee";
        print "running: $svn_export_cmd\n";
        system($svn_export_cmd);
        if ($? >> 8) {
          print "Export failed.\n";
          return 1;
        }
        $commit_files{$ee} = 1;

        #my $svn_cat_cmd = "svn cat -r $to_rev $SVNrepo/$ee > $WCdir/$SVNname/$ee";
        #print "Running: $svn_cat_cmd\n";
        #system($svn_cat_cmd);
        #if ($? >> 8) {
        #  print "Failed to run svn cat.\n";
        #  return 1;
        #}
      }
    }
  } else {
    # initial CVS checkin, just do an ordinary export
    my $svn_export_cmd = "svn export --force -r $to_rev $SVNrepo $WCdir/$SVNname";
    print "running: $svn_export_cmd\n";
    system($svn_export_cmd);
    if ($? >> 8) {
      print "Failed to run svn export.\n";
      return 1;
    }

    # add all files in the root directory, as the CVS dir is already there
    foreach my $ff (glob("$WCdir/$SVNname/*")) {
      if (-f $ff) {
        $ff =~ s{^\Q$WCdir/$SVNname\E/}{};
        #print "file in dir: $ff\n";
        $add_entry{$ff} = 1;
        $commit_files{$ff} = 1;
      }
    }

    $commit_msgs = 'Initial CVS commit';
  }

  # process @filter_out
  # delete all files/dirs and remove all traces, do this here in one place
  foreach my $filt (@filter_out) {
    if (-e "$WCdir/$filt") {
      # list form: no shell is involved, the path is passed literally
      system('rm', '-rf', "$WCdir/$filt");
      my_clean_hash(\%commit_files, $filt);
      my_clean_hash(\%add_entry, $filt);
      my_clean_hash(\%mod_entry, $filt);
      my_clean_hash(\%del_entry, $filt);
    }
  }
  return 0 unless %commit_files;

  # find all directories that were added with these exports
  # (a directory without a CVS subdirectory is not known to CVS yet)
  open(my $dir_fh, '-|',
       "cd $WCdir/$SVNname && find . -type d -a '!' -name CVS")
    or die "Cannot start 'find'";
  while (my $dir = <$dir_fh>) {
    chomp $dir;
    if (! -d "$WCdir/$SVNname/$dir/CVS") {
      # the toplevel directory always exists, so it's safe to strip the
      # "./" from the beginning
      $dir =~ s/^\.\///;
      $add_entry{$dir} = 1;
      $commit_files{$dir} = 1;
      #print "DIR TO ADD: $dir\n";
      foreach my $ff (glob("$WCdir/$SVNname/$dir/*")) {
        if (-f $ff) {
          $ff =~ s{^\Q$WCdir/$SVNname\E/}{};
          #print "file in dir: $ff\n";
          $add_entry{$ff} = 1;
          $commit_files{$ff} = 1;
        }
      }
    }

  }
  close $dir_fh;

  if (%add_entry) {
    # sorting ensures that directories are added before files in them
    my @a;
    foreach my $entry (sort keys %add_entry) {
      (my $name = $entry) =~ s{^/\Q$SVNname\E/}{};
      if ($name !~ /^$/) {
        push @a, $name;
        $commit_files{$name} = 1;
      }
    }
    print "Adding ".(scalar @a)." objects.\n";
    # hand the names to cvs in chunks to keep the command line short
    while (@a) {
      my $o = '';
      while (@a && length($o) < 8000) {
        $o .= shift(@a).' ';
      }
      print("cd $WCdir/$SVNname && cvs add -ko $o\n");
      do_sleep();
      # the result of cvs add is intentionally not checked
      system("cd $WCdir/$SVNname && cvs add -ko $o");
    }
  }

  # delete gone entries
  my %del_dirs;
  {
    my %del_files; # hashes to weed out duplicates easily
    print "del entries: ".scalar(keys %del_entry)."\n";
    foreach my $e (keys %del_entry) {
      if (-d "$WCdir/$SVNname/$e") {
        print "To del dir: $SVNname/$e\n";
        $del_dirs{$e} = 1;
      } elsif (-e "$WCdir/$SVNname/$e") {
        print "To del file: $SVNname/$e\n";
        $del_files{$e} = 1;
        $commit_files{$e} = 1;

        # files can be deleted immediately
        # ("or" instead of "||": with "||" the die was attached to the path
        # string and could never trigger)
        unlink("$WCdir/$SVNname/$e")
          or die "Cannot delete '$WCdir/$SVNname/$e': $!";
      }
    }

    # if directories with files are deleted, the files may not be listed in
    # the log, find them manually
    for my $dir (keys %del_dirs) {
      open(my $file_fh, '-|',
           "cd $WCdir/$SVNname && find $dir -type f -a '!' -name CVS")
        or die "Cannot start 'find'!";
      while (my $file = <$file_fh>) {
        chomp $file;
        # leave the CVS bookkeeping files alone
        if ($file !~ /\/CVS\/[^\/]+$/) {
          print "Deleting $file\n";
          unlink("$WCdir/$SVNname/$file") or die "Cannot delete $file: $!";
          $del_files{$file} = 1;
          $commit_files{$file} = 1;
        }
      }
      close $file_fh;
    }

    if (%del_files) {
      print "Deleting ".(scalar keys %del_files)." files.\n";
      my @df = keys %del_files;
      while (@df) {
        my $files = '';
        while (@df && length($files) < 8000) {
          $files .= shift(@df).' ';
        }
        print("cd $WCdir/$SVNname && cvs rm $files\n");
        do_sleep();
        system("cd $WCdir/$SVNname && cvs rm $files");
        if ($? >> 8) {
          printf("Error removing files.\n");
        }
      }
    }
  }


  print "Doing CVS commits\n";
  # CVS does not support atomic commits, so we can do multiple commits;
  # instead of doing the commit in $WCdir/$SVNname we're listing all
  # files themselves, so that commits are faster for small commits in large
  # repos
  print "CI: ", join(" ", keys %commit_files), "\n";
  if (%commit_files) {
    print "Commiting ".(scalar keys %commit_files)." files.\n";
    my @cf = sort(keys %commit_files);
    while (@cf) {
      my $files = '';
      while (@cf && length($files) < 8000) {
        my $f = shift @cf;
        # directories are not passed to cvs ci
        $files .= "$f " unless -d "$WCdir/$SVNname/$f";
      }
      # NOTE(review): when a chunk consists of directories only, $files is
      # empty and "cvs ci" is run without file arguments.
      print("cd $WCdir/$SVNname && cvs ci -m '$commit_msgs' $files\n");
      do_sleep();
      system("cd $WCdir/$SVNname && cvs ci -m '$commit_msgs' $files");
      if ($? >> 8) {
        printf("Error commiting.\n");
        return 1;
      }
    }
  }

  # CVS does not support deletion of directories, just remove them
  # from the working copy
  if (%del_dirs) {
    # list form: no shell is involved, the paths are passed literally
    my @cmd = ('rm', '-rf', map { "$WCdir/$SVNname/$_" } keys %del_dirs);
    print "Deleting: @cmd\n";
    system(@cmd);
  }

  # record the revision the CVS repository now corresponds to
  open(my $status_fh, '>', $CVSrepo_statusfile)
    or die "Cannot create $CVSrepo_statusfile\n";
  print {$status_fh} "$to_rev\n";
  close $status_fh
    or die "Cannot write $CVSrepo_statusfile: $!\n";

  return 0;
}

# now move every diff from revision $latest_cvs_rev to $latest_svn_rev
# from svn to cvs

# Set when a sync step fails; no further revisions are attempted then.
my $sync_failed = 0;

# Nothing synced yet: do the initial import at $start_svn_rev.  Only a
# successful import may advance $latest_cvs_rev, otherwise the incremental
# syncs below would be applied on top of an incomplete tree.
if (!$latest_cvs_rev && $start_svn_rev) {
  if (add_rev(0, $start_svn_rev)) {
    print "Failed to update to r$start_svn_rev\n";
    $sync_failed = 1;
  } else {
    $latest_cvs_rev = $start_svn_rev;
  }
}

# Sync one SVN revision at a time and stop at the first failure.
if (!$sync_failed) {
  for my $i (($latest_cvs_rev + 1) .. $latest_svn_rev) {
    if (add_rev($latest_cvs_rev, $i)) {
      print "Failed to update to r$i\n";
      last;
    }
    $latest_cvs_rev = $i;
  }
}

# the CVS repo is a read-only repo, make sure we are reminding others of that
# by removing the group write permission
# XXX takes too long...
print "Chowning repo (can take looooong)...\n";
# list form: no shell is involved, the path is passed literally
system('chmod', '-R', 'g-w', "$CVSrepo/$CVSname");
warn "Could not change permissions in $CVSrepo/$CVSname" if $? >> 8;
