1#!/usr/bin/perl
   2# Copyright 2005, Ryan Anderson <ryan@michonline.com>
   3# Distribution permitted under the GPL v2, as distributed
   4# by the Free Software Foundation.
   5# Later versions of the GPL at the discretion of Linus Torvalds
   6#
   7# Scan two git object-trees, and hardlink any common objects between them.
   8use 5.008;
  10use strict;
  11use warnings;
  12use Getopt::Long;
  13sub get_canonical_form($);
  15sub do_scan_directory($$$);
  16sub compare_two_files($$);
  17sub usage();
  18sub link_two_files($$);
  19# stats
  21my $total_linked = 0;
  22my $total_already = 0;
  23my ($linked,$already);
  24my $fail_on_different_sizes = 0;
  26my $help = 0;
  27GetOptions("safe" => \$fail_on_different_sizes,
  28           "help" => \$help);
  29usage() if $help;
  31my (@dirs) = @ARGV;
  33usage() if (!defined $dirs[0] || !defined $dirs[1]);
  35$_ = get_canonical_form($_) foreach (@dirs);
  37my $master_dir = pop @dirs;
  39opendir(D,$master_dir . "objects/")
  41        or die "Failed to open $master_dir/objects/ : $!";
  42my @hashdirs = grep { ($_ eq 'pack') || /^[0-9a-f]{2}$/ } readdir(D);
  44foreach my $repo (@dirs) {
  46        $linked = 0;
  47        $already = 0;
  48        printf("Searching '%s' and '%s' for common objects and hardlinking them...\n",
  49                $master_dir,$repo);
  50        foreach my $hashdir (@hashdirs) {
  52                do_scan_directory($master_dir, $hashdir, $repo);
  53        }
  54        printf("Linked %d files, %d were already linked.\n",$linked, $already);
  56        $total_linked += $linked;
  58        $total_already += $already;
  59}
  60printf("Totals: Linked %d files, %d were already linked.\n",
  62        $total_linked, $total_already);
  63sub do_scan_directory($$$) {
  66        my ($srcdir, $subdir, $dstdir) = @_;
  67        my $sfulldir = sprintf("%sobjects/%s/",$srcdir,$subdir);
  69        my $dfulldir = sprintf("%sobjects/%s/",$dstdir,$subdir);
  70        opendir(S,$sfulldir)
  72                or die "Failed to opendir $sfulldir: $!";
  73        foreach my $file (grep(!/\.{1,2}$/, readdir(S))) {
  75                my $sfilename = $sfulldir . $file;
  76                my $dfilename = $dfulldir . $file;
  77                compare_two_files($sfilename,$dfilename);
  79        }
  81        closedir(S);
  82}
  83sub compare_two_files($$) {
  85        my ($sfilename, $dfilename) = @_;
  86        # Perl's stat returns relevant information as follows:
  88        # 0 = dev number
  89        # 1 = inode number
  90        # 7 = size
  91        my @sstatinfo = stat($sfilename);
  92        my @dstatinfo = stat($dfilename);
  93        if (@sstatinfo == 0 && @dstatinfo == 0) {
  95                die sprintf("Stat of both %s and %s failed: %s\n",$sfilename, $dfilename, $!);
  96        } elsif (@dstatinfo == 0) {
  98                return;
  99        }
 100        if ( ($sstatinfo[0] == $dstatinfo[0]) &&
 102             ($sstatinfo[1] != $dstatinfo[1])) {
 103                if ($sstatinfo[7] == $dstatinfo[7]) {
 104                        link_two_files($sfilename, $dfilename);
 105                } else {
 107                        my $err = sprintf("ERROR: File sizes are not the same, cannot relink %s to %s.\n",
 108                                $sfilename, $dfilename);
 109                        if ($fail_on_different_sizes) {
 110                                die $err;
 111                        } else {
 112                                warn $err;
 113                        }
 114                }
 115        } elsif ( ($sstatinfo[0] == $dstatinfo[0]) &&
 117             ($sstatinfo[1] == $dstatinfo[1])) {
 118                $already++;
 119        }
 120}
 121sub get_canonical_form($) {
 123        my $dir = shift;
 124        my $original = $dir;
 125        die "$dir is not a directory." unless -d $dir;
 127        $dir .= "/" unless $dir =~ m#/$#;
 129        $dir .= ".git/" unless $dir =~ m#\.git/$#;
 130        die "$original does not have a .git/ subdirectory.\n" unless -d $dir;
 132        return $dir;
 134}
 135sub link_two_files($$) {
 137        my ($sfilename, $dfilename) = @_;
 138        my $tmpdname = sprintf("%s.old",$dfilename);
 139        rename($dfilename,$tmpdname)
 140                or die sprintf("Failure renaming %s to %s: %s",
 141                        $dfilename, $tmpdname, $!);
 142        if (! link($sfilename,$dfilename)) {
 144                my $failtxt = "";
 145                unless (rename($tmpdname,$dfilename)) {
 146                        $failtxt = sprintf(
 147                                "Git Repository containing %s is probably corrupted, " .
 148                                "please copy '%s' to '%s' to fix.\n",
 149                                $tmpdname, $dfilename);
 150                }
 151                die sprintf("Failed to link %s to %s: %s\n%s" .
 153                        $sfilename, $dfilename,
 154                        $!, $dfilename, $failtxt);
 155        }
 156        unlink($tmpdname)
 158                or die sprintf("Unlink of %s failed: %s\n",
 159                        $dfilename, $!);
 160        $linked++;
 162}
 163sub usage() {
 166        print("usage: git relink [--safe] <dir>... <master_dir> \n");
 167        print("All directories should contain a .git/objects/ subdirectory.\n");
 168        print("Options\n");
 169        print("\t--safe\t" .
 170                "Stops if two objects with the same hash exist but " .
 171                "have different sizes.  Default is to warn and continue.\n");
 172        exit(1);
 173}