1#!/usr/bin/env perl
2# Copyright 2005, Ryan Anderson <ryan@michonline.com>
3# Distribution permitted under the GPL v2, as distributed
4# by the Free Software Foundation.
5# Later versions of the GPL at the discretion of Linus Torvalds
6#
7# Scan two git object-trees, and hardlink any common objects between them.
8
9use 5.006;
10use strict;
11use warnings;
12use Getopt::Long;
13
14sub get_canonical_form($);
15sub do_scan_directory($$$);
16sub compare_two_files($$);
17sub usage();
18sub link_two_files($$);
19
20# stats
21my $total_linked = 0;
22my $total_already = 0;
23my ($linked,$already);
24
25my $fail_on_different_sizes = 0;
26my $help = 0;
27GetOptions("safe" => \$fail_on_different_sizes,
28 "help" => \$help);
29
30usage() if $help;
31
32my (@dirs) = @ARGV;
33
34usage() if (!defined $dirs[0] || !defined $dirs[1]);
35
36$_ = get_canonical_form($_) foreach (@dirs);
37
38my $master_dir = pop @dirs;
39
40opendir(D,$master_dir . "objects/")
41 or die "Failed to open $master_dir/objects/ : $!";
42
43my @hashdirs = grep { ($_ eq 'pack') || /^[0-9a-f]{2}$/ } readdir(D);
44
45foreach my $repo (@dirs) {
46 $linked = 0;
47 $already = 0;
48 printf("Searching '%s' and '%s' for common objects and hardlinking them...\n",
49 $master_dir,$repo);
50
51 foreach my $hashdir (@hashdirs) {
52 do_scan_directory($master_dir, $hashdir, $repo);
53 }
54
55 printf("Linked %d files, %d were already linked.\n",$linked, $already);
56
57 $total_linked += $linked;
58 $total_already += $already;
59}
60
61printf("Totals: Linked %d files, %d were already linked.\n",
62 $total_linked, $total_already);
63
64
65sub do_scan_directory($$$) {
66 my ($srcdir, $subdir, $dstdir) = @_;
67
68 my $sfulldir = sprintf("%sobjects/%s/",$srcdir,$subdir);
69 my $dfulldir = sprintf("%sobjects/%s/",$dstdir,$subdir);
70
71 opendir(S,$sfulldir)
72 or die "Failed to opendir $sfulldir: $!";
73
74 foreach my $file (grep(!/\.{1,2}$/, readdir(S))) {
75 my $sfilename = $sfulldir . $file;
76 my $dfilename = $dfulldir . $file;
77
78 compare_two_files($sfilename,$dfilename);
79
80 }
81 closedir(S);
82}
83
84sub compare_two_files($$) {
85 my ($sfilename, $dfilename) = @_;
86
87 # Perl's stat returns relevant information as follows:
88 # 0 = dev number
89 # 1 = inode number
90 # 7 = size
91 my @sstatinfo = stat($sfilename);
92 my @dstatinfo = stat($dfilename);
93
94 if (@sstatinfo == 0 && @dstatinfo == 0) {
95 die sprintf("Stat of both %s and %s failed: %s\n",$sfilename, $dfilename, $!);
96
97 } elsif (@dstatinfo == 0) {
98 return;
99 }
100
101 if ( ($sstatinfo[0] == $dstatinfo[0]) &&
102 ($sstatinfo[1] != $dstatinfo[1])) {
103 if ($sstatinfo[7] == $dstatinfo[7]) {
104 link_two_files($sfilename, $dfilename);
105
106 } else {
107 my $err = sprintf("ERROR: File sizes are not the same, cannot relink %s to %s.\n",
108 $sfilename, $dfilename);
109 if ($fail_on_different_sizes) {
110 die $err;
111 } else {
112 warn $err;
113 }
114 }
115
116 } elsif ( ($sstatinfo[0] == $dstatinfo[0]) &&
117 ($sstatinfo[1] == $dstatinfo[1])) {
118 $already++;
119 }
120}
121
122sub get_canonical_form($) {
123 my $dir = shift;
124 my $original = $dir;
125
126 die "$dir is not a directory." unless -d $dir;
127
128 $dir .= "/" unless $dir =~ m#/$#;
129 $dir .= ".git/" unless $dir =~ m#\.git/$#;
130
131 die "$original does not have a .git/ subdirectory.\n" unless -d $dir;
132
133 return $dir;
134}
135
136sub link_two_files($$) {
137 my ($sfilename, $dfilename) = @_;
138 my $tmpdname = sprintf("%s.old",$dfilename);
139 rename($dfilename,$tmpdname)
140 or die sprintf("Failure renaming %s to %s: %s",
141 $dfilename, $tmpdname, $!);
142
143 if (! link($sfilename,$dfilename)) {
144 my $failtxt = "";
145 unless (rename($tmpdname,$dfilename)) {
146 $failtxt = sprintf(
147 "Git Repository containing %s is probably corrupted, " .
148 "please copy '%s' to '%s' to fix.\n",
149 $tmpdname, $dfilename);
150 }
151
152 die sprintf("Failed to link %s to %s: %s\n%s" .
153 $sfilename, $dfilename,
154 $!, $dfilename, $failtxt);
155 }
156
157 unlink($tmpdname)
158 or die sprintf("Unlink of %s failed: %s\n",
159 $dfilename, $!);
160
161 $linked++;
162}
163
164
165sub usage() {
166 print("Usage: $0 [--safe] <dir> [<dir> ...] <master_dir> \n");
167 print("All directories should contain a .git/objects/ subdirectory.\n");
168 print("Options\n");
169 print("\t--safe\t" .
170 "Stops if two objects with the same hash exist but " .
171 "have different sizes. Default is to warn and continue.\n");
172 exit(1);
173}