summaryrefslogtreecommitdiff
path: root/lib/App/annex_to_annex.pm
blob: 7336ffcc7a8a098b8e5d6303fb062c23c17486c9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
package App::annex_to_annex;
# ABSTRACT: use hardlinks to migrate files between git annex repos
#
# Copyright (C) 2019-2020  Sean Whitton <spwhitton@spwhitton.name>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

use 5.028;
use strict;
use warnings;

use autodie;
use subs qw(main exit);
use Digest::MD5::File qw(file_md5);
use File::Basename qw(dirname basename);
use File::Copy;
use File::Find;
use File::Spec::Functions qw(catfile rel2abs abs2rel);
use Try::Tiny;
use Git::Annex;

# Exit status that main() returns.  The exit() override defined later in this
# file stores the requested status here before jumping to main's EXIT_MAIN
# label.
my $exit_main = 0;

# Modulino entry point: when this file is executed directly rather than
# loaded by other code (caller is false at file level), run main() and use
# its return value as the process exit status.  CORE::exit is spelled out
# because `use subs` above declares a package-level exit() override.
CORE::exit main unless caller;

=func main

Implementation of annex-to-annex(1).  Please see documentation for
that command.

Normally takes no arguments and responds to C<@ARGV>.  If you want to
override that you can pass an arrayref of arguments, and those will be
used instead of the contents of C<@ARGV>.

=cut

# Entry point for annex-to-annex(1).
#
# Arguments: optionally an arrayref that replaces @ARGV for the duration of
# the call; a leading non-reference argument (class-method invocation) is
# discarded.  The effective argument list is
#     [--commit] SOURCE... DEST
#
# Every SOURCE is walked with File::Find.  Annexed files are hardlinked
# (same device) or copied (different device) into DEST and added to DEST's
# annex; other files are copied and added to git; each migrated file is then
# `git rm`ed from its source repository.  With --commit, all repositories
# must be clean and on a branch, and the result is committed on both sides.
#
# Returns the value of $exit_main (0 unless the exit() override changed it).
# Dies with a message on any precondition or migration failure.
sub main {
    shift if $_[0] and ref $_[0] eq ""; # in case main called as a class method
    local @ARGV = @{ $_[0] } if $_[0] and ref $_[0] ne "";

    # only support v7 because supporting v5 too would make things quite
    # complex.  require git-annex >=7.20191009 because it will refuse to
    # work in v5 repos, and because it supports `git annex find --unlocked`
    #
    # Split each "field: value" line once only, and ignore lines without a
    # separator, so that an unexpected line cannot misalign the hash.
    chomp(my %annex_version_fields
          = map { split /: /, $_, 2 } grep { /: / } `git annex version`);
    # Compare only the leading numeric part; version strings may carry a
    # suffix, and the field is absent altogether if git-annex did not run.
    my ($annex_version)
      = ($annex_version_fields{'git-annex version'} // '')
      =~ /\A([0-9]+(?:\.[0-9]+)?)/;
    die "I need git-annex >=7.20191009 and a v7 repository\n"
      unless defined $annex_version and $annex_version >= 7.20191009;

    die "need at least two arguments\n" unless @ARGV > 1;
    my $dest = rel2abs pop @ARGV;
    die "dest is not a directory\n" unless -d $dest;
    my $dest_device_id = (stat($dest))[0];
    my $dannex         = Git::Annex->new($dest);
    my $do_commit      = 0;
    if ($ARGV[0] eq '--commit') {
        $do_commit = 1;
        shift @ARGV;

        my @git_status = $dannex->git->RUN("status", { porcelain => 1 });
        die "git repo containing $dest is not clean; please commit\n"
          unless @git_status == 0;

        #<<<
        try {
            $dannex->git->symbolic_ref({ quiet => 1 }, "HEAD");
        } catch {
            die "$dest has a detached HEAD; aborting";
        };
        #>>>
    }
    my @sources = map rel2abs($_), @ARGV;

    # process one entry in @sources at a time because we can start up
    # annex batch processes for each of these as all files under each
    # entry in @sources will lie in the same annex
    foreach my $source (@sources) {
        my $dir   = dirname $source;
        my $annex = Git::Annex->new($dir);
        #<<<
        try {
            $annex->git->annex("status");
        } catch {
            die "$source does not appear to lie within an annex\n";
        };
        #>>>
        die "$source does not exist\n" unless -e $source;

        if ($do_commit) {
            my @git_status = $annex->git->RUN("status", { porcelain => 1 });
            die "git repo containing $source is not clean; please commit\n"
              unless @git_status == 0;

            #<<<
            try {
                $annex->git->symbolic_ref({ quiet => 1 }, "HEAD");
            } catch {
                # this check is on the source repository, so name it
                die "$source has a detached HEAD; aborting";
            };
            #>>>
        }

        my $base = basename $source;
        my @missing
          = $annex->git->annex("find", "--not", "--in", "here", $base);
        if (@missing) {
            say "Following annexed files are not present in this repo:";
            say for @missing;
            die "cannot continue; please `git-annex get` them\n";
        }

        # start batch processes
        my $lk   = $annex->batch("lookupkey");
        my $cl   = $annex->batch("contentlocation");
        my $find = $annex->batch("find", "--unlocked");

        find({
                wanted => sub {
                    my $rel    = abs2rel $File::Find::name, $dir;
                    my $target = catfile $dest,             $rel;
                    die "$target already exists!\n"
                      if -e $target and !-d $target;

                    my $key = $lk->ask($rel);
                    if ($key) {    # this is an annexed file
                        my $content = rel2abs $cl->ask($key), $annex->toplevel;
                        my $content_device_id = (stat $content)[0];
                        if ($dest_device_id == $content_device_id) {
                            link $content, $target;
                        } else {
                            _copy_and_md5($content, $target);
                        }
                        # add, and then maybe unlock.  we don't use `-c
                        # annex.addunlocked=true` because we want to
                        # hardlink from .git/annex/objects in the source
                        # to .git/annex/objects in the dest, rather than
                        # having the unlocked copy in dest be hardlinked
                        # to the source, or anything like that
                        _system_or_die("git", "-C", $dest, "annex", "add",
                            $rel);
                        _system_or_die("git", "-C", $dest, "annex", "unlock",
                            $rel)
                          if $find->ask($rel);

                        # if using the default backend, quick sanity check
                        if ($key =~ /^SHA256E-s[0-9]+--([0-9a-f]+)/) {
                            my $key_sum = $1;
                            # list-form pipe open: no shell is involved, so
                            # quotes or metacharacters in file names cannot
                            # break or inject into the command
                            open my $lookup, '-|', "git", "-C", $dest,
                              "annex", "lookupkey", $rel;
                            my $dest_key = <$lookup> // '';
                            # lookupkey exits non-zero when it finds no key;
                            # bypass autodie so that case only skips the
                            # comparison, as before
                            CORE::close($lookup);
                            chomp $dest_key;
                            if ($dest_key =~ /^SHA256E-s[0-9]+--([0-9a-f]+)/) {
                                my $dest_key_sum = $1;
                                die
"git-annex calculated a different checksum for $target"
                                  unless $key_sum eq $dest_key_sum;
                            }
                        }
                    } else {    # this is not an annexed file
                        if (-d $File::Find::name) {
                            mkdir $target unless -d $target;
                        } else {
                            _copy_and_md5($File::Find::name, $target);
                            _system_or_die("git", "-C", $dest,
                                "-c", "annex.gitaddtoannex=false", "add",
                                $rel);
                        }
                    }
                    $annex->git->rm($File::Find::name)
                      unless -d $File::Find::name;
                },
                no_chdir => 1,
            },
            $source
        );
        $annex->git->commit({ message => "migrated by annex-to-annex" })
          if $do_commit;
    }
    $dannex->git->commit({ message => "add" }) if $do_commit;

  EXIT_MAIN:
    return $exit_main;
}

# Run an external command without a shell (list-form system) and die if it
# could not be started or exited non-zero.  autodie's default set does not
# wrap system, so failures would otherwise go unnoticed and the source file
# would still be removed afterwards.
sub _system_or_die {
    my @command = @_;
    system(@command) == 0
      or die "command failed (wait status $?): @command\n";
    return;
}

# Copy a file and verify the copy.
#
# Arguments: source path, destination path.
# Dies if the copy itself fails (reporting the OS error), or if the MD5
# digests of source and destination differ afterwards.  Returns nothing.
sub _copy_and_md5 {
    my ($from, $to) = @_;

    # File::Copy::copy reports failure by returning false and setting $!;
    # autodie does not wrap it, so check it here rather than letting a
    # failed copy show up as a misleading checksum error
    copy($from, $to)
      or die "failed to copy $from to $to: $!\n";

    die "md5 checksum failure after copying $from to $to!"
      unless file_md5($from) eq file_md5($to);
    return;
}

# Package-level replacement for the built-in exit (declared via `use subs`).
# Instead of terminating the process it records the requested status in
# $exit_main (0 when no status is given) and jumps to the EXIT_MAIN label
# in main, which returns that status.
sub exit {
    my ($status) = @_;
    $exit_main = defined $status ? $status : 0;
    goto EXIT_MAIN;
}

1;