summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSean Whitton <spwhitton@spwhitton.name>2020-02-07 23:27:29 -0700
committerSean Whitton <spwhitton@spwhitton.name>2020-02-07 23:27:29 -0700
commit2ed222c8ea789702b2aeffd63e177abd08f21b80 (patch)
tree1d4a80921ebbdcf735674afa75db8d2dc38cd779
parent64f954528b4c62abdf2de391afb6fe2715c60046 (diff)
downloadp5-Git-Annex-2ed222c8ea789702b2aeffd63e177abd08f21b80.tar.gz
convert annex-to-annex into an App:: module
Signed-off-by: Sean Whitton <spwhitton@spwhitton.name>
-rwxr-xr-xbin/annex-to-annex150
-rw-r--r--lib/App/annex_to_annex.pm190
-rwxr-xr-xt/21_annex-to-annex.t52
3 files changed, 211 insertions, 181 deletions
diff --git a/bin/annex-to-annex b/bin/annex-to-annex
index ee16f05..6fc78ae 100755
--- a/bin/annex-to-annex
+++ b/bin/annex-to-annex
@@ -52,151 +52,5 @@ git-annex(1), annex-to-annex-dropunused(1), annex-to-annex-reinject(1)
=cut
-use 5.028;
-use strict;
-use warnings;
-
-use autodie;
-use Digest::MD5::File qw(file_md5);
-use File::Basename qw(dirname basename);
-use File::Copy;
-use File::Find;
-use File::Spec::Functions qw(catfile rel2abs abs2rel);
-use Try::Tiny;
-use Git::Annex;
-
-# only support v7 because supporting v5 too would make things quite
-# complex. require git-annex >=7.20191009 because it will refuse to
-# work in v5 repos, and because it supports `git annex find --unlocked`
-chomp(my %annex_version_fields = map { split ': ' } `git annex version`);
-die "I need git-annex >=7.20191009 and a v7 repository\n"
- unless $annex_version_fields{'git-annex version'} >= 7.20191009;
-
-die "need at least two arguments\n" unless @ARGV > 1;
-my $dest = rel2abs pop @ARGV;
-die "dest is not a directory\n" unless -d $dest;
-my $dest_device_id = (stat($dest))[0];
-my $dannex = Git::Annex->new($dest);
-my $do_commit = 0;
-if ($ARGV[0] eq '--commit') {
- $do_commit = 1;
- shift @ARGV;
-
- my @git_status = $dannex->git->RUN("status", { porcelain => 1 });
- die "git repo containing $dest is not clean; please commit\n"
- unless @git_status == 0;
-
- #<<<
- try {
- $dannex->git->symbolic_ref({ quiet => 1 }, "HEAD");
- } catch {
- die "$dest has a detached HEAD; aborting";
- };
- #>>>
-}
-my @sources = map rel2abs($_), @ARGV;
-
-# process one entry in @sources at a time because we can start up
-# annex batch processes for each of these as all files under each
-# entry in @sources will lie in the same annex
-foreach my $source (@sources) {
- my $dir = dirname $source;
- my $annex = Git::Annex->new($dir);
- #<<<
- try {
- $annex->git->annex("status");
- } catch {
- die "$source does not appear to lie within an annex\n";
- };
- #>>>
- die "$source does not exist\n" unless -e $source;
-
- if ($do_commit) {
- my @git_status = $annex->git->RUN("status", { porcelain => 1 });
- die "git repo containing $source is not clean; please commit\n"
- unless @git_status == 0;
-
- #<<<
- try {
- $annex->git->symbolic_ref({ quiet => 1 }, "HEAD");
- } catch {
- die "$dest has a detached HEAD; aborting";
- };
- #>>>
- }
-
- my $base = basename $source;
- my @missing = $annex->git->annex("find", "--not", "--in", "here", $base);
- if (@missing) {
- say "Following annexed files are not present in this repo:";
- say for @missing;
- die "cannot continue; please `git-annex get` them\n";
- }
-
- # start batch processes
- my $lk = $annex->batch("lookupkey");
- my $cl = $annex->batch("contentlocation");
- my $find = $annex->batch("find", "--unlocked");
-
- find({
- wanted => sub {
- my $rel = abs2rel $File::Find::name, $dir;
- my $target = catfile $dest, $rel;
- die "$target already exists!\n" if -e $target and !-d $target;
-
- my $key = $lk->ask($rel);
- if ($key) { # this is an annexed file
- my $content = rel2abs $cl->ask($key), $annex->toplevel;
- my $content_device_id = (stat $content)[0];
- if ($dest_device_id == $content_device_id) {
- link $content, $target;
- } else {
- copy_and_md5($content, $target);
- }
- # add, and then maybe unlock. we don't use `-c
- # annex.addunlocked=true` because we want to
- # hardlink from .git/annex/objects in the source
- # to .git/annex/objects in the dest, rather than
- # having the unlocked copy in dest be hardlinked
- # to the source, or anything like that
- system "git", "-C", $dest, "annex", "add", $rel;
- system "git", "-C", $dest, "annex", "unlock", $rel
- if $find->ask($rel);
-
- # if using the default backend, quick sanity check
- if ($key =~ /^SHA256E-s[0-9]+--([0-9a-f]+)/) {
- my $key_sum = $1;
- chomp(my $dest_key
- = `git -C "$dest" annex lookupkey "$rel"`);
- if ($dest_key =~ /^SHA256E-s[0-9]+--([0-9a-f]+)/) {
- my $dest_key_sum = $1;
- die
-"git-annex calculated a different checksum for $target"
- unless $key_sum eq $dest_key_sum;
- }
- }
- } else { # this is not an annexed file
- if (-d $File::Find::name) {
- mkdir $target unless -d $target;
- } else {
- copy_and_md5($File::Find::name, $target);
- system "git", "-C", $dest,
- "-c", "annex.gitaddtoannex=false", "add", $rel;
- }
- }
- $annex->git->rm($File::Find::name) unless -d $File::Find::name;
- },
- no_chdir => 1,
- },
- $source
- );
- $annex->git->commit({ message => "migrated by annex-to-annex" })
- if $do_commit;
-}
-$dannex->git->commit({ message => "add" }) if $do_commit;
-
-sub copy_and_md5 {
- copy($_[0], $_[1]);
- die "md5 checksum failure after copying $_[0] to $_[1]!"
- unless file_md5($_[0]) eq file_md5($_[1]);
-}
+use App::annex_to_annex;
+exit App::annex_to_annex->main;
diff --git a/lib/App/annex_to_annex.pm b/lib/App/annex_to_annex.pm
new file mode 100644
index 0000000..29cd3f7
--- /dev/null
+++ b/lib/App/annex_to_annex.pm
@@ -0,0 +1,190 @@
+package App::annex_to_annex;
+# ABSTRACT: use hardlinks to migrate files between git annex repos
+#
+# Copyright (C) 2019-2020 Sean Whitton <spwhitton@spwhitton.name>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or (at
+# your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use 5.028;
+use strict;
+use warnings;
+
+use autodie;
+use subs qw(main exit);
+use Digest::MD5::File qw(file_md5);
+use File::Basename qw(dirname basename);
+use File::Copy;
+use File::Find;
+use File::Spec::Functions qw(catfile rel2abs abs2rel);
+use Try::Tiny;
+use Git::Annex;
+
+my $exit_main = 0;
+
+CORE::exit main unless caller;
+
+=func main
+
+Implementation of annex-to-annex(1). Please see documentation for
+that command.
+
+=cut
+
+sub main {
+ # only support v7 because supporting v5 too would make things quite
+ # complex. require git-annex >=7.20191009 because it will refuse to
+ # work in v5 repos, and because it supports `git annex find --unlocked`
+ chomp(my %annex_version_fields = map { split ': ' } `git annex version`);
+ die "I need git-annex >=7.20191009 and a v7 repository\n"
+ unless $annex_version_fields{'git-annex version'} >= 7.20191009;
+
+ die "need at least two arguments\n" unless @ARGV > 1;
+ my $dest = rel2abs pop @ARGV;
+ die "dest is not a directory\n" unless -d $dest;
+ my $dest_device_id = (stat($dest))[0];
+ my $dannex = Git::Annex->new($dest);
+ my $do_commit = 0;
+ if ($ARGV[0] eq '--commit') {
+ $do_commit = 1;
+ shift @ARGV;
+
+ my @git_status = $dannex->git->RUN("status", { porcelain => 1 });
+ die "git repo containing $dest is not clean; please commit\n"
+ unless @git_status == 0;
+
+ #<<<
+ try {
+ $dannex->git->symbolic_ref({ quiet => 1 }, "HEAD");
+ } catch {
+ die "$dest has a detached HEAD; aborting";
+ };
+ #>>>
+ }
+ my @sources = map rel2abs($_), @ARGV;
+
+ # process one entry in @sources at a time because we can start up
+ # annex batch processes for each of these as all files under each
+ # entry in @sources will lie in the same annex
+ foreach my $source (@sources) {
+ my $dir = dirname $source;
+ my $annex = Git::Annex->new($dir);
+ #<<<
+ try {
+ $annex->git->annex("status");
+ } catch {
+ die "$source does not appear to lie within an annex\n";
+ };
+ #>>>
+ die "$source does not exist\n" unless -e $source;
+
+ if ($do_commit) {
+ my @git_status = $annex->git->RUN("status", { porcelain => 1 });
+ die "git repo containing $source is not clean; please commit\n"
+ unless @git_status == 0;
+
+ #<<<
+ try {
+ $annex->git->symbolic_ref({ quiet => 1 }, "HEAD");
+ } catch {
+ die "$dest has a detached HEAD; aborting";
+ };
+ #>>>
+ }
+
+ my $base = basename $source;
+ my @missing
+ = $annex->git->annex("find", "--not", "--in", "here", $base);
+ if (@missing) {
+ say "Following annexed files are not present in this repo:";
+ say for @missing;
+ die "cannot continue; please `git-annex get` them\n";
+ }
+
+ # start batch processes
+ my $lk = $annex->batch("lookupkey");
+ my $cl = $annex->batch("contentlocation");
+ my $find = $annex->batch("find", "--unlocked");
+
+ find({
+ wanted => sub {
+ my $rel = abs2rel $File::Find::name, $dir;
+ my $target = catfile $dest, $rel;
+ die "$target already exists!\n"
+ if -e $target and !-d $target;
+
+ my $key = $lk->ask($rel);
+ if ($key) { # this is an annexed file
+ my $content = rel2abs $cl->ask($key), $annex->toplevel;
+ my $content_device_id = (stat $content)[0];
+ if ($dest_device_id == $content_device_id) {
+ link $content, $target;
+ } else {
+ _copy_and_md5($content, $target);
+ }
+ # add, and then maybe unlock. we don't use `-c
+ # annex.addunlocked=true` because we want to
+ # hardlink from .git/annex/objects in the source
+ # to .git/annex/objects in the dest, rather than
+ # having the unlocked copy in dest be hardlinked
+ # to the source, or anything like that
+ system "git", "-C", $dest, "annex", "add", $rel;
+ system "git", "-C", $dest, "annex", "unlock", $rel
+ if $find->ask($rel);
+
+ # if using the default backend, quick sanity check
+ if ($key =~ /^SHA256E-s[0-9]+--([0-9a-f]+)/) {
+ my $key_sum = $1;
+ chomp(my $dest_key
+ = `git -C "$dest" annex lookupkey "$rel"`);
+ if ($dest_key =~ /^SHA256E-s[0-9]+--([0-9a-f]+)/) {
+ my $dest_key_sum = $1;
+ die
+"git-annex calculated a different checksum for $target"
+ unless $key_sum eq $dest_key_sum;
+ }
+ }
+ } else { # this is not an annexed file
+ if (-d $File::Find::name) {
+ mkdir $target unless -d $target;
+ } else {
+ _copy_and_md5($File::Find::name, $target);
+ system "git", "-C", $dest,
+ "-c", "annex.gitaddtoannex=false", "add", $rel;
+ }
+ }
+ $annex->git->rm($File::Find::name)
+ unless -d $File::Find::name;
+ },
+ no_chdir => 1,
+ },
+ $source
+ );
+ $annex->git->commit({ message => "migrated by annex-to-annex" })
+ if $do_commit;
+ }
+ $dannex->git->commit({ message => "add" }) if $do_commit;
+
+ EXIT_MAIN:
+ return $exit_main;
+}
+
+sub _copy_and_md5 {
+ copy($_[0], $_[1]);
+ die "md5 checksum failure after copying $_[0] to $_[1]!"
+ unless file_md5($_[0]) eq file_md5($_[1]);
+}
+
+sub exit { $exit_main = shift // 0; goto EXIT_MAIN }
+
+1;
diff --git a/t/21_annex-to-annex.t b/t/21_annex-to-annex.t
index 9e84388..76ddd27 100755
--- a/t/21_annex-to-annex.t
+++ b/t/21_annex-to-annex.t
@@ -5,6 +5,7 @@ use strict;
use warnings;
use lib 't/lib';
+use App::annex_to_annex;
use Test::More;
use t::Setup;
use t::Util;
@@ -16,45 +17,37 @@ use File::chdir;
plan skip_all => "device ID issues" if device_id_issues;
-# make sure that `make test` will always use the right version of the
-# script we seek to test
-#
-# TODO testing the development version is currently difficult because
-# the 'run' sub in this script seems to reset PATH. Right now the
-# only sure fire way to run these test against the development version
-# is `dzil test`, which runs everything
-my $a2a = "annex-to-annex";
-$a2a = rel2abs "blib/script/annex-to-annex" if -x "blib/script/annex-to-annex";
-
-my $exit;
-my @output;
+my ($output, $error, $exit, @output);
with_temp_annexes {
make_path "dest/foo/foo2";
write_file "dest/foo/bar", "";
write_file "dest/foo/foo2/baz", "";
- ($exit, @output) = run($a2a, "source1/foo", "source2/other", "dest");
+ (undef, $error, $exit)
+ = run_bin qw(annex-to-annex source1/foo source2/other dest);
ok $exit, "it exits nonzero instead of clobbering an existing file";
- ok grep(/\/dest\/foo\/bar already exists!$/, @output),
+ ok grep(/\/dest\/foo\/bar already exists!$/, @$error),
"it won't clobber an existing file";
};
with_temp_annexes {
write_file catfile("source1", "quux"), "quux\n";
- ($exit, @output) = run($a2a, "--commit", "source1/foo/bar", "dest");
+ (undef, $error, $exit)
+ = run_bin qw(annex-to-annex --commit source1/foo/bar dest);
ok $exit,
"with --commit, it exits nonzero when uncommitted source changes";
ok grep(/^git repo containing [^ ]+\/bar is not clean; please commit$/,
- @output),
+ @$error),
"with --commit, it exits when uncommitted changes";
};
with_temp_annexes {
write_file catfile("dest", "quux"), "quux\n";
- ($exit, @output) = run($a2a, "--commit", "source1/foo/bar", "dest");
+ (undef, $error, $exit)
+ = run_bin qw(annex-to-annex --commit source1/foo/bar dest);
ok $exit, "with --commit, it exits nonzero when uncommitted dest changes";
ok grep(/^git repo containing [^ ]+\/dest is not clean; please commit$/,
- @output),
+ @$error),
"with --commit, it exits when uncommitted changes";
};
@@ -63,7 +56,7 @@ with_temp_annexes {
with_temp_annexes {
my (undef, $source1, $source2, $dest) = @_;
- system $a2a, qw(--commit source1/foo source2/other dest);
+ run_bin qw(annex-to-annex --commit source1/foo source2/other dest);
@output = $source1->RUN(qw(log -1 --oneline --name-status));
like $output[0], qr/migrated by annex-to-annex/,
@@ -89,10 +82,10 @@ with_temp_annexes {
my (undef, $source1) = @_;
corrupt_annexed_file $source1, "foo/foo2/baz";
- ($exit, @output)
- = run($a2a, "--commit", "source1/foo", "source2/other", "dest");
+ (undef, $error, $exit)
+ = run_bin qw(annex-to-annex --commit source1/foo source2/other dest);
ok $exit, "it exits nonzero when dest annex calculates a diff checksum";
- ok grep(/git-annex calculated a different checksum for/, @output),
+ ok grep(/git-annex calculated a different checksum for/, @$error),
"it warns when dest annex calculates a diff checksum";
};
@@ -100,18 +93,18 @@ with_temp_annexes {
my (undef, $source1) = @_;
$source1->annex(qw(drop --force foo/foo2/baz));
- ($exit, @output)
- = run($a2a, "--commit", "source1/foo", "source2/other", "dest");
+ ($output, undef, $exit)
+ = run_bin qw(annex-to-annex --commit source1/foo source2/other dest);
ok $exit, "it exits nonzero when an annexed file is not present";
ok
- grep(/^Following annexed files are not present in this repo:$/, @output),
+ grep(/^Following annexed files are not present in this repo:$/, @$output),
"it exits when annexed files are not present";
};
# this is the main integration test for the script doing its job
with_temp_annexes {
my (undef, $source1, $source2, $dest) = @_;
- system $a2a, qw(source1/foo source2/other dest);
+ run_bin qw(annex-to-annex source1/foo source2/other dest);
{
local $CWD = "source1";
ok !-e "foo/bar", "bar should not exist in source1";
@@ -156,10 +149,3 @@ with_temp_annexes {
};
done_testing;
-
-sub run {
- my @cmd = @_;
- my ($exit, @output);
- @output = split "\n", capture_merged { system @cmd; $exit = $? >> 8 };
- return ($exit, @output);
-}