From 2ed222c8ea789702b2aeffd63e177abd08f21b80 Mon Sep 17 00:00:00 2001 From: Sean Whitton Date: Fri, 7 Feb 2020 23:27:29 -0700 Subject: convert annex-to-annex into an App:: module Signed-off-by: Sean Whitton --- bin/annex-to-annex | 150 +----------------------------------- lib/App/annex_to_annex.pm | 190 ++++++++++++++++++++++++++++++++++++++++++++++ t/21_annex-to-annex.t | 52 +++++-------- 3 files changed, 211 insertions(+), 181 deletions(-) create mode 100644 lib/App/annex_to_annex.pm diff --git a/bin/annex-to-annex b/bin/annex-to-annex index ee16f05..6fc78ae 100755 --- a/bin/annex-to-annex +++ b/bin/annex-to-annex @@ -52,151 +52,5 @@ git-annex(1), annex-to-annex-dropunused(1), annex-to-annex-reinject(1) =cut -use 5.028; -use strict; -use warnings; - -use autodie; -use Digest::MD5::File qw(file_md5); -use File::Basename qw(dirname basename); -use File::Copy; -use File::Find; -use File::Spec::Functions qw(catfile rel2abs abs2rel); -use Try::Tiny; -use Git::Annex; - -# only support v7 because supporting v5 too would make things quite -# complex. require git-annex >=7.20191009 because it will refuse to -# work in v5 repos, and because it supports `git annex find --unlocked` -chomp(my %annex_version_fields = map { split ': ' } `git annex version`); -die "I need git-annex >=7.20191009 and a v7 repository\n" - unless $annex_version_fields{'git-annex version'} >= 7.20191009; - -die "need at least two arguments\n" unless @ARGV > 1; -my $dest = rel2abs pop @ARGV; -die "dest is not a directory\n" unless -d $dest; -my $dest_device_id = (stat($dest))[0]; -my $dannex = Git::Annex->new($dest); -my $do_commit = 0; -if ($ARGV[0] eq '--commit') { - $do_commit = 1; - shift @ARGV; - - my @git_status = $dannex->git->RUN("status", { porcelain => 1 }); - die "git repo containing $dest is not clean; please commit\n" - unless @git_status == 0; - - #<<< - try { - $dannex->git->symbolic_ref({ quiet => 1 }, "HEAD"); - } catch { - die "$dest has a detached HEAD; aborting"; - }; - #>>> -} -my @sources = map rel2abs($_), @ARGV; - -# process one entry in @sources at a time because we can start up -# annex batch processes for each of these as all files under each -# entry in @sources will lie in the same annex -foreach my $source (@sources) { - my $dir = dirname $source; - my $annex = Git::Annex->new($dir); - #<<< - try { - $annex->git->annex("status"); - } catch { - die "$source does not appear to lie within an annex\n"; - }; - #>>> - die "$source does not exist\n" unless -e $source; - - if ($do_commit) { - my @git_status = $annex->git->RUN("status", { porcelain => 1 }); - die "git repo containing $source is not clean; please commit\n" - unless @git_status == 0; - - #<<< - try { - $annex->git->symbolic_ref({ quiet => 1 }, "HEAD"); - } catch { - die "$dest has a detached HEAD; aborting"; - }; - #>>> - } - - my $base = basename $source; - my @missing = $annex->git->annex("find", "--not", "--in", "here", $base); - if (@missing) { - say "Following annexed files are not present in this repo:"; - say for @missing; - die "cannot continue; please `git-annex get` them\n"; - } - - # start batch processes - my $lk = $annex->batch("lookupkey"); - my $cl = $annex->batch("contentlocation"); - my $find = $annex->batch("find", "--unlocked"); - - find({ - wanted => sub { - my $rel = abs2rel $File::Find::name, $dir; - my $target = catfile $dest, $rel; - die "$target already exists!\n" if -e $target and !-d $target; - - my $key = $lk->ask($rel); - if ($key) { # this is an annexed file - my $content = rel2abs $cl->ask($key), $annex->toplevel; - my $content_device_id = (stat $content)[0]; - if ($dest_device_id == $content_device_id) { - link $content, $target; - } else { - copy_and_md5($content, $target); - } - # add, and then maybe unlock. we don't use `-c - # annex.addunlocked=true` because we want to - # hardlink from .git/annex/objects in the source - # to .git/annex/objects in the dest, rather than - # having the unlocked copy in dest be hardlinked - # to the source, or anything like that - system "git", "-C", $dest, "annex", "add", $rel; - system "git", "-C", $dest, "annex", "unlock", $rel - if $find->ask($rel); - - # if using the default backend, quick sanity check - if ($key =~ /^SHA256E-s[0-9]+--([0-9a-f]+)/) { - my $key_sum = $1; - chomp(my $dest_key - = `git -C "$dest" annex lookupkey "$rel"`); - if ($dest_key =~ /^SHA256E-s[0-9]+--([0-9a-f]+)/) { - my $dest_key_sum = $1; - die -"git-annex calculated a different checksum for $target" - unless $key_sum eq $dest_key_sum; - } - } - } else { # this is not an annexed file - if (-d $File::Find::name) { - mkdir $target unless -d $target; - } else { - copy_and_md5($File::Find::name, $target); - system "git", "-C", $dest, - "-c", "annex.gitaddtoannex=false", "add", $rel; - } - } - $annex->git->rm($File::Find::name) unless -d $File::Find::name; - }, - no_chdir => 1, - }, - $source - ); - $annex->git->commit({ message => "migrated by annex-to-annex" }) - if $do_commit; -} -$dannex->git->commit({ message => "add" }) if $do_commit; - -sub copy_and_md5 { - copy($_[0], $_[1]); - die "md5 checksum failure after copying $_[0] to $_[1]!" - unless file_md5($_[0]) eq file_md5($_[1]); -} +use App::annex_to_annex; +exit App::annex_to_annex->main; diff --git a/lib/App/annex_to_annex.pm b/lib/App/annex_to_annex.pm new file mode 100644 index 0000000..29cd3f7 --- /dev/null +++ b/lib/App/annex_to_annex.pm @@ -0,0 +1,190 @@ +package App::annex_to_annex; +# ABSTRACT: use hardlinks to migrate files between git annex repos +# +# Copyright (C) 2019-2020 Sean Whitton +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or (at +# your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +use 5.028; +use strict; +use warnings; + +use autodie; +use subs qw(main exit); +use Digest::MD5::File qw(file_md5); +use File::Basename qw(dirname basename); +use File::Copy; +use File::Find; +use File::Spec::Functions qw(catfile rel2abs abs2rel); +use Try::Tiny; +use Git::Annex; + +my $exit_main = 0; + +CORE::exit main unless caller; + +=func main + +Implementation of annex-to-annex(1). Please see documentation for +that command. + +=cut + +sub main { + # only support v7 because supporting v5 too would make things quite + # complex. require git-annex >=7.20191009 because it will refuse to + # work in v5 repos, and because it supports `git annex find --unlocked` + chomp(my %annex_version_fields = map { split ': ' } `git annex version`); + die "I need git-annex >=7.20191009 and a v7 repository\n" + unless $annex_version_fields{'git-annex version'} >= 7.20191009; + + die "need at least two arguments\n" unless @ARGV > 1; + my $dest = rel2abs pop @ARGV; + die "dest is not a directory\n" unless -d $dest; + my $dest_device_id = (stat($dest))[0]; + my $dannex = Git::Annex->new($dest); + my $do_commit = 0; + if ($ARGV[0] eq '--commit') { + $do_commit = 1; + shift @ARGV; + + my @git_status = $dannex->git->RUN("status", { porcelain => 1 }); + die "git repo containing $dest is not clean; please commit\n" + unless @git_status == 0; + + #<<< + try { + $dannex->git->symbolic_ref({ quiet => 1 }, "HEAD"); + } catch { + die "$dest has a detached HEAD; aborting"; + }; + #>>> + } + my @sources = map rel2abs($_), @ARGV; + + # process one entry in @sources at a time because we can start up + # annex batch processes for each of these as all files under each + # entry in @sources will lie in the same annex + foreach my $source (@sources) { + my $dir = dirname $source; + my $annex = Git::Annex->new($dir); + #<<< + try { + $annex->git->annex("status"); + } catch { + die "$source does not appear to lie within an annex\n"; + }; + #>>> + die "$source does not exist\n" unless -e $source; + + if ($do_commit) { + my @git_status = $annex->git->RUN("status", { porcelain => 1 }); + die "git repo containing $source is not clean; please commit\n" + unless @git_status == 0; + + #<<< + try { + $annex->git->symbolic_ref({ quiet => 1 }, "HEAD"); + } catch { + die "$dest has a detached HEAD; aborting"; + }; + #>>> + } + + my $base = basename $source; + my @missing + = $annex->git->annex("find", "--not", "--in", "here", $base); + if (@missing) { + say "Following annexed files are not present in this repo:"; + say for @missing; + die "cannot continue; please `git-annex get` them\n"; + } + + # start batch processes + my $lk = $annex->batch("lookupkey"); + my $cl = $annex->batch("contentlocation"); + my $find = $annex->batch("find", "--unlocked"); + + find({ + wanted => sub { + my $rel = abs2rel $File::Find::name, $dir; + my $target = catfile $dest, $rel; + die "$target already exists!\n" + if -e $target and !-d $target; + + my $key = $lk->ask($rel); + if ($key) { # this is an annexed file + my $content = rel2abs $cl->ask($key), $annex->toplevel; + my $content_device_id = (stat $content)[0]; + if ($dest_device_id == $content_device_id) { + link $content, $target; + } else { + _copy_and_md5($content, $target); + } + # add, and then maybe unlock. we don't use `-c + # annex.addunlocked=true` because we want to + # hardlink from .git/annex/objects in the source + # to .git/annex/objects in the dest, rather than + # having the unlocked copy in dest be hardlinked + # to the source, or anything like that + system "git", "-C", $dest, "annex", "add", $rel; + system "git", "-C", $dest, "annex", "unlock", $rel + if $find->ask($rel); + + # if using the default backend, quick sanity check + if ($key =~ /^SHA256E-s[0-9]+--([0-9a-f]+)/) { + my $key_sum = $1; + chomp(my $dest_key + = `git -C "$dest" annex lookupkey "$rel"`); + if ($dest_key =~ /^SHA256E-s[0-9]+--([0-9a-f]+)/) { + my $dest_key_sum = $1; + die +"git-annex calculated a different checksum for $target" + unless $key_sum eq $dest_key_sum; + } + } + } else { # this is not an annexed file + if (-d $File::Find::name) { + mkdir $target unless -d $target; + } else { + _copy_and_md5($File::Find::name, $target); + system "git", "-C", $dest, + "-c", "annex.gitaddtoannex=false", "add", $rel; + } + } + $annex->git->rm($File::Find::name) + unless -d $File::Find::name; + }, + no_chdir => 1, + }, + $source + ); + $annex->git->commit({ message => "migrated by annex-to-annex" }) + if $do_commit; + } + $dannex->git->commit({ message => "add" }) if $do_commit; + + EXIT_MAIN: + return $exit_main; +} + +sub _copy_and_md5 { + copy($_[0], $_[1]); + die "md5 checksum failure after copying $_[0] to $_[1]!" + unless file_md5($_[0]) eq file_md5($_[1]); +} + +sub exit { $exit_main = shift // 0; goto EXIT_MAIN } + +1; diff --git a/t/21_annex-to-annex.t b/t/21_annex-to-annex.t index 9e84388..76ddd27 100755 --- a/t/21_annex-to-annex.t +++ b/t/21_annex-to-annex.t @@ -5,6 +5,7 @@ use strict; use warnings; use lib 't/lib'; +use App::annex_to_annex; use Test::More; use t::Setup; use t::Util; @@ -16,45 +17,37 @@ use File::chdir; plan skip_all => "device ID issues" if device_id_issues; -# make sure that `make test` will always use the right version of the -# script we seek to test -# -# TODO testing the development version is currently difficult because -# the 'run' sub in this script seems to reset PATH. Right now the -# only sure fire way to run these test against the development version -# is `dzil test`, which runs everything -my $a2a = "annex-to-annex"; -$a2a = rel2abs "blib/script/annex-to-annex" if -x "blib/script/annex-to-annex"; - -my $exit; -my @output; +my ($output, $error, $exit, @output); with_temp_annexes { make_path "dest/foo/foo2"; write_file "dest/foo/bar", ""; write_file "dest/foo/foo2/baz", ""; - ($exit, @output) = run($a2a, "source1/foo", "source2/other", "dest"); + (undef, $error, $exit) + = run_bin qw(annex-to-annex source1/foo source2/other dest); ok $exit, "it exits nonzero instead of clobbering an existing file"; - ok grep(/\/dest\/foo\/bar already exists!$/, @output), + ok grep(/\/dest\/foo\/bar already exists!$/, @$error), "it won't clobber an existing file"; }; with_temp_annexes { write_file catfile("source1", "quux"), "quux\n"; - ($exit, @output) = run($a2a, "--commit", "source1/foo/bar", "dest"); + (undef, $error, $exit) + = run_bin qw(annex-to-annex --commit source1/foo/bar dest); ok $exit, "with --commit, it exits nonzero when uncommitted source changes"; ok grep(/^git repo containing [^ ]+\/bar is not clean; please commit$/, - @output), + @$error), "with --commit, it exits when uncommitted changes"; }; with_temp_annexes { write_file catfile("dest", "quux"), "quux\n"; - ($exit, @output) = run($a2a, "--commit", "source1/foo/bar", "dest"); + (undef, $error, $exit) + = run_bin qw(annex-to-annex --commit source1/foo/bar dest); ok $exit, "with --commit, it exits nonzero when uncommitted dest changes"; ok grep(/^git repo containing [^ ]+\/dest is not clean; please commit$/, - @output), + @$error), "with --commit, it exits when uncommitted changes"; }; @@ -63,7 +56,7 @@ with_temp_annexes { with_temp_annexes { my (undef, $source1, $source2, $dest) = @_; - system $a2a, qw(--commit source1/foo source2/other dest); + run_bin qw(annex-to-annex --commit source1/foo source2/other dest); @output = $source1->RUN(qw(log -1 --oneline --name-status)); like $output[0], qr/migrated by annex-to-annex/, @@ -89,10 +82,10 @@ with_temp_annexes { my (undef, $source1) = @_; corrupt_annexed_file $source1, "foo/foo2/baz"; - ($exit, @output) - = run($a2a, "--commit", "source1/foo", "source2/other", "dest"); + (undef, $error, $exit) + = run_bin qw(annex-to-annex --commit source1/foo source2/other dest); ok $exit, "it exits nonzero when dest annex calculates a diff checksum"; - ok grep(/git-annex calculated a different checksum for/, @output), + ok grep(/git-annex calculated a different checksum for/, @$error), "it warns when dest annex calculates a diff checksum"; }; @@ -100,18 +93,18 @@ with_temp_annexes { my (undef, $source1) = @_; $source1->annex(qw(drop --force foo/foo2/baz)); - ($exit, @output) - = run($a2a, "--commit", "source1/foo", "source2/other", "dest"); + ($output, undef, $exit) + = run_bin qw(annex-to-annex --commit source1/foo source2/other dest); ok $exit, "it exits nonzero when an annexed file is not present"; ok - grep(/^Following annexed files are not present in this repo:$/, @output), + grep(/^Following annexed files are not present in this repo:$/, @$output), "it exits when annexed files are not present"; }; # this is the main integration test for the script doing its job with_temp_annexes { my (undef, $source1, $source2, $dest) = @_; - system $a2a, qw(source1/foo source2/other dest); + run_bin qw(annex-to-annex source1/foo source2/other dest); { local $CWD = "source1"; ok !-e "foo/bar", "bar should not exist in source1"; @@ -156,10 +149,3 @@ with_temp_annexes { }; done_testing; - -sub run { - my @cmd = @_; - my ($exit, @output); - @output = split "\n", capture_merged { system @cmd; $exit = $? >> 8 }; - return ($exit, @output); -} -- cgit v1.2.3