summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorSean Whitton <spwhitton@spwhitton.name>2020-02-07 23:27:29 -0700
committerSean Whitton <spwhitton@spwhitton.name>2020-02-07 23:27:29 -0700
commit2ed222c8ea789702b2aeffd63e177abd08f21b80 (patch)
tree1d4a80921ebbdcf735674afa75db8d2dc38cd779 /lib
parent64f954528b4c62abdf2de391afb6fe2715c60046 (diff)
downloadp5-Git-Annex-2ed222c8ea789702b2aeffd63e177abd08f21b80.tar.gz
convert annex-to-annex into an App:: module
Signed-off-by: Sean Whitton <spwhitton@spwhitton.name>
Diffstat (limited to 'lib')
-rw-r--r--lib/App/annex_to_annex.pm190
1 files changed, 190 insertions, 0 deletions
diff --git a/lib/App/annex_to_annex.pm b/lib/App/annex_to_annex.pm
new file mode 100644
index 0000000..29cd3f7
--- /dev/null
+++ b/lib/App/annex_to_annex.pm
@@ -0,0 +1,190 @@
+package App::annex_to_annex;
+# ABSTRACT: use hardlinks to migrate files between git annex repos
+#
+# Copyright (C) 2019-2020 Sean Whitton <spwhitton@spwhitton.name>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or (at
+# your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use 5.028;
+use strict;
+use warnings;
+
+use autodie;
+use subs qw(main exit);
+use Digest::MD5::File qw(file_md5);
+use File::Basename qw(dirname basename);
+use File::Copy;
+use File::Find;
+use File::Spec::Functions qw(catfile rel2abs abs2rel);
+use Try::Tiny;
+use Git::Annex;
+
+my $exit_main = 0;
+
+CORE::exit main unless caller;
+
+=func main
+
+Implementation of annex-to-annex(1). Please see documentation for
+that command.
+
+=cut
+
+sub main {
+ # only support v7 because supporting v5 too would make things quite
+ # complex. require git-annex >=7.20191009 because it will refuse to
+ # work in v5 repos, and because it supports `git annex find --unlocked`
+ chomp(my %annex_version_fields = map { split ': ' } `git annex version`);
+ die "I need git-annex >=7.20191009 and a v7 repository\n"
+ unless $annex_version_fields{'git-annex version'} >= 7.20191009;
+
+ die "need at least two arguments\n" unless @ARGV > 1;
+ my $dest = rel2abs pop @ARGV;
+ die "dest is not a directory\n" unless -d $dest;
+ my $dest_device_id = (stat($dest))[0];
+ my $dannex = Git::Annex->new($dest);
+ my $do_commit = 0;
+ if ($ARGV[0] eq '--commit') {
+ $do_commit = 1;
+ shift @ARGV;
+
+ my @git_status = $dannex->git->RUN("status", { porcelain => 1 });
+ die "git repo containing $dest is not clean; please commit\n"
+ unless @git_status == 0;
+
+ #<<<
+ try {
+ $dannex->git->symbolic_ref({ quiet => 1 }, "HEAD");
+ } catch {
+ die "$dest has a detached HEAD; aborting";
+ };
+ #>>>
+ }
+ my @sources = map rel2abs($_), @ARGV;
+
+ # process one entry in @sources at a time because we can start up
+ # annex batch processes for each of these as all files under each
+ # entry in @sources will lie in the same annex
+ foreach my $source (@sources) {
+ my $dir = dirname $source;
+ my $annex = Git::Annex->new($dir);
+ #<<<
+ try {
+ $annex->git->annex("status");
+ } catch {
+ die "$source does not appear to lie within an annex\n";
+ };
+ #>>>
+ die "$source does not exist\n" unless -e $source;
+
+ if ($do_commit) {
+ my @git_status = $annex->git->RUN("status", { porcelain => 1 });
+ die "git repo containing $source is not clean; please commit\n"
+ unless @git_status == 0;
+
+ #<<<
+ try {
+ $annex->git->symbolic_ref({ quiet => 1 }, "HEAD");
+ } catch {
+ die "$dest has a detached HEAD; aborting";
+ };
+ #>>>
+ }
+
+ my $base = basename $source;
+ my @missing
+ = $annex->git->annex("find", "--not", "--in", "here", $base);
+ if (@missing) {
+ say "Following annexed files are not present in this repo:";
+ say for @missing;
+ die "cannot continue; please `git-annex get` them\n";
+ }
+
+ # start batch processes
+ my $lk = $annex->batch("lookupkey");
+ my $cl = $annex->batch("contentlocation");
+ my $find = $annex->batch("find", "--unlocked");
+
+ find({
+ wanted => sub {
+ my $rel = abs2rel $File::Find::name, $dir;
+ my $target = catfile $dest, $rel;
+ die "$target already exists!\n"
+ if -e $target and !-d $target;
+
+ my $key = $lk->ask($rel);
+ if ($key) { # this is an annexed file
+ my $content = rel2abs $cl->ask($key), $annex->toplevel;
+ my $content_device_id = (stat $content)[0];
+ if ($dest_device_id == $content_device_id) {
+ link $content, $target;
+ } else {
+ _copy_and_md5($content, $target);
+ }
+ # add, and then maybe unlock. we don't use `-c
+ # annex.addunlocked=true` because we want to
+ # hardlink from .git/annex/objects in the source
+ # to .git/annex/objects in the dest, rather than
+ # having the unlocked copy in dest be hardlinked
+ # to the source, or anything like that
+ system "git", "-C", $dest, "annex", "add", $rel;
+ system "git", "-C", $dest, "annex", "unlock", $rel
+ if $find->ask($rel);
+
+ # if using the default backend, quick sanity check
+ if ($key =~ /^SHA256E-s[0-9]+--([0-9a-f]+)/) {
+ my $key_sum = $1;
+ chomp(my $dest_key
+ = `git -C "$dest" annex lookupkey "$rel"`);
+ if ($dest_key =~ /^SHA256E-s[0-9]+--([0-9a-f]+)/) {
+ my $dest_key_sum = $1;
+ die
+"git-annex calculated a different checksum for $target"
+ unless $key_sum eq $dest_key_sum;
+ }
+ }
+ } else { # this is not an annexed file
+ if (-d $File::Find::name) {
+ mkdir $target unless -d $target;
+ } else {
+ _copy_and_md5($File::Find::name, $target);
+ system "git", "-C", $dest,
+ "-c", "annex.gitaddtoannex=false", "add", $rel;
+ }
+ }
+ $annex->git->rm($File::Find::name)
+ unless -d $File::Find::name;
+ },
+ no_chdir => 1,
+ },
+ $source
+ );
+ $annex->git->commit({ message => "migrated by annex-to-annex" })
+ if $do_commit;
+ }
+ $dannex->git->commit({ message => "add" }) if $do_commit;
+
+ EXIT_MAIN:
+ return $exit_main;
+}
+
+sub _copy_and_md5 {
+ copy($_[0], $_[1]);
+ die "md5 checksum failure after copying $_[0] to $_[1]!"
+ unless file_md5($_[0]) eq file_md5($_[1]);
+}
+
+sub exit { $exit_main = shift // 0; goto EXIT_MAIN }
+
+1;