diff options
author | Sean Whitton <spwhitton@spwhitton.name> | 2020-02-07 23:27:29 -0700 |
---|---|---|
committer | Sean Whitton <spwhitton@spwhitton.name> | 2020-02-07 23:27:29 -0700 |
commit | 2ed222c8ea789702b2aeffd63e177abd08f21b80 (patch) | |
tree | 1d4a80921ebbdcf735674afa75db8d2dc38cd779 /lib | |
parent | 64f954528b4c62abdf2de391afb6fe2715c60046 (diff) | |
download | p5-Git-Annex-2ed222c8ea789702b2aeffd63e177abd08f21b80.tar.gz |
convert annex-to-annex into an App:: module
Signed-off-by: Sean Whitton <spwhitton@spwhitton.name>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/App/annex_to_annex.pm | 190 |
1 files changed, 190 insertions, 0 deletions
diff --git a/lib/App/annex_to_annex.pm b/lib/App/annex_to_annex.pm new file mode 100644 index 0000000..29cd3f7 --- /dev/null +++ b/lib/App/annex_to_annex.pm @@ -0,0 +1,190 @@ +package App::annex_to_annex; +# ABSTRACT: use hardlinks to migrate files between git annex repos +# +# Copyright (C) 2019-2020 Sean Whitton <spwhitton@spwhitton.name> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or (at +# your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +use 5.028; +use strict; +use warnings; + +use autodie; +use subs qw(main exit); +use Digest::MD5::File qw(file_md5); +use File::Basename qw(dirname basename); +use File::Copy; +use File::Find; +use File::Spec::Functions qw(catfile rel2abs abs2rel); +use Try::Tiny; +use Git::Annex; + +my $exit_main = 0; + +CORE::exit main unless caller; + +=func main + +Implementation of annex-to-annex(1). Please see documentation for +that command. + +=cut + +sub main { + # only support v7 because supporting v5 too would make things quite + # complex. require git-annex >=7.20191009 because it will refuse to + # work in v5 repos, and because it supports `git annex find --unlocked` + chomp(my %annex_version_fields = map { split ': ' } `git annex version`); + die "I need git-annex >=7.20191009 and a v7 repository\n" + unless $annex_version_fields{'git-annex version'} >= 7.20191009; + + die "need at least two arguments\n" unless @ARGV > 1; + my $dest = rel2abs pop @ARGV; + die "dest is not a directory\n" unless -d $dest; + my $dest_device_id = (stat($dest))[0]; + my $dannex = Git::Annex->new($dest); + my $do_commit = 0; + if ($ARGV[0] eq '--commit') { + $do_commit = 1; + shift @ARGV; + + my @git_status = $dannex->git->RUN("status", { porcelain => 1 }); + die "git repo containing $dest is not clean; please commit\n" + unless @git_status == 0; + + #<<< + try { + $dannex->git->symbolic_ref({ quiet => 1 }, "HEAD"); + } catch { + die "$dest has a detached HEAD; aborting"; + }; + #>>> + } + my @sources = map rel2abs($_), @ARGV; + + # process one entry in @sources at a time because we can start up + # annex batch processes for each of these as all files under each + # entry in @sources will lie in the same annex + foreach my $source (@sources) { + my $dir = dirname $source; + my $annex = Git::Annex->new($dir); + #<<< + try { + $annex->git->annex("status"); + } catch { + die "$source does not appear to lie within an annex\n"; + }; + #>>> + die "$source does not exist\n" unless -e $source; + + if ($do_commit) { + my @git_status = $annex->git->RUN("status", { porcelain => 1 }); + die "git repo containing $source is not clean; please commit\n" + unless @git_status == 0; + + #<<< + try { + $annex->git->symbolic_ref({ quiet => 1 }, "HEAD"); + } catch { + die "$dest has a detached HEAD; aborting"; + }; + #>>> + } + + my $base = basename $source; + my @missing + = $annex->git->annex("find", "--not", "--in", "here", $base); + if (@missing) { + say "Following annexed files are not present in this repo:"; + say for @missing; + die "cannot continue; please `git-annex get` them\n"; + } + + # start batch processes + my $lk = $annex->batch("lookupkey"); + my $cl = $annex->batch("contentlocation"); + my $find = $annex->batch("find", "--unlocked"); + + find({ + wanted => sub { + my $rel = abs2rel $File::Find::name, $dir; + my $target = catfile $dest, $rel; + die "$target already exists!\n" + if -e $target and !-d $target; + + my $key = $lk->ask($rel); + if ($key) { # this is an annexed file + my $content = rel2abs $cl->ask($key), $annex->toplevel; + my $content_device_id = (stat $content)[0]; + if ($dest_device_id == $content_device_id) { + link $content, $target; + } else { + _copy_and_md5($content, $target); + } + # add, and then maybe unlock. we don't use `-c + # annex.addunlocked=true` because we want to + # hardlink from .git/annex/objects in the source + # to .git/annex/objects in the dest, rather than + # having the unlocked copy in dest be hardlinked + # to the source, or anything like that + system "git", "-C", $dest, "annex", "add", $rel; + system "git", "-C", $dest, "annex", "unlock", $rel + if $find->ask($rel); + + # if using the default backend, quick sanity check + if ($key =~ /^SHA256E-s[0-9]+--([0-9a-f]+)/) { + my $key_sum = $1; + chomp(my $dest_key + = `git -C "$dest" annex lookupkey "$rel"`); + if ($dest_key =~ /^SHA256E-s[0-9]+--([0-9a-f]+)/) { + my $dest_key_sum = $1; + die +"git-annex calculated a different checksum for $target" + unless $key_sum eq $dest_key_sum; + } + } + } else { # this is not an annexed file + if (-d $File::Find::name) { + mkdir $target unless -d $target; + } else { + _copy_and_md5($File::Find::name, $target); + system "git", "-C", $dest, + "-c", "annex.gitaddtoannex=false", "add", $rel; + } + } + $annex->git->rm($File::Find::name) + unless -d $File::Find::name; + }, + no_chdir => 1, + }, + $source + ); + $annex->git->commit({ message => "migrated by annex-to-annex" }) + if $do_commit; + } + $dannex->git->commit({ message => "add" }) if $do_commit; + + EXIT_MAIN: + return $exit_main; +} + +sub _copy_and_md5 { + copy($_[0], $_[1]); + die "md5 checksum failure after copying $_[0] to $_[1]!" + unless file_md5($_[0]) eq file_md5($_[1]); +} + +sub exit { $exit_main = shift // 0; goto EXIT_MAIN } + +1; |