package App::annex_to_annex; # ABSTRACT: use hardlinks to migrate files between git annex repos # # Copyright (C) 2019-2020 Sean Whitton # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or (at # your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . use 5.028; use strict; use warnings; use autodie; use subs qw(main exit); use Digest::MD5::File qw(file_md5); use File::Basename qw(dirname basename); use File::Copy; use File::Find; use File::Spec::Functions qw(catfile rel2abs abs2rel); use Try::Tiny; use Git::Annex; my $exit_main = 0; CORE::exit main unless caller; =func main Implementation of annex-to-annex(1). Please see documentation for that command. =cut sub main { # only support v7 because supporting v5 too would make things quite # complex. require git-annex >=7.20191009 because it will refuse to # work in v5 repos, and because it supports `git annex find --unlocked` chomp(my %annex_version_fields = map { split ': ' } `git annex version`); die "I need git-annex >=7.20191009 and a v7 repository\n" unless $annex_version_fields{'git-annex version'} >= 7.20191009; die "need at least two arguments\n" unless @ARGV > 1; my $dest = rel2abs pop @ARGV; die "dest is not a directory\n" unless -d $dest; my $dest_device_id = (stat($dest))[0]; my $dannex = Git::Annex->new($dest); my $do_commit = 0; if ($ARGV[0] eq '--commit') { $do_commit = 1; shift @ARGV; my @git_status = $dannex->git->RUN("status", { porcelain => 1 }); die "git repo containing $dest is not clean; please commit\n" unless @git_status == 0; #<<< try { $dannex->git->symbolic_ref({ quiet => 1 }, "HEAD"); } catch { die "$dest has a detached HEAD; aborting"; }; #>>> } my @sources = map rel2abs($_), @ARGV; # process one entry in @sources at a time because we can start up # annex batch processes for each of these as all files under each # entry in @sources will lie in the same annex foreach my $source (@sources) { my $dir = dirname $source; my $annex = Git::Annex->new($dir); #<<< try { $annex->git->annex("status"); } catch { die "$source does not appear to lie within an annex\n"; }; #>>> die "$source does not exist\n" unless -e $source; if ($do_commit) { my @git_status = $annex->git->RUN("status", { porcelain => 1 }); die "git repo containing $source is not clean; please commit\n" unless @git_status == 0; #<<< try { $annex->git->symbolic_ref({ quiet => 1 }, "HEAD"); } catch { die "$dest has a detached HEAD; aborting"; }; #>>> } my $base = basename $source; my @missing = $annex->git->annex("find", "--not", "--in", "here", $base); if (@missing) { say "Following annexed files are not present in this repo:"; say for @missing; die "cannot continue; please `git-annex get` them\n"; } # start batch processes my $lk = $annex->batch("lookupkey"); my $cl = $annex->batch("contentlocation"); my $find = $annex->batch("find", "--unlocked"); find({ wanted => sub { my $rel = abs2rel $File::Find::name, $dir; my $target = catfile $dest, $rel; die "$target already exists!\n" if -e $target and !-d $target; my $key = $lk->ask($rel); if ($key) { # this is an annexed file my $content = rel2abs $cl->ask($key), $annex->toplevel; my $content_device_id = (stat $content)[0]; if ($dest_device_id == $content_device_id) { link $content, $target; } else { _copy_and_md5($content, $target); } # add, and then maybe unlock. we don't use `-c # annex.addunlocked=true` because we want to # hardlink from .git/annex/objects in the source # to .git/annex/objects in the dest, rather than # having the unlocked copy in dest be hardlinked # to the source, or anything like that system "git", "-C", $dest, "annex", "add", $rel; system "git", "-C", $dest, "annex", "unlock", $rel if $find->ask($rel); # if using the default backend, quick sanity check if ($key =~ /^SHA256E-s[0-9]+--([0-9a-f]+)/) { my $key_sum = $1; chomp(my $dest_key = `git -C "$dest" annex lookupkey "$rel"`); if ($dest_key =~ /^SHA256E-s[0-9]+--([0-9a-f]+)/) { my $dest_key_sum = $1; die "git-annex calculated a different checksum for $target" unless $key_sum eq $dest_key_sum; } } } else { # this is not an annexed file if (-d $File::Find::name) { mkdir $target unless -d $target; } else { _copy_and_md5($File::Find::name, $target); system "git", "-C", $dest, "-c", "annex.gitaddtoannex=false", "add", $rel; } } $annex->git->rm($File::Find::name) unless -d $File::Find::name; }, no_chdir => 1, }, $source ); $annex->git->commit({ message => "migrated by annex-to-annex" }) if $do_commit; } $dannex->git->commit({ message => "add" }) if $do_commit; EXIT_MAIN: return $exit_main; } sub _copy_and_md5 { copy($_[0], $_[1]); die "md5 checksum failure after copying $_[0] to $_[1]!" unless file_md5($_[0]) eq file_md5($_[1]); } sub exit { $exit_main = shift // 0; goto EXIT_MAIN } 1;