diff options
author | Sean Whitton <spwhitton@spwhitton.name> | 2020-02-01 15:36:10 -0700 |
---|---|---|
committer | Sean Whitton <spwhitton@spwhitton.name> | 2020-02-01 15:36:10 -0700 |
commit | 961bc7ee398aff3ecb2e29a5c2d44e2283380531 (patch) | |
tree | 4bd978b481d9becf3b95f21975e4380bc2051741 /lib | |
parent | c1aabf33bd7f70298ea977b0e20dc262a92bde2b (diff) | |
download | p5-Git-Annex-961bc7ee398aff3ecb2e29a5c2d44e2283380531.tar.gz |
add 'unused' method to Git::Annex
Signed-off-by: Sean Whitton <spwhitton@spwhitton.name>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Git/Annex.pm | 134 |
1 files changed, 134 insertions, 0 deletions
diff --git a/lib/Git/Annex.pm b/lib/Git/Annex.pm
index c744d7b..fda3a5c 100644
--- a/lib/Git/Annex.pm
+++ b/lib/Git/Annex.pm
@@ -66,7 +66,12 @@ use Cwd;
 use File::chdir;
 use Git::Wrapper;
 use Git::Repository;
+use Try::Tiny;
 use File::Spec::Functions qw(catfile rel2abs);
+use Storable;
+use Data::Compare;
+use List::Util qw(all);
+use Time::HiRes qw(stat time);
 
 use Moo;
 use namespace::clean;
@@ -104,6 +109,135 @@ has repo => (
     # Git::Repository::new, so we chdir and let call without arguments
     default => sub { local $CWD = shift->toplevel; Git::Repository->new });
 
+=head2 unused(%opts)
+
+Runs C<git annex unused> and returns a reference to an array of hashrefs,
+one per unused key, with the fields C<number>, C<key>, C<bad> and C<tmp>.
+
+The information is cached inside the C<.git/annex> directory.  This
+means that a user can keep running your script without repeatedly
+executing expensive C<git annex> and C<git log> commands.
+
+Optional arguments:
+
+=over
+
+=item log
+
+If true, run C<git log --stat -S> on each unused key not flagged C<bad>
+or C<tmp>, storing the output in its C<log_lines> field; this shows what
+filenames the data had when it was still in use in the annex.
+
+Defaults to false, but if there is log data in the cache it will
+always be returned.
+
+=item from
+
+Corresponds to the C<--from> option to C<git annex unused>.
+
+=item used_refspec
+
+Corresponds to the C<--used-refspec> option to C<git annex unused>.
+
+Defaults to the C<annex.used-refspec> git config key if set, or
+C<+refs/heads/*:-refs/heads/synced/*>.
+
+=back
+
+=cut
+
+sub unused {
+    my ($self, %opts) = @_;
+    $opts{log} //= 0;
+    # errors from git-config(1) are ignored so we fall back to the default
+    my $used_refspec_config;
+    try { ($used_refspec_config) = $self->git->config("annex.used-refspec") };
+    $opts{used_refspec}
+      //= ($used_refspec_config // "+refs/heads/*:-refs/heads/synced/*");
+
+    my %unused_args;
+    for my $arg (qw(from used_refspec)) {
+        $unused_args{$arg} = $opts{$arg} if defined $opts{$arg};
+    }
+
+    # load the on-disk cache unless this object already holds the data
+    $self->{_unused} //= retrieve $self->_unused_cache
+      if -e $self->_unused_cache;
+    # see if cache needs to be invalidated, whether or not we just
+    # retrieved it
+    if (defined $self->{_unused}) {
+        my $git_annex_unused = $self->_git_path(qw(annex unused));
+        my $last_unused      = (stat $git_annex_unused)[9];
+        my %branch_timestamps
+          = map { split }
+          $self->git->for_each_ref(
+            { format => '%(refname:short) %(committerdate:unix)' },
+            "refs/heads/");
+
+        # we don't need to invalidate the cache if the git-annex
+        # branch has changed, because the worst that can happen is we
+        # try to drop a file which has already been dropped
+        delete $branch_timestamps{'git-annex'};
+
+        # if stat failed, $last_unused is undef: treat the cache as stale
+        $self->_clear_unused_cache
+          unless defined $last_unused
+          and $last_unused <= $self->{_unused}{timestamp}
+          and Compare(\%unused_args, $self->{_unused}{unused_args})
+          and all { $_ < $last_unused } values %branch_timestamps;
+    }
+
+    # get the unused info if we couldn't load from the cache or had to
+    # invalidate it
+    unless (defined $self->{_unused}) {
+        my ($bad, $tmp) = (0, 0);
+        # start with an empty list so "nothing unused" yields [], not undef
+        $self->{_unused} = { unused_args => \%unused_args, unused => [] };
+        # make a copy of %unused_args because Git::Wrapper will remove
+        # them from the hash
+        for my $line ($self->git->annex("unused", {%unused_args})) {
+            # the two messages switch the flags applied to later entries
+            if ($line =~
+                /Some corrupted files have been preserved by fsck, just in case/) {
+                ($bad, $tmp) = (1, 0);
+            } elsif ($line =~
+                /Some partially transferred data exists in temporary files/) {
+                ($bad, $tmp) = (0, 1);
+            } elsif ($line =~ /^ +([0-9]+) +([^ ]+)$/) {
+                # entry lines are indented by one or more spaces
+                push @{ $self->{_unused}{unused} },
+                  { number => $1, key => $2, bad => $bad, tmp => $tmp };
+            }
+        }
+        $self->_store_unused_cache;
+    }
+
+    # run any needed calls to git-log(1)
+    if ($opts{log}) {
+        my $changed = 0;
+        foreach my $unused_file (@{ $self->{_unused}{unused} }) {
+            next
+              if defined $unused_file->{log_lines}
+              or $unused_file->{bad}
+              or $unused_file->{tmp};
+            $changed = 1;
+            # We need the RUN here to avoid special postprocessing but
+            # also to get the -c option passed -- unclear how to pass
+            # short options to git itself, not the 'log' subcommand,
+            # with Git::Wrapper except by using RUN (passing long
+            # options to git itself is easy, per Git::Wrapper docs)
+            @{ $unused_file->{log_lines} } = $self->git->RUN(
+                "-c", "diff.renameLimit=3000", "log",
+                { stat => 1, no_textconv => 1 },
+                "--color=always", "-S", $unused_file->{key});
+        }
+        $self->_store_unused_cache if $changed;
+    }
+
+    # a cache loaded from disk may lack the list; still return an arrayref
+    return $self->{_unused}{unused} //= [];
+}
+
 has _unused_cache => (
     is      => "lazy",
     default => sub { shift->_git_path(qw(annex unused_info)) });