summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author Sean Whitton <spwhitton@spwhitton.name> 2020-02-01 15:36:10 -0700
committer Sean Whitton <spwhitton@spwhitton.name> 2020-02-01 15:36:10 -0700
commit 961bc7ee398aff3ecb2e29a5c2d44e2283380531 (patch)
tree 4bd978b481d9becf3b95f21975e4380bc2051741 /lib
parent c1aabf33bd7f70298ea977b0e20dc262a92bde2b (diff)
download p5-Git-Annex-961bc7ee398aff3ecb2e29a5c2d44e2283380531.tar.gz
add 'unused' method to Git::Annex
Signed-off-by: Sean Whitton <spwhitton@spwhitton.name>
Diffstat (limited to 'lib')
-rw-r--r-- lib/Git/Annex.pm 134
1 files changed, 134 insertions, 0 deletions
diff --git a/lib/Git/Annex.pm b/lib/Git/Annex.pm
index c744d7b..fda3a5c 100644
--- a/lib/Git/Annex.pm
+++ b/lib/Git/Annex.pm
@@ -66,7 +66,12 @@ use Cwd;
use File::chdir;
use Git::Wrapper;
use Git::Repository;
+use Try::Tiny;
use File::Spec::Functions qw(catfile rel2abs);
+use Storable;
+use Data::Compare;
+use List::Util qw(all);
+use Time::HiRes qw(stat time);
use Moo;
use namespace::clean;
@@ -104,6 +109,135 @@ has repo => (
# Git::Repository::new, so we chdir and let call without arguments
default => sub { local $CWD = shift->toplevel; Git::Repository->new });
+=head2 unused(%opts)
+
+Runs C<git annex unused> and returns an arrayref of hashrefs, one per
+unused key, containing information on unused files.
+
+The information is cached inside the C<.git/annex> directory. This
+means that a user can keep running your script without repeatedly
+executing expensive C<git annex> and C<git log> commands.
+
+Optional arguments:
+
+=over
+
+=item log
+
+If true, run C<git log --stat -S> on each unused file, to find out
+which filenames the unused data had back when it was still in use in
+the annex.
+
+Defaults to false, but if there is log data in the cache it will
+always be returned.
+
+=item from
+
+Corresponds to the C<--from> option to C<git annex unused>.
+
+=item used_refspec
+
+Corresponds to the C<--used-refspec> option to C<git annex unused>.
+
+Defaults to the C<annex.used-refspec> git config key if set, or
+C<+refs/heads/*:-refs/heads/synced/*>.
+
+=back
+
+=cut
+
+# Return information about the keys reported by `git annex unused`,
+# reusing an in-memory copy ($self->{_unused}) or the Storable file at
+# $self->_unused_cache when that data is still considered current.
+#
+# Arguments: $self, then a flat option list (log, from, used_refspec).
+# Returns: $self->{_unused}{unused}, a reference to an array of hashrefs
+# with the keys number, key, bad and tmp, plus log_lines once git-log
+# has been run for that entry.
+# NOTE(review): when the data is freshly gathered, nothing assigns
+# {unused} unless at least one output line matches, so callers may
+# receive undef rather than an empty array -- confirm.
+sub unused {
+ my ($self, %opts) = @_;
+ $opts{log} //= 0;
+ my $used_refspec_config;
+ # try without a catch block: a failing config lookup is silently
+ # ignored and $used_refspec_config stays undef, so the hard-coded
+ # default below applies
+ try { ($used_refspec_config) = $self->git->config("annex.used-refspec") };
+ $opts{used_refspec}
+ //= ($used_refspec_config // "+refs/heads/*:-refs/heads/synced/*");
+
+ # only these two options are forwarded to `git annex unused`; they are
+ # also what the cache is compared against further down
+ my %unused_args;
+ for (qw(from used_refspec)) {
+ $unused_args{$_} = $opts{$_} if defined $opts{$_};
+ }
+
+ # load the on-disk cache only when there is no in-memory copy yet and
+ # the cache file exists
+ $self->{_unused} //= retrieve $self->_unused_cache
+ if -e $self->_unused_cache;
+ # see if cache needs to be invalidated, whether or not we just
+ # retrieved it
+ if (defined $self->{_unused}) {
+ my $git_annex_unused = $self->_git_path(qw(annex unused));
+ # element 9 of stat's return list is the modification time; stat
+ # here is the one imported from Time::HiRes at the top of the file
+ my $last_unused = (stat $git_annex_unused)[9];
+ # each output line is "<short refname> <committer date>", so
+ # splitting every line yields the key/value pairs of this hash
+ my %branch_timestamps
+ = map { split }
+ $self->git->for_each_ref(
+ { format => '%(refname:short) %(committerdate:unix)' },
+ "refs/heads/");
+
+ # we don't need to invalidate the cache if the git-annex
+ # branch has changed, because the worst that can happen is we
+ # try to drop a file which has already been dropped
+ delete $branch_timestamps{'git-annex'};
+
+ # keep the cache only if all three conditions hold: the file at
+ # $git_annex_unused is not newer than the cached timestamp, the
+ # arguments match the cached ones, and every remaining branch's
+ # committer date is older than $last_unused
+ # NOTE(review): {timestamp} is not written in this sub; presumably
+ # _store_unused_cache sets it -- confirm
+ $self->_clear_unused_cache
+ unless $last_unused <= $self->{_unused}{timestamp}
+ and Compare(\%unused_args, $self->{_unused}{unused_args})
+ and all { $_ < $last_unused } values %branch_timestamps;
+ }
+
+ # get the unused info if we couldn't load from the cache or had to
+ # invalidate it
+ # NOTE(review): this relies on _clear_unused_cache leaving
+ # $self->{_unused} undefined -- confirm against its definition
+ unless (defined $self->{_unused}) {
+ # $bad and $tmp record which section heading was seen last in the
+ # command output; every entry parsed afterwards inherits both flags
+ my ($bad, $tmp) = (0, 0);
+ $self->{_unused}{unused_args} = \%unused_args;
+ # make a copy of %unused_args because Git::Wrapper will remove
+ # them from the hash
+ for ($self->git->annex("unused", {%unused_args})) {
+ if (
+/Some corrupted files have been preserved by fsck, just in case/
+ ) {
+ ($bad, $tmp) = (1, 0);
+ } elsif (
+ /Some partially transferred data exists in temporary files/) {
+ ($bad, $tmp) = (0, 1);
+ } elsif (/^ ([0-9]+) +([^ ]+)$/) {
+ # an entry line: a number, then a key containing no spaces
+ push @{ $self->{_unused}{unused} },
+ { number => $1, key => $2, bad => $bad, tmp => $tmp };
+ }
+ }
+ $self->_store_unused_cache;
+ }
+
+ # run any needed calls to git-log(1)
+ if ($opts{log}) {
+ # set once any entry gains log_lines, so the cache is stored again
+ # only when something was added
+ my $changed = 0;
+ foreach my $unused_file (@{ $self->{_unused}{unused} }) {
+ # skip entries that already have log output, and entries flagged
+ # as bad or tmp
+ next
+ if defined $unused_file->{log_lines}
+ or $unused_file->{bad}
+ or $unused_file->{tmp};
+ $changed = 1;
+ # We need the RUN here to avoid special postprocessing but
+ # also to get the -c option passed -- unclear how to pass
+ # short options to git itself, not the 'log' subcommand,
+ # with Git::Wrapper except by using RUN (passing long
+ # options to git itself is easy, per Git::Wrapper docs)
+ @{ $unused_file->{log_lines} } = $self->git->RUN(
+ "-c",
+ "diff.renameLimit=3000",
+ "log",
+ {
+ stat => 1,
+ no_textconv => 1
+ },
+ "--color=always",
+ "-S",
+ $unused_file->{key});
+ }
+ $self->_store_unused_cache if $changed;
+ }
+
+ return $self->{_unused}{unused};
+}
+
# Lazily-built attribute holding the location of the cache file that
# unused() loads with retrieve; the value comes from
# _git_path(qw(annex unused_info)).
has _unused_cache => (
is => "lazy",
default => sub { shift->_git_path(qw(annex unused_info)) });