#!/usr/bin/perl # mbox-extract-patch -- extract a git patch series from an mbox # # Copyright (C) 2020 Sean Whitton # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or (at # your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . use 5.028; use strict; use warnings; use Getopt::Long; use Mail::Box::Mbox; use File::Temp (); use List::Util qw(max); our $patch_tag_re = qr/\[(.*PATCH.*)\]/; our $trailer_re = qr/^[A-Z][a-z-]+: .+$/; # ordering here is from Konstantin Ryabitsev's get-lore-mbox.py our @trailer_order = ( "fixes", "reported", "suggested", "original", "co-", "signed-off", "tested", "reviewed", "acked", "cc", "link", "", ); # empty STDIN into a temporary file my $mbox = File::Temp->new; binmode STDIN; binmode $mbox; my $BUFSIZ = 64 * (2**10); while (read STDIN, my $buf, $BUFSIZ) { print $mbox $buf or die "couldn't write to " . $mbox->filename . ": $!"; } $mbox->close; # close filehandle for writing; keeps the file # command line arguments my $extract_trailers = 1; my ($reroll_count, $cover_letter_trailers); Getopt::Long::Configure("bundling"); GetOptions "reroll-count|v=i" => \$reroll_count, "trailers!" => \$extract_trailers, "cover-letter-trailers!" => \$cover_letter_trailers; my $folder = Mail::Box::Mbox->new(folder => $mbox->filename, access => "rw"); # first pass: extract info from messages, and delete some known not to # be wanted patches my (@reroll_counts, %trailers, %covers); foreach my $message ($folder->messages) { # we assume that the first text/plain part we find is the # patch/message, and if none, we give up on the message $message->delete, next unless defined $message->first_text_plain_part; my $subject = $message->study("subject"); $subject =~ /$patch_tag_re/, my $subject_front = $` if defined $subject; my @lines = $message->first_text_plain_part->body->decoded->lines; # $is_cover is a bit simplistic right now my $is_patch = grep /^@@ [0-9 +,-]+ @@/, @lines; my $is_cover = defined $message->patch_count && $message->patch_count == 0 && defined $subject_front && $subject_front !~ /Re: \z/i; if ($is_patch) { $message->delete, next if $reroll_count and $reroll_count != $message->reroll_count; # record reroll counts seen so we can determine, later, # whether there is more than one version of the patch series # in our input push @reroll_counts, $message->reroll_count; } elsif ($is_cover) { # all we need from covers is their msgids so we can look for # trailers sent in reply to those covers $covers{ $message->reroll_count } = $message->messageId; $message->delete; } else { my $in_replies_to = $message->get("In-Reply-To"); if ($in_replies_to and my @ids = $in_replies_to =~ m/\<([^>]+)\>/g) { warn "In-Reply-To field with more than one Message-Id; using first" if @ids > 1; my $id = $ids[0]; my @ts = grep /$trailer_re/, @lines; push $trailers{$id}->@*, @ts; } $message->delete; } } # expunge deleted messages $folder->write or die "failed to update mbox!"; # second pass requires $reroll_count to be set, and we can now # determine what it should be based on information gathered during the # first pass unless ($reroll_count) { if (@reroll_counts > 0) { # we saw one or more series, and user did not specify a reroll # count, so we extract the series with the highest version number $reroll_count = max @reroll_counts; } else { # we didn't see any reroll counts, so we mustn't have seen any # patches exit; } } # second pass: edits to patch messages, and delete remaining unwanted # messages. note that only patches remain in the mbox after first # pass foreach my $message ($folder->messages) { $message->delete, next unless $reroll_count == $message->reroll_count; my @ts; my $id = $message->messageId; my $cid = $covers{ $message->reroll_count }; @ts = $trailers{$id}->@* if $extract_trailers and $trailers{$id}; push @ts, $trailers{$cid}->@* if $cover_letter_trailers and $trailers{$cid}; $message->insert_trailers(@ts) if @ts; # if Subject: contains [PATCH nn/mm] then any text before that # should be stripped, as it should not form part of the commit # message. (The debbugs system prepends 'Bug#nnnnnn: ') my $subject = $message->study("subject"); $subject =~ /$patch_tag_re.*$/; $message->head->set(Subject => $&); } # save mbox and output $folder->close or die "failed to update & close mbox!"; exit unless -e $mbox->filename; # no patches to extract open my $fh, "< :raw :bytes", $mbox->filename or die "couldn't open " . $mbox->filename . " for reading"; while (read $fh, my $buf, $BUFSIZ) { print $buf; } package Mail::Message { use Carp; use Mail::Message::Body; use List::MoreUtils qw(first_index); sub insert_trailers { my ($self, @ts) = @_; my $part = $self->first_text_plain_part; return unless defined $part; my @lines = $part->body->decoded->lines; my $i = my $j = first_index { /^---$/ } @lines; carp "couldn't find cut; not daring insert any trailers", return if $i == -1; $i-- while $i > 0 and $lines[$i - 1] =~ /$trailer_re/; unshift @ts, splice @lines, $i, $j - $i; # algorithm based on Konstantin Ryabitsev's in his get-lore-mbox.py my (@new_ts, %added); foreach my $pat (@trailer_order) { foreach my $t (@ts) { next if exists $added{$t}; next unless $t =~ /^$pat(?:-by)?:/i; push @new_ts, $t; $added{$t} = undef; } } splice @lines, $i, 0, @new_ts; my $body = Mail::Message::Body->new( charset => "PERL", data => \@lines ); $body->encode; $part->body($body); } sub first_text_plain_part { my $self = shift; if ($self->isMultipart) { for ($self->parts("RECURSE")) { return $_ if $_->body->mimeType eq "text/plain"; } } else { return $self if $self->body->mimeType eq "text/plain"; } return; } sub reroll_count { for (shift->_subject_patch_components) { /\Av([0-9]+)\z/ and return $1; } return 1; } sub patch_count { for (shift->_subject_patch_components) { m#\A([0-9]+)/[0-9]+\z# and return $1; } return; } sub _subject_patch_components { my $subject = shift->study("subject"); return unless defined $subject; $subject =~ /$patch_tag_re/; return unless defined $1; split " ", $1; } }