From bd274a20b46202f3a7d53a51a117cfd4110c3a67 Mon Sep 17 00:00:00 2001 From: Sean Whitton Date: Sat, 23 Jan 2021 17:18:10 -0700 Subject: new script: gmi2email Signed-off-by: Sean Whitton --- gmi2email | 272 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 272 insertions(+) create mode 100755 gmi2email (limited to 'gmi2email') diff --git a/gmi2email b/gmi2email new file mode 100755 index 0000000..c85728b --- /dev/null +++ b/gmi2email @@ -0,0 +1,272 @@ +#!/usr/bin/perl + +# gmi2email -- subscribe to gemlogs and read individual Gemini pages by e-mail + +# Copyright (C) 2021 Sean Whitton +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or (at +# your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +use 5.028; +use strict; +use warnings; + +use DBI; +use File::Spec::Functions "catfile"; +use IO::Socket::SSL; +use MIME::Lite; +use Config::Tiny; +use Text::Wrap; +use Try::Tiny; +use Getopt::Long; +use Email::Date::Format "email_date"; +use Time::Local; +use Mail::Field::AddrList; + +my ($from, $to, $do_subs, $inline_images); +GetOptions + "from=s" => \$from, + "to=s" => \$to, + "subscriptions!" => \$do_subs, + "inline-images!" => \$inline_images; + +my $conf_r = $ENV{XDG_CONFIG_HOME} || catfile $ENV{HOME}, ".config"; +my $conf_f = catfile $conf_r, "mailscripts", "gmi2email.config"; +-e $conf_f + or (defined $to and defined $from) + or die + "no config file or sufficient command line options: don't know who to mail"; +my $conf = Config::Tiny->new->read($conf_f); + +my %to_mail_opts = ( + from => ( + $from + or $conf->{_}->{from} + or die "no From: address set in config or on command line" + ), + to => ( + $to + or $conf->{_}->{to} + or die "no To: address set in config or on command line" + ), + inline_images => $inline_images // $conf->{_}->{inline_images} // 0 +); + +@ARGV or $do_subs or die "nothing to do\n"; + +for (@ARGV) { + my $data; + if (-f) { + open my $fh, "<", $_; + $data = [<$fh>]; + } else { + my $type; + ($type, $data) = gemini_fetch($_, abs_links => 1); + $type =~ m{^text/gemini} or die "$_ is not gemtext"; + } + gemtext_to_mail($data, %to_mail_opts)->send; +} + +exit unless $do_subs; + +my $subs_f = catfile $conf_r, "mailscripts", "gmi2email.subscriptions"; +-e $subs_f or die "no list of subscriptions"; +open my $subs_fh, "<", $subs_f; + +my $db_r = $ENV{XDG_CACHE_HOME} || catfile $ENV{HOME}, ".cache"; +my $db_d = catfile $db_r, "mailscripts"; +-d $db_d or mkdir $db_d; +my $db_f = catfile $db_d, "gmi2email.db"; +my $dbh = DBI->connect("dbi:SQLite:dbname=$db_f", "", ""); +$dbh->do("CREATE TABLE IF NOT EXISTS seen (uri TEXT PRIMARY KEY)") + or die "failed to initialise database"; + +foreach my $sub (<$subs_fh>) { + chomp $sub; + my ($gemlog, $type, $data); + #<<< + try { + ($type, $data) = gemini_fetch($sub, abs_links => 1); + } catch { + my ($code) = /"gemini error: ([1-6])/; + if ($code == 4) { + warn "temporary failure retrieving $sub"; + next; # try again next run + } else { + die "while retrieving $sub $_"; + } + }; + #>>> + $type =~ m{^text/gemini} or die "$sub is not gemtext"; + for (@$data) { + if (/^#\s*/ and not $gemlog) { + chomp($gemlog = $'); + } elsif (my ($uri, $y, $m, $d, $title) + = /^=>\s*(\S+)\s+([0-9]{4})-([0-9]{2})-([0-9]{2})[\s-]*(.*)/) { + my ($rows) + = $dbh->selectrow_array( + "SELECT COUNT(*) FROM seen WHERE uri = \"$uri\""); + if ($rows == 0) { + my $mail = 1; + my ($type, $data); + #<<< + try { + ($type, $data) = gemini_fetch($uri, abs_links => 1); + } catch { + warn "when fetching $uri, $_"; + my ($code) = /"gemini error: ([1-6])/; + if ($code == 4) { + next; # try again next run + } else { + $mail = 0; # don't try this one again + } + }; + #>>> + $dbh->do("INSERT INTO seen VALUES (\"$uri\")"); + $mail or next; + if ($type =~ m{^text/gemini}) { + gemtext_to_mail( + $data, %to_mail_opts, + gemlog => $gemlog // "unknown gemlog", + link_title => $title, + date => email_date timelocal 0, + 0, 12, $d, $m - 1, $y + )->send; + } else { + warn "$uri is not gemtext"; + } + } + } + } +} + +sub gemini_fetch { + my ($uri, %opts) = @_; + + # regexp from Alex Schroeder's moku-pona program + my ($scheme, $authority, $path, $query, $fragment) + = $uri + =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|; + $scheme and $scheme eq "gemini" + or die "'$uri' does not use the gemini scheme"; + $authority or die "'$uri' lacks an authority"; + my ($host, $port) = split ":", $authority; + my $cl = IO::Socket::SSL->new( + PeerHost => $host, + PeerPort => $port // 1965, + SSL_verify_mode => SSL_VERIFY_NONE + ); + print $cl "$uri\r\n"; + + my ($status, $meta) = <$cl> =~ /^([0-9]+) (.+)/; + 20 <= $status and $status < 30 or die "gemini error: $status $meta"; + + if ($meta =~ "^text/gemini") { + my @lines; + if ($opts{abs_links}) { + $authority =~ m{/$} or $authority .= "/"; + $path =~ m{/$} or $path .= "/"; + for (<$cl>) { + s/\r?\n\z//; + if (m{^=> (?!/)} and not m{^=> [a-z]+://}) { + push @lines, "=> gemini://$authority$path$'"; + } elsif (m{^=> /}) { + push @lines, "=> gemini://$authority$'"; + } else { + push @lines, $_; + } + } + } else { + @lines = <$cl>; + } + push @lines, "" unless $lines[$#lines] eq ""; + push @lines, "Retrieved from $uri\n at " . localtime; + return $meta, \@lines; + } else { + return $meta, do { local $/; <$cl> }; + } +} + +sub gemtext_to_mail { + my ($gemtext, %opts) = @_; + $opts{from} or die "no From: address specified"; + $opts{to} or die "no To: address specified"; + + my $subject = $opts{link_title} // ""; + if ($gemtext->[0] =~ m{^#(?!#)\s*}) { + $subject = $'; + shift @$gemtext; + shift @$gemtext while $gemtext->[0] =~ /^$/; + } + + if ($opts{gemlog}) { + $opts{from} + = Mail::Field->new("From")->create($opts{from}, $opts{gemlog}) + ->stringify; + $subject = "$opts{gemlog}: $subject" if $subject; + } + + my $msg = MIME::Lite->new( + From => $opts{from}, + To => $opts{to}, + Subject => $subject, + Type => "multipart/mixed" + ); + $msg->add(Date => $opts{date}) if $opts{date}; + + my ($pre, @buffer); + my $flush = sub { + return unless @buffer; + $msg->attach(Type => "TEXT", Data => join "\r\n", @buffer); + undef @buffer; + }; + my $pad + = sub { push @buffer, "" unless !@buffer or $buffer[$#buffer] eq "" }; + for (@$gemtext) { + if ($pre) { + if (/^```/) { + $pre = 0; + } else { + push @buffer, " $_"; + } + } elsif (/^```/) { + &$pad; + $pre = 1; + } elsif (/^>\s*/) { + &$pad; + push @buffer, split "\n", wrap "> ", "> ", $'; + } elsif (/^\*\s*/) { + &$pad; + push @buffer, split "\n", wrap "• ", " ", $'; + } elsif ($opts{inline_images} + and my ($uri) = /^=>\s(\S+\.(?:jpg|jpeg|png|gif))\s/) { + &$flush; + my ($type, $data) = gemini_fetch($uri); + $msg->attach( + Type => $type, + Data => $data, + Filename => (split "/", $uri)[-1], + Disposition => "inline" + ); + } elsif (/^(?:=>|#+)/) { + &$pad; + push @buffer, $_; + } else { + &$pad; + push @buffer, split "\n", wrap "", "", $_; + } + } + + &$flush; + return $msg; +} -- cgit v1.2.3