summaryrefslogtreecommitdiff
path: root/archive/bin/reading.py
blob: c4fc6361e5c3bc67df8a4a2d5d41b98f4f7aadd9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python

"""reading.py -- adds stuff to my reading list"""

import subprocess
import sys
import os
from readability.readability import Document
import html2text
import urllib2
import time
import socket
import tempfile
import shutil

# Directory that receives one Markdown file per saved article.
READINGDIR = "/home/swhitton/local/reading"
# Org-mode file to which a TODO entry is appended for each saved article.
READINGORG = "/home/swhitton/doc/org/reading.org"

def main():
    """Save a readable copy of a web page and queue it for reading.

    The URL is taken from the first command-line argument.  The script then:

    1. downloads the page and extracts the article body and title with
       readability;
    2. converts the article to Markdown and writes it under READINGDIR;
    3. appends a TODO entry linking to the URL and the Markdown file to
       READINGORG;
    4. runs pandoc, makemobi and sendtokindle on the Markdown file from a
       temporary working directory;
    5. launches notify-send with the name of the saved file.

    Exits with a usage message when no URL argument is supplied.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: reading.py URL")
    url = sys.argv[1]

    # Each piece ends with a space so the product tokens stay separated
    # once the adjacent literals are joined.
    user_agent = ('Mozilla/5.0 (X11; Linux i686; rv:32.0) '
                  'Gecko/20100101 Firefox/32.0 '
                  'Iceweasel/32.0')
    req = urllib2.Request(url, headers={'User-Agent': user_agent})
    page = urllib2.urlopen(req)
    try:
        unreadable_html = page.read()
        # Grab the headers before closing; they are needed for the
        # encoding detection below.
        page_headers = page.headers
    finally:
        page.close()

    # encoding stuff from
    # http://cdn3.brettterpstra.com/downloads/Read2Text1.zip
    try:
        from feedparser import _getCharacterEncoding as enc
    except ImportError:
        # Without feedparser, fall back to assuming UTF-8.
        def enc(_headers, _data):
            return ('utf-8', 1)

    # One Document serves both the article body and the title.
    document = Document(unreadable_html)
    readable_html = document.summary().encode('ascii', 'ignore')
    readable_title = document.short_title().encode('ascii', 'ignore')

    encoding = enc(page_headers, readable_html)[0]
    if encoding == 'us-ascii':
        encoding = 'utf-8'
    data = readable_html.decode(encoding)
    data_title = readable_title.decode(encoding)

    h2t = html2text.HTML2Text()
    h2t.ignore_links = True
    markdown = h2t.handle(data)

    # Non-alphanumeric characters in the title become underscores; the
    # appended timestamp keeps names distinct across saves.
    safe_title = "".join(c if c.isalnum() else "_" for c in readable_title)
    filename = os.path.join(READINGDIR,
                            safe_title + str(int(time.time())) + ".md")

    try:
        os.mkdir(READINGDIR)
    except OSError:
        # An already existing directory is fine; anything else (permissions,
        # missing parent, ...) is a real error and must not be hidden.
        if not os.path.isdir(READINGDIR):
            raise

    with open(filename, 'w') as markdown_file:
        markdown_file.write("## "
                            + data_title.encode('utf8')
                            + "\n\n"
                            + markdown.encode('utf8'))

    # The entry starts with a newline and has no trailing one, so successive
    # appends each begin on a fresh line.
    org = """
* TODO [[{url}][{title}]]
:PROPERTIES:
:markdown: [[file:{mdfile}]]
:machine:  {hostname}
:END:""".format(url=url,
                title=readable_title,
                mdfile=filename,
                hostname=socket.gethostname())

    with open(READINGORG, 'a') as org_file:
        org_file.write(org)

    # Build and send the e-book from a scratch directory.  The tools run with
    # cwd=workdir, so this process never sits in a directory that is about to
    # be deleted, and the directory is removed even if a tool cannot be
    # started.  Exit statuses of the tools are not checked.
    workdir = tempfile.mkdtemp()
    try:
        subprocess.call(["pandoc", filename, "-o", "article.epub"],
                        cwd=workdir)
        subprocess.call(["makemobi", "article.epub", readable_title, ""],
                        cwd=workdir)
        subprocess.call(["sendtokindle", "article.mobi"], cwd=workdir)
    finally:
        shutil.rmtree(workdir)

    # The session bus address is read from a file; strip the line terminator
    # so it does not end up inside the environment variable.
    with open('/home/swhitton/.tmp-dbus-addr', 'r') as dbus_file:
        dbus_address = dbus_file.readline().strip()

    notify_env = dict(os.environ)
    notify_env['DBUS_SESSION_BUS_ADDRESS'] = dbus_address
    # NOTE(review): "0:0" names display 0 on a host called "0"; the local
    # display is normally spelled ":0" -- confirm before changing.
    notify_env['DISPLAY'] = "0:0"

    # notify-send is started without waiting for it; the child keeps its own
    # copy of the stderr descriptor after the file is closed here.
    with open('/tmp/zenityerr', 'a') as notify_err:
        subprocess.Popen(['/usr/bin/notify-send',
                          '--hint=int:transient:1',
                          filename], stderr=notify_err, env=notify_env)

# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()