1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
|
#!/usr/bin/env python
"""reading.py -- adds stuff to my reading list"""
import subprocess
import sys
import os
from readability.readability import Document
import html2text
import urllib2
import time
import socket
import tempfile
import shutil
# Directory in which the Markdown copy of each saved article is written
# (main() tries to create it if it is missing).
READINGDIR = "/home/swhitton/local/reading"
# Org file to which main() appends one TODO entry per saved article.
READINGORG = "/home/swhitton/doc/org/reading.org"
def _fetch(url):
    """Download *url* and return ``(headers, body)``.

    The request carries a Firefox/Iceweasel User-Agent header.  The
    response object is closed before returning; its headers object stays
    usable afterwards.
    """
    request = urllib2.Request(
        url,
        headers={'User-Agent': ('Mozilla/5.0 (X11; Linux i686; rv:32.0) '
                                'Gecko/20100101 Firefox/32.0 '
                                'Iceweasel/32.0')})
    page = urllib2.urlopen(request)
    try:
        return page.headers, page.read()
    finally:
        page.close()


def _guess_encoding(headers, body):
    """Return the charset name used to decode *body*.

    Uses feedparser's private detector when it can be imported (approach
    taken from http://cdn3.brettterpstra.com/downloads/Read2Text1.zip) and
    falls back to UTF-8 otherwise.  A detected ``us-ascii`` is widened to
    ``utf-8``.
    """
    try:
        from feedparser import _getCharacterEncoding as detect
    except ImportError:
        return 'utf-8'
    encoding = detect(headers, body)[0]
    if encoding == 'us-ascii':
        encoding = 'utf-8'
    return encoding


def _extract_article(headers, raw_html):
    """Return ``(ascii_title, title, markdown)`` for the page in *raw_html*.

    *ascii_title* is the byte-string title with non-ASCII characters
    dropped; *title* and *markdown* are the decoded text versions.
    """
    document = Document(raw_html)
    readable_html = document.summary().encode('ascii', 'ignore')
    ascii_title = document.short_title().encode('ascii', 'ignore')
    # NOTE(review): both strings are already reduced to ASCII at this point,
    # so decoding them with a charset that is not ASCII-compatible (e.g.
    # UTF-16) would garble them -- confirm this round trip is intended.
    encoding = _guess_encoding(headers, readable_html)
    converter = html2text.HTML2Text()
    converter.ignore_links = True
    markdown = converter.handle(readable_html.decode(encoding))
    return ascii_title, ascii_title.decode(encoding), markdown


def _save_markdown(ascii_title, title, markdown):
    """Write the article under READINGDIR and return the file's path.

    The file name is the title with every non-alphanumeric character
    replaced by ``_``, followed by the current Unix time and ``.md``.

    Raises OSError if READINGDIR is missing and cannot be created.
    """
    slug = "".join(char if char.isalnum() else "_" for char in ascii_title)
    filename = READINGDIR + "/" + slug + str(int(time.time())) + ".md"
    try:
        os.mkdir(READINGDIR)
    except OSError:
        # An already-existing directory is fine; anything else (permissions,
        # missing parent, ...) is a real error and must not be hidden.
        if not os.path.isdir(READINGDIR):
            raise
    with open(filename, 'w') as markdown_file:
        markdown_file.write("## "
                            + title.encode('utf8')
                            + "\n\n"
                            + markdown.encode('utf8'))
    return filename


def _append_org_entry(url, ascii_title, filename):
    """Append a TODO entry linking *url* and *filename* to READINGORG."""
    entry = (
        "\n"
        "* TODO [[{url}][{title}]]\n"
        ":PROPERTIES:\n"
        ":markdown: [[file:{mdfile}]]\n"
        ":machine: {hostname}\n"
        ":END:"
    ).format(url=url,
             title=ascii_title,
             mdfile=filename,
             hostname=socket.gethostname())
    with open(READINGORG, 'a') as org_file:
        org_file.write(entry)


def _send_to_kindle(filename, ascii_title):
    """Run pandoc, makemobi and sendtokindle on *filename* in a scratch dir.

    Exit statuses of the three commands are ignored, as before.  The scratch
    directory is removed even if launching one of the commands raises.
    """
    workdir = tempfile.mkdtemp()
    try:
        # cwd= keeps this process out of a directory that is about to be
        # deleted; *filename* is absolute, so the commands see the same paths.
        subprocess.call(["pandoc", filename, "-o", "article.epub"],
                        cwd=workdir)
        subprocess.call(["makemobi", "article.epub", ascii_title, ""],
                        cwd=workdir)
        subprocess.call(["sendtokindle", "article.mobi"], cwd=workdir)
    finally:
        shutil.rmtree(workdir)


def _notify(filename):
    """Start notify-send with *filename* as the notification text.

    The D-Bus session address is read from the first line of
    ``~swhitton/.tmp-dbus-addr``; notify-send's stderr is appended to
    ``/tmp/zenityerr``.  The child process is not waited for.
    """
    with open('/home/swhitton/.tmp-dbus-addr', 'r') as address_file:
        # Drop the line terminator so it does not end up inside the address.
        dbus_address = address_file.readline().strip()
    env = dict(os.environ)
    env['DBUS_SESSION_BUS_ADDRESS'] = dbus_address
    # NOTE(review): "0:0" names display 0 on a host called "0"; the local
    # display is normally written ":0" -- confirm before changing.
    env['DISPLAY'] = "0:0"
    with open('/tmp/zenityerr', 'a') as error_log:
        subprocess.Popen(['/usr/bin/notify-send',
                          '--hint=int:transient:1',
                          filename], stderr=error_log, env=env)


def main():
    """Save the page named by ``sys.argv[1]`` to the reading list.

    Steps: download the page, extract its readable content as Markdown,
    write it under READINGDIR, append a TODO entry to READINGORG, convert
    and hand the article to ``sendtokindle``, and show a desktop
    notification carrying the saved file's path.

    Exits with a usage message when no URL argument is given.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: {0} URL".format(sys.argv[0]))
    url = sys.argv[1]
    headers, raw_html = _fetch(url)
    ascii_title, title, markdown = _extract_article(headers, raw_html)
    filename = _save_markdown(ascii_title, title, markdown)
    _append_org_entry(url, ascii_title, filename)
    _send_to_kindle(filename, ascii_title)
    _notify(filename)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|