commit 5a5d10ddc8ffc58403a4469fa04edf781148e9d7
Author: Christoph Lohmann <20h@r-36.net>
Date: Sun, 9 Mar 2014 18:26:25 +0100
Initial commit of Zeitungsschau.
Diffstat:
feed.py | | | 170 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
feeddb.py | | | 180 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
feedemail.py | | | 97 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
opml.py | | | 51 | +++++++++++++++++++++++++++++++++++++++++++++++++++ |
zs.py | | | 122 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
5 files changed, 620 insertions(+), 0 deletions(-)
diff --git a/feed.py b/feed.py
@@ -0,0 +1,170 @@
+#
+# Copy me if you can.
+# by 20h
+#
+
+from lxml import objectify
+from datetime import datetime
+import dateutil.parser
+import urllib.request, urllib.parse, urllib.error
+
def parseiso(dstr):
    """Return a datetime parsed from the string form of *dstr*.

    dateutil accepts ISO 8601 as well as RFC 822 dates, so this covers
    both Atom and RSS timestamps.
    """
    text = str(dstr)
    return dateutil.parser.parse(text)
+
def removenamespaces(xml):
    """Strip every namespace prefix ("{uri}") from tag names in *xml*, in place.

    Feeds mix Atom/RSS/RDF namespaces; dropping them lets the rest of
    the parser address elements by bare tag name.
    """
    for key in xml.nsmap:
        nsstr = u'{%s}' % (xml.nsmap[key])
        nsl = len(nsstr)

        # iter() replaces the long-deprecated getiterator().
        for elem in xml.iter():
            # Comments and processing instructions have non-string
            # tags; calling startswith() on them would raise.
            if isinstance(elem.tag, str) and elem.tag.startswith(nsstr):
                elem.tag = elem.tag[nsl:]
+
def parsexml(astr):
    """Parse raw feed bytes and return a namespace-free objectify tree."""
    tree = objectify.fromstring(astr)
    removenamespaces(tree)
    return tree
+
def _parsearticle(entry):
    """Build one article dict from an <item>/<entry> element."""
    article = {}

    # title
    if hasattr(entry, "title"):
        article["title"] = str(entry["title"])

    # link: Atom carries it in the href attribute, RSS in the text.
    if hasattr(entry, "link"):
        if "href" in entry.link.attrib:
            article["link"] = str(entry.link.attrib["href"])
        else:
            article["link"] = str(entry.link)
    elif hasattr(entry, "source"):
        article["link"] = str(entry.source)

    # id
    if hasattr(entry, "id"):
        article["id"] = str(entry["id"])

    # enclosure (attached media file)
    if hasattr(entry, "enclosure"):
        if "href" in entry.enclosure.attrib:
            article["file"] = str(entry.enclosure.attrib["href"])
        elif "url" in entry.enclosure.attrib:
            article["file"] = str(entry.enclosure.attrib["url"])
        else:
            article["file"] = str(entry.enclosure)

    # updated: first known date field wins, else "now".
    if hasattr(entry, "updated"):
        article["updated"] = parseiso(entry.updated)
    elif hasattr(entry, "pubDate"):
        article["updated"] = parseiso(entry.pubDate)
    elif hasattr(entry, "date"):
        article["updated"] = parseiso(entry.date)
    else:
        article["updated"] = datetime.now()

    # author
    if hasattr(entry, "author"):
        if hasattr(entry.author, "name"):
            article["author"] = str(entry.author.name)
        else:
            article["author"] = str(entry.author)
    elif hasattr(entry, "creator"):
        article["author"] = str(entry.creator)

    # tags
    if hasattr(entry, "category"):
        article["tags"] = [str(cat) for cat in entry["category"][:]]

    # text: prefer full content over summaries.
    if hasattr(entry, "encoded"):
        article["text"] = str(entry.encoded)
    elif hasattr(entry, "content"):
        article["text"] = str(entry.content)
    elif hasattr(entry, "summary"):
        article["text"] = str(entry.summary)
    elif hasattr(entry, "description"):
        article["text"] = str(entry.description)

    return article

def parse(astr):
    """Parse raw feed bytes (Atom, RSS 2.0 or RDF/RSS 1.0) into a dict.

    Returns a feed dict with metadata keys ("title", "link", "updated",
    "email", "author", ...) and an "articles" list of article dicts.
    """
    xml = parsexml(astr)

    feed = {}
    isrss = False
    isrdf = False
    oxml = xml

    if hasattr(xml, "channel"):
        # RDF (RSS 1.0) keeps <item>s next to <channel>, not inside it.
        if hasattr(xml, "item"):
            isrdf = True
        xml = xml.channel
        isrss = True

    feed["title"] = ""
    for e in ("title", "description"):
        if hasattr(xml, e):
            feed[e] = str(xml[e])

    # Fall back to the channel image's title when none was found.
    # (Bug fix: "title" is always present by this point, so the
    # original `"title" not in feed` test never fired.)
    if hasattr(xml, "image") and hasattr(xml.image, "title"):
        if feed["title"] == "":
            feed["title"] = str(xml.image.title)

    if hasattr(xml, "updated"):
        feed["updated"] = parseiso(xml.updated)
    elif hasattr(xml, "pubDate"):
        feed["updated"] = parseiso(xml.pubDate)
    elif hasattr(xml, "lastBuildDate"):
        feed["updated"] = parseiso(xml.lastBuildDate)
    else:
        feed["updated"] = datetime.now()

    if hasattr(xml, "link"):
        if "href" in xml.link.attrib:
            feed["link"] = str(xml.link.attrib["href"])
        else:
            feed["link"] = str(xml.link)

    # Contact address, in decreasing order of preference.
    if hasattr(xml, "webmaster"):
        feed["email"] = str(xml.webmaster)
    elif hasattr(xml, "owner") and hasattr(xml.owner, "email"):
        feed["email"] = str(xml.owner.email)
    elif hasattr(xml, "author") and hasattr(xml.author, "email"):
        feed["email"] = str(xml.author.email)
    elif hasattr(xml, "webMaster"):
        feed["email"] = str(xml.webMaster)
    elif hasattr(xml, "managingeditor"):
        feed["email"] = str(xml.managingeditor)
    elif hasattr(xml, "managingEditor"):
        feed["email"] = str(xml.managingEditor)

    if hasattr(xml, "author"):
        if hasattr(xml.author, "name"):
            feed["author"] = str(xml.author.name)
        else:
            feed["author"] = str(xml.author)
    elif hasattr(xml, "creator"):
        feed["author"] = str(xml.creator)

    entryname = "item" if (isrss or isrdf) else "entry"
    if isrdf:
        # RDF items live on the document root, not the channel.
        xml = oxml

    articles = []
    if hasattr(xml, entryname):
        for entry in xml[entryname][:]:
            articles.append(_parsearticle(entry))
    feed["articles"] = articles

    return feed
+
# Custom opener so feed fetches identify themselves with their own
# User-Agent string instead of the urllib default.
class feedopener(urllib.request.FancyURLopener):
    # NOTE(review): FancyURLopener and the private _urlopener hook are
    # deprecated in modern Python; kept as-is here.
    version = "Zeitungsschau/1.0"
urllib.request._urlopener = feedopener
+
def fetch(uri):
    """Download the feed at *uri* and return it parsed into a dict."""
    raw = urllib.request.urlopen(uri).read()
    return parse(raw)
+
diff --git a/feeddb.py b/feeddb.py
@@ -0,0 +1,180 @@
+#!/usr/bin/env python
+# coding=utf-8
+#
+# Copy me if you can.
+# by 20h
+#
+
+import shelve
+import os
+import os.path
+import fcntl
+from subprocess import Popen
+
class feeddb(object):
    """Shelve-backed, file-locked store for feed state and configuration.

    The on-disk layout is a shelve with two keys: "feeds" (uri -> feed
    dict) and "cfg" (global configuration). An exclusive fcntl lock on
    a sidecar ".lck" file serializes concurrent zs invocations.
    """

    def __init__(self, path="~/.zs/feed.db", email=None):
        # *email* is unused; kept for interface compatibility.
        # Instance state. (The original declared these as mutable class
        # attributes, which would be shared between instances.)
        self.db = None
        self.lockf = None
        self.feeds = {}
        self.cfg = {}

        dbpath = os.path.expanduser(path)
        dbdir = os.path.abspath(os.path.dirname(dbpath))
        if not os.path.exists(dbdir):
            os.makedirs(dbdir, 0o750)

        # Take the lock before touching the shelve.
        lockpath = "%s.lck" % (dbpath)
        self.lockf = open(lockpath, "w")
        fcntl.lockf(self.lockf.fileno(), fcntl.LOCK_EX)

        self.db = shelve.open(dbpath)
        if "feeds" in self.db:
            self.feeds = self.db["feeds"]
        if "cfg" in self.db:
            self.cfg = self.db["cfg"]

        if "email" not in self.cfg:
            print("You need to specify the default email. Please "\
                "run 'zs cfg email me@me.com' to "\
                "set it.")

        # SMTP defaults.
        self.cfg.setdefault("smtphost", "localhost")
        self.cfg.setdefault("smtpport", None)
        self.cfg.setdefault("smtpssl", False)
        self.cfg.setdefault("smtpuser", None)
        self.cfg.setdefault("smtppassword", None)

    def __del__(self):
        """Persist in-memory state and release the lock."""
        if self.db is not None:
            self.db["feeds"] = self.feeds
            self.db["cfg"] = self.cfg
            self.db.close()
        if self.lockf is not None:
            fcntl.flock(self.lockf.fileno(), fcntl.LOCK_UN)
            self.lockf.close()

    def readfeed(self, uri):
        """Return the stored feed dict for *uri*, or None if unknown."""
        if uri not in self.feeds:
            return None
        return self.feeds[uri]

    def writefeed(self, uri, feed):
        """Store *feed* under *uri*."""
        self.feeds[uri] = feed

    def sethook(self, uri, hookfile):
        """Attach a shell hook script to the feed."""
        feed = self.readfeed(uri)
        if feed is None:
            return
        feed["hook"] = hookfile
        self.writefeed(uri, feed)

    def runhook(self, uri):
        """Run the feed's hook, if any, in a detached child process."""
        feed = self.readfeed(uri)
        if feed is None:
            return
        if "hook" not in feed:
            return

        cmd = os.path.expanduser(feed["hook"])
        if not os.path.exists(cmd):
            return

        if os.fork() == 0:
            # Child process. Discard the hook's output; the original
            # opened /dev/null read-only and leaked the descriptor.
            with open(os.devnull, "w") as devnull:
                Popen(cmd, shell=True, stdout=devnull,
                        stderr=devnull).wait()
            # Never fall back into the caller's control flow (the
            # original child returned and kept running the program).
            os._exit(0)

    def setfeedval(self, uri, key, value):
        """Set one key of the stored feed dict."""
        feed = self.readfeed(uri)
        if feed is None:
            return
        feed[key] = value
        self.writefeed(uri, feed)

    def pause(self, uri):
        """Exclude the feed from active runs."""
        self.setfeedval(uri, "pause", True)

    def unpause(self, uri):
        """Re-include the feed in active runs."""
        self.setfeedval(uri, "pause", False)

    def addfeed(self, uri, email=None):
        """Register *uri*; its mail goes to *email* (default: cfg email)."""
        if uri in self.feeds:
            return
        feed = {
            "toemail": self.cfg["email"] if email is None else email,
            "uri": uri,
            "pause": False,
            "articles": [],
        }
        self.writefeed(uri, feed)

    def delfeed(self, uri):
        """Forget *uri* and its article history."""
        if uri in self.feeds:
            del self.feeds[uri]

    def listfeeds(self):
        """All known feed URIs."""
        return list(self.feeds.keys())

    def listactivefeeds(self):
        """Feed URIs that are not paused."""
        return [f for f in self.feeds if self.feeds[f]["pause"] == False]

    @staticmethod
    def _articlekey(article):
        # Articles are compared with the local "unread" flag stripped:
        # stored copies carry it, freshly fetched ones do not, so a raw
        # dict comparison (as in the original) never matched and every
        # article was treated as new on each run.
        return {k: v for k, v in article.items() if k != "unread"}

    def mergefeed(self, uri, curfeed):
        """Merge freshly fetched *curfeed* into the stored history.

        Returns *curfeed* with its article list reduced to previously
        unseen articles, each flagged unread.
        """
        feed = self.readfeed(uri)
        if feed is None:
            return curfeed

        history = feed["articles"]
        known = [self._articlekey(a) for a in history]
        rarticles = []
        for article in curfeed["articles"]:
            if self._articlekey(article) not in known:
                article["unread"] = True
                history.append(article)
                rarticles.append(article)
        feed["articles"] = history

        for metakey in ("link", "title", "updated", "author",
                "email"):
            if metakey in curfeed:
                feed[metakey] = curfeed[metakey]

        self.writefeed(uri, feed)
        curfeed["articles"] = rarticles

        return curfeed

    def unreadarticles(self, uri):
        """Return a feed dict holding only the unread articles of *uri*."""
        rfeed = {"articles": []}
        feed = self.readfeed(uri)
        if feed is None:
            return rfeed

        for metakey in ("link", "title", "updated", "author",
                "email", "toemail"):
            if metakey in feed:
                rfeed[metakey] = feed[metakey]

        for article in feed["articles"]:
            if article["unread"] == True:
                rfeed["articles"].append(article)

        return rfeed

    def setreadarticles(self, uri, curfeed=None):
        """Flag the articles listed in *curfeed* as read.

        Bug fixes vs. the original: it compared against a nonexistent
        "history" key and used '==' where '=' was meant, so articles
        were never actually marked read.
        """
        feed = self.readfeed(uri)
        if feed is None or curfeed is None:
            return

        seen = [self._articlekey(a) for a in curfeed["articles"]]
        for article in feed["articles"]:
            if self._articlekey(article) in seen:
                article["unread"] = False
        self.writefeed(uri, feed)
+
diff --git a/feedemail.py b/feedemail.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+# coding=utf-8
+#
+# Copy me if you can.
+# by 20h
+#
+
+import smtplib
+from email.mime.text import MIMEText
+from email.mime.multipart import MIMEMultipart
+from email.utils import formataddr, formatdate, parseaddr
+from email.header import Header
+
# Module-wide html2text settings, then bind the conversion function
# locally. BODY_WIDTH = 0 disables line wrapping; the other flags tune
# unicode and link rendering — see the html2text module for details.
import html2text as h2t
h2t.UNICODE_SNOB = 1
h2t.LINKS_EACH_PARAGRAPH = 0
h2t.BODY_WIDTH = 0
h2t.INLINE_LINKS = 0
html2text = h2t.html2text
+
def normalizeheader(hstr):
    """Collapse newlines to spaces and trim, making *hstr* header-safe."""
    return " ".join(hstr.split("\n")).strip()
+
def send(feed, to, smtphost="localhost", smtpport=None, ssl=False, \
        user=None, password=None):
    """Mail every article of *feed* as an individual plain-text message.

    feed -- feed dict as produced by feed.parse()/feeddb.unreadarticles()
    to   -- recipient address
    ssl  -- use SMTP-over-SSL instead of STARTTLS
    user/password -- SMTP credentials; login only happens if both are set
    """
    articles = feed["articles"]

    for article in articles:
        # Convert the HTML payload to readable plain text.
        if "text" in article:
            text = html2text(article["text"])
        else:
            text = ""

        if "title" in article:
            subject = Header( \
                    normalizeheader(article["title"]),\
                    "utf-8")
        else:
            # No title: use the beginning of the body text.
            subject = Header(normalizeheader(text[:70]),\
                    "utf-8")

        # Append metadata.
        if "link" in article:
            text = "%sLink: %s\n" % (text, article["link"])
        if "file" in article:
            text = "%sEnclosure: %s\n" % (text, article["file"])

        msg = MIMEText(text, "plain", "utf-8")

        if "email" in feed:
            faddr = feed["email"]
        else:
            faddr = "none@none.no"

        # From: display name. Bug fix: the original left fname unset
        # when the feed had no title, raising NameError below.
        fname = faddr
        if "title" in feed:
            if "author" in article:
                fname = "%s: %s" % (feed["title"], \
                        article["author"])
            else:
                fname = feed["title"]

        msg["From"] = formataddr((fname, faddr))
        msg["To"] = formataddr(parseaddr(to))
        msg["Date"] = formatdate()
        msg["Subject"] = subject

        if "link" in article:
            msg["X-RSS-URL"] = article["link"]
        if "link" in feed:
            msg["X-RSS-Feed"] = feed["link"]
        if "id" in article:
            msg["X-RSS-ID"] = article["id"]
        if "tags" in article:
            msg["X-RSS-TAGS"] = Header(",".join(article["tags"]),\
                    "utf-8")
        msg["User-Agent"] = "Zeitungsschau"

        # NOTE(review): looks like leftover debug/progress output;
        # kept to preserve behaviour.
        print(msg.as_string())

        # One SMTP session per article, as in the original.
        if ssl == True:
            s = smtplib.SMTP_SSL()
        else:
            s = smtplib.SMTP()
        if smtpport is not None:
            s.connect(smtphost, smtpport)
        else:
            s.connect(smtphost)

        if user is not None and password is not None:
            s.ehlo()
            if ssl == False:
                # Upgrade the plaintext session before credentials.
                s.starttls()
                s.ehlo()
            s.login(user, password)

        s.sendmail(faddr, to, msg.as_string())
        s.quit()
+
diff --git a/opml.py b/opml.py
@@ -0,0 +1,51 @@
+#
+# Copy me if you can.
+# by 20h
+#
+
+from lxml import etree
+from datetime import datetime
+
def read(ostr):
    """Return the list of feed URLs found in an OPML document (bytes)."""
    parser = etree.XMLParser(recover=True, encoding='utf-8')
    xml = etree.fromstring(ostr, parser)

    urls = []
    for outline in xml.xpath("//outline"):
        attrs = outline.attrib
        # Prefer the real feed URL; fall back to the text attribute.
        if "xmlUrl" in attrs:
            urls.append(attrs["xmlUrl"])
        elif "text" in attrs:
            urls.append(attrs["text"])

    return urls
+
def write(rssfeeds):
    """Serialize *rssfeeds* (iterable of URLs) into an OPML 2.0 document.

    Returns the document as a string with XML declaration.
    """
    opmle = etree.Element("opml")

    heade = etree.SubElement(opmle, "head")
    titlee = etree.SubElement(heade, "title")
    # Bug fix: the original created <title> but never set its text,
    # emitting an empty element.
    titlee.text = "Zeitungsschau"

    # %Z is empty for naive datetimes; timestamp stays local time.
    daten = datetime.now().strftime("%Y-%m-%dT%H:%M:%S%Z")
    datece = etree.SubElement(heade, "dateCreated")
    datece.text = daten
    dateme = etree.SubElement(heade, "dateModified")
    dateme.text = daten
    ownerne = etree.SubElement(heade, "ownerName")
    ownerne.text = "Me"
    docse = etree.SubElement(heade, "docs")
    docse.text = "http://dev.opml.org/spec2.html"

    bodye = etree.SubElement(opmle, "body")

    for rss in rssfeeds:
        outlinee = etree.SubElement(bodye, "outline")
        outlinee.attrib["type"] = "rss"
        outlinee.attrib["text"] = rss
        outlinee.attrib["xmlUrl"] = rss

    return etree.tostring(opmle, encoding="utf-8", \
            pretty_print=True, \
            xml_declaration=True).decode("utf-8")
+
diff --git a/zs.py b/zs.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python
+# coding=utf-8
+#
+# Copy me if you can.
+# by 20h
+#
+
+import sys
+import os
+import feed
+import feeddb
+import opml
+import feedemail
+
def run(db, selfeed=None):
    """Fetch feeds, mail unread articles, and mark them read.

    With *selfeed* set (and known), only that feed is processed;
    otherwise every non-paused feed is.
    """
    if selfeed is not None and selfeed in db.listfeeds():
        feeduris = [selfeed]
    else:
        # Bug fix: the original iterated listfeeds() here, so pausing
        # a feed had no effect on runs.
        feeduris = db.listactivefeeds()
    print("feeduris: %s" % (feeduris))

    for feeduri in feeduris:
        curfeed = feed.fetch(feeduri)
        print("curfeed: %s" % (curfeed))
        db.mergefeed(feeduri, curfeed)
        ufeed = db.unreadarticles(feeduri)
        print("unread: %s" % (ufeed))

        # Per-feed recipient, falling back to the global address.
        if "toemail" in ufeed:
            toemail = ufeed["toemail"]
        else:
            toemail = db.cfg["email"]
        feedemail.send(ufeed, toemail, db.cfg["smtphost"], \
                db.cfg["smtpport"], db.cfg["smtpssl"], \
                db.cfg["smtpuser"], db.cfg["smtppassword"])
        db.setreadarticles(feeduri, ufeed)
+
def usage(app):
    """Print the usage line for *app* on stderr and exit with status 1."""
    sys.stderr.write("usage: %s [-h] cmd\n" % (os.path.basename(app)))
    sys.exit(1)
+
def main(args):
    """Command dispatcher; returns the process exit status."""
    retval = 0

    if len(args) < 2:
        usage(args[0])

    db = feeddb.feeddb()

    if args[1] == "run":
        if len(args) > 2:
            run(db, args[2])
        else:
            run(db)

    elif args[1] == "cfg":
        if len(args) < 3:
            # No key: dump the whole configuration.
            for k in db.cfg:
                print("%s = '%s'" % (k, db.cfg[k]))
        elif len(args) < 4:
            # Key only: show its value, fail if unknown.
            if args[2] in db.cfg:
                print("%s = '%s'" % (args[2], \
                        db.cfg[args[2]]))
            else:
                retval = 1
        else:
            # Key and value: set and echo.
            db.cfg[args[2]] = args[3]
            print("%s = '%s'" % (args[2], db.cfg[args[2]]))

    elif args[1] == "add":
        if len(args) < 3:
            usage(args[0])
        email = args[3] if len(args) > 3 else None
        db.addfeed(args[2], email)

    elif args[1] == "list":
        for f in db.listfeeds():
            print(f)

    elif args[1] == "delete":
        if len(args) < 3:
            usage(args[0])
        # Bug fix: the original passed args[1] (the literal word
        # "delete") to delfeed instead of the given URI.
        db.delfeed(args[2])

    elif args[1] == "pause":
        if len(args) < 3:
            usage(args[0])
        db.pause(args[2])

    elif args[1] == "unpause":
        if len(args) < 3:
            usage(args[0])
        db.unpause(args[2])

    elif args[1] == "opmlexport":
        data = opml.write(db.listfeeds())
        if len(args) > 2:
            # Close the output file deterministically.
            with open(args[2], "w") as filen:
                filen.write(data)
        else:
            sys.stdout.write(data)

    elif args[1] == "opmlimport":
        if len(args) > 2:
            with open(args[2], "r") as filen:
                content = filen.read()
        else:
            content = sys.stdin.read()
        feedlist = db.listfeeds()
        for f in opml.read(content.encode("utf-8")):
            if f not in feedlist:
                print("import feed: %s" % (f))
                db.addfeed(f)

    # Drop the db explicitly so its __del__ persists state now.
    del db
    return retval
+
# Script entry point: exit with main()'s return value.
if __name__ == "__main__":
    sys.exit(main(sys.argv))
+