commit c1d3567569feb6b1cbb4513fa52c10839e1a9598
parent 8faa15f0c27e430b19c4137a2b523791efaeb45d
Author: Christoph Lohmann <20h@r-36.net>
Date: Sun, 17 Aug 2014 17:53:53 +0200
Major overhaul.
What has changed:
* Now there is a setup.py, which works and handles dependencies.
* Zeitungsschau is more restrictive:
* Only import the last 64 articles, in chronological order.
* Only manage the last 2048 articles, which keeps the database small
  (a sketch of both limits follows this list).
* Allow local smtp delivery, which is faster and bypasses silly filters.
* Now use getopt to handle a debug flag.
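
Both retention limits are plain list slices over chronologically sorted
articles. A minimal sketch of the pattern with synthetic data (the real
code lives in zeitungsschau/feed.py and zeitungsschau/feeddb.py below):

    # Parsing keeps only the newest 64 entries ...
    articles = [{"updated": i} for i in range(100)]
    newest64 = sorted(articles, key=lambda a: a["updated"])[-64:]
    assert len(newest64) == 64 and newest64[-1]["updated"] == 99

    # ... and merging caps the stored per-feed history at 2048.
    history = list(range(5000))[-2048:]
    assert len(history) == 2048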
Diffstat:
 Makefile                   |  45 ---------------------------------------------
 config.mk                  |  14 --------------
 feed.py                    | 213 -------------------------------------------------------------------------------
 feeddb.py                  | 239 -------------------------------------------------------------------------------
 feedemail.py               | 102 -------------------------------------------------------------------------------
 opml.py                    |  53 -----------------------------------------------------
 setup.py                   |  46 ++++++++++++++++++++++++++++++++++++++++++++++
 zeitungsschau/__init__.py  |   0
 zeitungsschau/feed.py      | 226 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 zeitungsschau/feeddb.py    | 275 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 zeitungsschau/feedemail.py | 127 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 zeitungsschau/opml.py      |  53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 zs                         | 241 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 zs.py                      | 215 -------------------------------------------------------------------------------
14 files changed, 968 insertions(+), 881 deletions(-)
diff --git a/Makefile b/Makefile
@@ -1,45 +0,0 @@
-# zeitungsschau - rss2email
-# See LICENSE file for copyright and license details.
-
-include config.mk
-
-SRC = ${NAME}.py
-OBJ = ${SRC:.py=.exe}
-
-all: options ${NAME}
-
-options:
- @echo ${NAME} build options:
- @echo "CC = ${CC}"
-
-${OBJ}: config.mk
-
-${NAME}: ${OBJ}
- @echo CC -o $@.py
- @${CC} $@.py
-
-clean:
- @echo cleaning
- @rm -f ${NAME}.exe __pycache__ zs.build ${NAME}-${VERSION}.tar.gz
- @find bin -type l -exec rm {} \;
-
-dist: clean
- @echo creating dist tarball
- @mkdir -p ${NAME}-${VERSION}
- @cp -R LICENSE README.md config.mk Makefile \
- *.py ${NAME}-${VERSION}
- @tar -cf ${NAME}-${VERSION}.tar ${NAME}-${VERSION}
- @gzip ${NAME}-${VERSION}.tar
- @rm -rf ${NAME}-${VERSION}
-
-install: all
- @echo installing executable files to ${DESTDIR}${PREFIX}/bin
- @mkdir -p ${DESTDIR}${PREFIX}/bin
- @cp zs.exe ${DESTDIR}${PREFIX}/bin/zs
-
-uninstall:
- @echo removing executable files from ${DESTDIR}${PREFIX}/bin
- @rm -f ${DESTDIR}${PREFIX}/bin/zs
-
-.PHONY: all options clean dist install uninstall
-
diff --git a/config.mk b/config.mk
@@ -1,14 +0,0 @@
-# Zeitungsschau metadata
-NAME = zs
-VERSION = 0.1
-
-# Customize below to fit your system
-
-# paths
-PREFIX = /usr/local
-MANPREFIX = ${PREFIX}/share/man
-
-# compiler and linker
-CC = nuitka --recurse-all --recurse-directory --show-modules \
- --show-progress
-
diff --git a/feed.py b/feed.py
@@ -1,213 +0,0 @@
-#
-# See LICENSE for licensing details.
-#
-# Copy me if you can.
-# by 20h
-#
-
-from lxml import objectify
-from lxml import etree
-from datetime import datetime
-import dateutil.parser
-import urllib.request, urllib.parse, urllib.error
-
-def parseiso(dstr):
- return dateutil.parser.parse(str(dstr))
-
-def removenamespaces(xml):
- for key in xml.nsmap:
- nsstr = u'{%s}' % (xml.nsmap[key])
- nsl = len(nsstr)
-
- for elem in xml.getiterator():
- if elem.tag.startswith(nsstr):
- elem.tag = elem.tag[nsl:]
-
-def parsexml(astr):
- try:
- xml = objectify.fromstring(astr)
- removenamespaces(xml)
- except etree.XMLSyntaxError:
- try:
- parser = etree.HTMLParser()
- xml = objectify.fromstring(astr)
- removenamespaces(xml)
- except etree.XMLSyntaxError:
- return None
- return xml
-
-def parse(astr):
- xml = parsexml(astr)
- if xml == None:
- return None
-
- feed = {}
- articles = []
- isrss = False
- isrdf = False
- now = datetime.now()
-
- if hasattr(xml, "channel"):
- if hasattr(xml, "item"):
- isrdf = True
- oxml = xml
- xml = xml.channel
- isrss = True
-
- feed["title"] = ""
- for e in ("title", "description"):
- if hasattr(xml, e):
- feed[e] = str(xml[e])
-
- if hasattr(xml, "image") and hasattr(xml.image, "title"):
- if "title" not in feed:
- feed["title"] = str(xml.image.title)
-
- if hasattr(xml, "updated"):
- feed["updated"] = parseiso(xml.updated)
- elif hasattr(xml, "pubDate"):
- feed["updated"] = parseiso(xml.pubDate)
- elif hasattr(xml, "lastBuildDate"):
- feed["updated"] = parseiso(xml.lastBuildDate)
- else:
- feed["updated"] = now
-
- if hasattr(xml, "link"):
- if "href" in xml.link.attrib:
- feed["link"] = str(xml.link.attrib["href"])
- else:
- feed["link"] = str(xml.link)
-
- if hasattr(xml, "webmaster"):
- feed["email"] = str(xml.webmaster)
- elif hasattr(xml, "owner") and hasattr(xml.owner, "email"):
- feed["email"] = str(xml.owner.email)
- elif hasattr(xml, "author") and hasattr(xml.author, "email"):
- feed["email"] = str(xml.author.email)
- elif hasattr(xml, "webMaster"):
- feed["email"] = str(xml.webMaster)
- elif hasattr(xml, "managingeditor"):
- feed["email"] = str(xml.managingeditor)
- elif hasattr(xml, "managingEditor"):
- feed["email"] = str(xml.managingEditor)
-
- if hasattr(xml, "author"):
- if hasattr(xml.author, "name"):
- feed["author"] = str(xml.author.name)
- else:
- feed["author"] = str(xml.author)
- elif hasattr(xml, "creator"):
- feed["author"] = str(xml.creator)
-
- entryname = "entry"
- if isrss == True or isrdf == True:
- entryname = "item"
- if isrdf == True:
- xml = oxml
- if hasattr(xml, entryname):
- for entry in xml[entryname][:]:
- article = {}
- # title
- if hasattr(entry, "title"):
- article["title"] = str(entry["title"])
-
- # link
- if hasattr(entry, "link"):
- if "href" in entry.link.attrib:
- article["link"] = str(entry.link.attrib["href"])
- else:
- article["link"] = str(entry.link)
- elif hasattr(entry, "source"):
- article["link"] = str(entry.source)
-
- # enclosure
- if hasattr(entry, "enclosure"):
- if "href" in entry.enclosure.attrib:
- article["file"] = \
- str(entry.enclosure.attrib["href"])
- elif "url" in entry.enclosure.attrib:
- article["file"] = \
- str(entry.enclosure.attrib["url"])
- else:
- article["file"] = str(entry.enclosure)
-
- if hasattr(entry, "group") and \
- hasattr(entry.group, "content"):
- if "url" in entry.group.content:
- article["file"] = \
- str(entry.group.content.\
- attrib["file"])
-
- # updated
- if hasattr(entry, "updated"):
- article["updated"] = parseiso(entry.updated)
- elif hasattr(entry, "pubDate"):
- article["updated"] = parseiso(entry.pubDate)
- elif hasattr(entry, "date"):
- article["updated"] = parseiso(entry.date)
- else:
- article["updated"] = now
-
- # author
- if hasattr(entry, "author"):
- if hasattr(entry.author, "name"):
- article["author"] = str(entry.author.name)
- else:
- article["author"] = str(entry.author)
- elif hasattr(entry, "creator"):
- article["author"] = str(entry.creator)
-
- # tags
- if hasattr(entry, "category"):
- article["tags"] = []
- for cat in entry["category"][:]:
- article["tags"].append(str(cat))
-
- # text
- if hasattr(entry, "encoded"):
- article["text"] = str(entry.encoded)
- elif hasattr(entry, "content"):
- article["text"] = str(entry.content)
- elif hasattr(entry, "summary"):
- article["text"] = str(entry.summary)
- elif hasattr(entry, "description"):
- article["text"] = str(entry.description)
-
- # id
- if hasattr(entry, "id"):
- article["id"] = str(entry["id"])
- else:
- if "link" in article:
- article["id"] = article["link"]
- elif "file" in article:
- article["id"] = article["file"]
- else:
- article["id"] = article["text"][:30]
-
- if article["updated"] == now:
- article["uuid"] = ""
- else:
- article["uuid"] = "%s" % (article["updated"])
- for e in ("id", "title", "file"):
- if e in article:
- article["uuid"] = "%s-%s" % \
- (article["uuid"],\
- article[e])
-
- # sanity checks
- if "title" not in article and "text" not in article \
- and "file" not in article:
- continue
-
- articles.append(article)
- feed["articles"] = articles
-
- return feed
-
-class feedopener(urllib.request.FancyURLopener):
- version = "Zeitungsschau/1.0"
-urllib.request._urlopener = feedopener
-
-def fetch(uri):
- return parse(urllib.request.urlopen(uri, timeout=5).read())
-
diff --git a/feeddb.py b/feeddb.py
@@ -1,239 +0,0 @@
-#
-# See LICENSE for licensing details.
-#
-# Copy me if you can.
-# by 20h
-#
-
-import pickle
-import os
-import os.path
-import fcntl
-from subprocess import Popen
-
-class feeddb(object):
- db = None
- lockf = None
- feeds = {}
- cfg = {}
- dbpath = ""
- lpath = ""
-
- def __init__(self, path="~/.zs/feed.db", email=None):
- self.dbpath = os.path.expanduser(path)
- path = os.path.abspath(os.path.dirname(self.dbpath))
- if not os.path.exists(path):
- os.makedirs(path, 0o750)
- self.lpath = "%s.lck" % (self.dbpath)
- self.lockf = open(self.lpath, "w")
- fcntl.lockf(self.lockf.fileno(), fcntl.LOCK_EX)
-
- try:
- fd = open(self.dbpath, "rb")
- self.db = pickle.load(fd)
- fd.close()
- except FileNotFoundError:
- self.db = {}
-
- if "feeds" in self.db:
- self.feeds = self.db["feeds"]
- if "cfg" in self.db:
- self.cfg = self.db["cfg"]
-
- if not "email" in self.cfg:
- print("You need to specify the default email. Please "\
- "run 'zs cfg email me@me.com' to "\
- "set it.")
-
- if not "smtphost" in self.cfg:
- self.cfg["smtphost"] = "localhost"
- if not "smtpport" in self.cfg:
- self.cfg["smtpport"] = None
- if not "smtpssl" in self.cfg:
- self.cfg["smtpssl"] = False
- if not "smtpuser" in self.cfg:
- self.cfg["smtpuser"] = None
- if not "smtppassword" in self.cfg:
- self.cfg["smtppassword"] = None
-
- def __del__(self):
- if self.db != None:
- self.db["feeds"] = self.feeds
- self.db["cfg"] = self.cfg
- fd = open(self.dbpath, "wb+")
- pickle.dump(self.db, fd)
- fd.close()
- if self.lockf != None:
- fcntl.flock(self.lockf.fileno(), fcntl.LOCK_UN)
- self.lockf.close()
- os.remove(self.lpath)
-
- def readfeed(self, uri):
- if not uri in self.feeds:
- return None
- return self.feeds[uri]
-
- def writefeed(self, uri, feed):
- self.feeds[uri] = feed
-
- def sethook(self, uri, hookfile):
- feed = self.readfeed(uri)
- if feed == None:
- return
- feed["hook"] = hookfile
- self.writefeed(uri, feed)
-
- def runhook(self, uri):
- feed = self.readfeed(uri)
- if feed == None:
- return
- if not "hook" in feed:
- return
-
- cmd = os.path.expanduser(feed["hook"])
- if not os.path.exists(cmd):
- return
-
- fd = open("/dev/null")
- if os.fork() == 0:
- p = Popen(cmd, shell=True, stdout=fd, stderr=fd)
- p.wait()
-
- def setfeedval(self, uri, key, value):
- feed = self.readfeed(uri)
- if feed == None:
- return
- feed[key] = value
- self.writefeed(uri, feed)
-
- def getfeedval(self, uri, key):
- feed = self.readfeed(uri)
- if feed == None:
- return None
- if key not in feed:
- return None
- return feed[key]
-
- def setretry(self, uri, retries):
- self.setfeedval(uri, "retry", retries)
-
- def getretry(self, uri):
- retries = self.getfeedval(uri, "retry")
- if retries == None:
- return 0
- else:
- return retries
-
- def pause(self, uri):
- self.setfeedval(uri, "pause", True)
-
- def unpause(self, uri):
- self.setfeedval(uri, "pause", False)
- self.setretry(uri, 0)
-
- def ispaused(self, uri):
- return self.getfeedval(uri, "pause")
-
- def listfeeds(self):
- return list(self.feeds.keys())
-
- def addfeed(self, uri):
- if not uri in self.listfeeds():
- feed = {}
- feed["uri"] = uri
- feed["pause"] = False
- feed["articles"] = []
- self.writefeed(uri, feed)
-
- def delfeed(self, uri):
- if uri in self.listfeeds():
- del self.feeds[uri]
- return True
- else:
- return False
-
- def listactivefeeds(self):
- rfeeds = []
- for f in self.feeds:
- if self.feeds[f]["pause"] == False:
- rfeeds.append(f)
- return rfeeds
-
- def mergefeed(self, uri, curfeed):
- rarticles = []
- feed = self.readfeed(uri)
- if feed == None:
- return curfeed
-
- history = feed["articles"]
- for article in curfeed["articles"]:
- a = [art for art in history if art["uuid"] == \
- article["uuid"]]
- if len(a) == 0:
- article["unread"] = True
- history.append(article)
- rarticles.append(article)
- feed["articles"] = history
-
- for metakey in ("link", "title", "updated", "author", \
- "email"):
- if metakey in curfeed:
- feed[metakey] = curfeed[metakey]
-
- self.writefeed(uri, feed)
- curfeed["articles"] = rarticles
-
- return curfeed
-
- def unreadarticles(self, uri):
- rfeed = {}
- rfeed["articles"] = []
- feed = self.readfeed(uri)
- if feed == None:
- return rfeed
-
- for metakey in ("link", "title", "updated", "author", \
- "email", "toemail"):
- if metakey in feed:
- rfeed[metakey] = feed[metakey]
-
- history = feed["articles"]
- for article in history:
- if article["unread"] == True:
- rfeed["articles"].append(article)
-
- return rfeed
-
- def setarticleunread(self, uri, ids):
- feed = self.readfeed(uri)
- if feed == None:
- return
-
- for article in feed["articles"]:
- a = [art for art in feed["articles"] if art["uuid"] == \
- ids]
- if len(a) > 0:
- for aa in a:
- aa["unread"] = True
- self.writefeed(uri, feed);
-
- def setreadarticles(self, uri, curfeed=None):
- feed = self.readfeed(uri)
- if feed == None:
- return
-
- for article in curfeed["articles"]:
- a = [art for art in curfeed["articles"] if art["uuid"] == \
- article["uuid"]]
- if len(a) > 0:
- for aa in a:
- aa["unread"] = False
- self.writefeed(uri, feed);
-
- def resetarticles(self, uri):
- feed = self.readfeed(uri)
- if feed == None:
- return
- feed["articles"] = []
- self.writefeed(uri, feed)
-
diff --git a/feedemail.py b/feedemail.py
@@ -1,102 +0,0 @@
-#
-# See LICENSE for licensing details.
-#
-# Copy me if you can.
-# by 20h
-#
-
-import smtplib
-from email.mime.text import MIMEText
-from email.mime.multipart import MIMEMultipart
-from email.utils import formataddr, formatdate, parseaddr
-from email.header import Header
-import time
-
-import html2text
-
-def normalizeheader(hstr):
- return hstr.replace("\n", " ").strip()
-
-def send(feed, to, smtphost="localhost", smtpport=None, ssl="False", \
- user=None, password=None):
- articles = feed["articles"]
-
- h2t = html2text.HTML2Text()
- h2t.body_width = 0
- h2t.unicode_snob = 1
- h2t.escape_snob = 1
- h2t.inline_links = 0
- h2t.links_each_paragraph = 0
-
- for article in articles:
- if "text" in article:
- text = "%s\n" % (h2t.handle(article["text"]))
- else:
- text = ""
-
- if "title" in article:
- subject = Header( \
- normalizeheader(article["title"]),\
- "utf-8")
- else:
- subject = Header(normalizeheader(text[:70]),\
- "utf-8")
-
- # Append metadata.
- if "link" in article:
- text = "%sURL: %s\n" % (text, article["link"])
- if "file" in article:
- text = "%sEnclosure: %s\n" % (text, article["file"])
-
- msg = MIMEText(text, "plain", "utf-8")
-
- if "email" in feed:
- faddr = feed["email"]
- else:
- faddr = "none@none.no"
- if "title" in feed:
- if "author" in article:
- fname = "%s: %s" % (feed["title"], \
- article["author"])
- else:
- fname = feed["title"]
-
- msg["From"] = formataddr((fname, faddr))
- msg["To"] = formataddr(parseaddr(to))
- if "updated" in article:
- msg["Date"] = formatdate(time.mktime(\
- article["updated"].timetuple()))
- else:
- msg["Date"] = formatdate()
- msg["Subject"] = subject
-
- if "link" in article:
- msg["X-RSS-URL"] = article["link"]
- if "link" in feed:
- msg["X-RSS-Feed"] = feed["link"]
- if "id" in article:
- msg["X-RSS-ID"] = article["id"]
- if "tags" in article:
- msg["X-RSS-TAGS"] = Header(",".join(article["tags"]),\
- "utf-8")
- msg["User-Agent"] = "Zeitungsschau"
-
- if ssl == "True":
- s = smtplib.SMTP_SSL()
- else:
- s = smtplib.SMTP()
- if smtpport != None:
- s.connect(smtphost, smtpport)
- else:
- s.connect(smtphost)
-
- s.ehlo()
- if ssl == False:
- s.starttls()
- s.ehlo()
- if user != None and password != None:
- s.login(user, password)
-
- s.sendmail(faddr, to, msg.as_string())
- s.quit()
-
diff --git a/opml.py b/opml.py
@@ -1,53 +0,0 @@
-#
-# See LICENSE for licensing details.
-#
-# Copy me if you can.
-# by 20h
-#
-
-from lxml import etree
-from datetime import datetime
-
-def read(ostr):
- parser = etree.XMLParser(recover=True, encoding='utf-8')
- xml = etree.fromstring(ostr, parser)
-
- rssfeeds = []
-
- feeds = xml.xpath("//outline")
- for feed in feeds:
- if "xmlUrl" in feed.attrib:
- rssfeeds.append(feed.attrib["xmlUrl"])
- elif "text" in feed.attrib:
- rssfeeds.append(feed.attrib["text"])
-
- return rssfeeds
-
-def write(rssfeeds):
- opmle = etree.Element("opml")
-
- heade = etree.SubElement(opmle, "head")
- titlee = etree.SubElement(heade, "title")
-
- daten = datetime.now().strftime("%Y-%m-%dT%H:%M:%S%Z")
- datece = etree.SubElement(heade, "dateCreated")
- datece.text = daten
- dateme = etree.SubElement(heade, "dateModified")
- dateme.text = daten
- ownerne = etree.SubElement(heade, "ownerName")
- ownerne.text = "Me"
- docse = etree.SubElement(heade, "docs")
- docse.text = "http://dev.opml.org/spec2.html"
-
- bodye = etree.SubElement(opmle, "body")
-
- for rss in rssfeeds:
- outlinee = etree.SubElement(bodye, "outline")
- outlinee.attrib["type"] = "rss"
- outlinee.attrib["text"] = rss
- outlinee.attrib["xmlUrl"] = rss
-
- return etree.tostring(opmle, encoding="utf-8", \
- pretty_print=True, \
- xml_declaration=True).decode("utf-8")
-
diff --git a/setup.py b/setup.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+# coding=utf-8
+#
+# Copy me if you can.
+# by 20h
+#
+
+try:
+ from setuptools import setup
+except ImportError:
+ from distutils.core import setup
+
+setup(
+ name='zeitungsschau',
+ version='0.5.0',
+
+ py_modules=['zeitungsschau'],
+ packages=['zeitungsschau'],
+ scripts=['zs'],
+
+ provides=['zeitungsschau'],
+ requires=[
+ 'lxml (>=0.1)',
+ 'dateutil (>=0.1)',
+ 'html2text (>=0.1)'
+ ],
+ platforms=['all'],
+
+ author='Christoph Lohmann',
+ author_email='20h@r-36.net',
+ maintainer='Christoph Lohmann',
+ maintainer_email='20h@r-36.net',
+ url='http://git.r-36.net/zs',
+ description='Zeitungsschau is an rss2email converter',
+ long_description=open("README.md").read(),
+ license='GPLv3',
+ classifiers=[
+ 'Environment :: Console',
+ 'Intended Audience :: End Users/Desktop',
+ 'Operating System :: OS Independent',
+ 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
+ 'Programming Language :: Python',
+ 'Topic :: Communications :: Email'
+ ],
+)
+
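
Whether the requires list in setup.py actually gets resolved depends on
the installer, so a quick pre-flight check that the three dependencies
are importable can still be useful. A minimal sketch (note the import
names: "dateutil", not the python-dateutil distribution name):

    for mod in ("lxml", "dateutil", "html2text"):
        try:
            __import__(mod)
        except ImportError:
            print("missing dependency: %s" % (mod))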
diff --git a/zeitungsschau/__init__.py b/zeitungsschau/__init__.py
diff --git a/zeitungsschau/feed.py b/zeitungsschau/feed.py
@@ -0,0 +1,226 @@
+#
+# See LICENSE for licensing details.
+#
+# Copy me if you can.
+# by 20h
+#
+
+from lxml import objectify
+from lxml import etree
+from datetime import datetime
+import dateutil.parser
+import urllib.request, urllib.parse, urllib.error
+import hashlib
+
+def parseiso(dstr):
+ return dateutil.parser.parse(str(dstr))
+
+def removenamespaces(xml):
+ for key in xml.nsmap:
+ nsstr = u'{%s}' % (xml.nsmap[key])
+ nsl = len(nsstr)
+
+ for elem in xml.getiterator():
+ if elem.tag.startswith(nsstr):
+ elem.tag = elem.tag[nsl:]
+
+def parsexml(astr):
+ try:
+ xml = objectify.fromstring(astr)
+ removenamespaces(xml)
+ except etree.XMLSyntaxError:
+ try:
+ parser = etree.HTMLParser()
+ xml = objectify.fromstring(astr)
+ removenamespaces(xml)
+ except etree.XMLSyntaxError:
+ return None
+ return xml
+
+def parse(astr):
+ xml = parsexml(astr)
+ if xml == None:
+ return None
+
+ feed = {}
+ articles = []
+ isrss = False
+ isrdf = False
+ now = datetime.now()
+
+ if hasattr(xml, "channel"):
+ if hasattr(xml, "item"):
+ isrdf = True
+ oxml = xml
+ xml = xml.channel
+ isrss = True
+
+ feed["title"] = ""
+ for e in ("title", "description"):
+ if hasattr(xml, e):
+ feed[e] = str(xml[e])
+
+ if hasattr(xml, "image") and hasattr(xml.image, "title"):
+ if "title" not in feed:
+ feed["title"] = str(xml.image.title)
+
+ if hasattr(xml, "updated"):
+ feed["updated"] = parseiso(xml.updated)
+ elif hasattr(xml, "pubDate"):
+ feed["updated"] = parseiso(xml.pubDate)
+ elif hasattr(xml, "lastBuildDate"):
+ feed["updated"] = parseiso(xml.lastBuildDate)
+ else:
+ feed["updated"] = now
+
+ if hasattr(xml, "link"):
+ if "href" in xml.link.attrib:
+ feed["link"] = str(xml.link.attrib["href"])
+ else:
+ feed["link"] = str(xml.link)
+
+ if hasattr(xml, "webmaster"):
+ feed["email"] = str(xml.webmaster)
+ elif hasattr(xml, "owner") and hasattr(xml.owner, "email"):
+ feed["email"] = str(xml.owner.email)
+ elif hasattr(xml, "author") and hasattr(xml.author, "email"):
+ feed["email"] = str(xml.author.email)
+ elif hasattr(xml, "webMaster"):
+ feed["email"] = str(xml.webMaster)
+ elif hasattr(xml, "managingeditor"):
+ feed["email"] = str(xml.managingeditor)
+ elif hasattr(xml, "managingEditor"):
+ feed["email"] = str(xml.managingEditor)
+
+ if hasattr(xml, "author"):
+ if hasattr(xml.author, "name"):
+ feed["author"] = str(xml.author.name)
+ else:
+ feed["author"] = str(xml.author)
+ elif hasattr(xml, "creator"):
+ feed["author"] = str(xml.creator)
+
+ entryname = "entry"
+ if isrss == True or isrdf == True:
+ entryname = "item"
+ if isrdf == True:
+ xml = oxml
+ if hasattr(xml, entryname):
+ for entry in xml[entryname][:]:
+ article = {}
+ # title
+ if hasattr(entry, "title"):
+ article["title"] = str(entry["title"])
+
+ # link
+ if hasattr(entry, "link"):
+ if "href" in entry.link.attrib:
+ article["link"] = str(entry.link.attrib["href"])
+ else:
+ article["link"] = str(entry.link)
+ elif hasattr(entry, "source"):
+ article["link"] = str(entry.source)
+
+ # enclosure
+ if hasattr(entry, "enclosure"):
+ if "href" in entry.enclosure.attrib:
+ article["file"] = \
+ str(entry.enclosure.attrib["href"])
+ elif "url" in entry.enclosure.attrib:
+ article["file"] = \
+ str(entry.enclosure.attrib["url"])
+ else:
+ article["file"] = str(entry.enclosure)
+
+ if hasattr(entry, "group") and \
+ hasattr(entry.group, "content"):
+ if "url" in entry.group.content:
+ article["file"] = \
+ str(entry.group.content.\
+ attrib["file"])
+
+ # updated
+ if hasattr(entry, "updated"):
+ article["updated"] = parseiso(entry.updated)
+ elif hasattr(entry, "pubDate"):
+ article["updated"] = parseiso(entry.pubDate)
+ elif hasattr(entry, "date"):
+ article["updated"] = parseiso(entry.date)
+ else:
+ article["updated"] = now
+
+ # author
+ if hasattr(entry, "author"):
+ if hasattr(entry.author, "name"):
+ article["author"] = str(entry.author.name)
+ else:
+ article["author"] = str(entry.author)
+ elif hasattr(entry, "creator"):
+ article["author"] = str(entry.creator)
+
+ # tags
+ if hasattr(entry, "category"):
+ article["tags"] = []
+ for cat in entry["category"][:]:
+ article["tags"].append(str(cat))
+
+ # text
+ if hasattr(entry, "encoded"):
+ article["text"] = str(entry.encoded)
+ elif hasattr(entry, "content"):
+ article["text"] = str(entry.content)
+ elif hasattr(entry, "summary"):
+ article["text"] = str(entry.summary)
+ elif hasattr(entry, "description"):
+ article["text"] = str(entry.description)
+
+ # id
+ if hasattr(entry, "id"):
+ article["id"] = str(entry["id"])
+ else:
+ if "link" in article:
+ article["id"] = article["link"]
+ elif "file" in article:
+ article["id"] = article["file"]
+ else:
+ article["id"] = article["text"][:30]
+
+ if article["updated"] == now:
+ article["uuid"] = ""
+ else:
+ article["uuid"] = "%s" % (article["updated"])
+ for e in ("id", "title", "file"):
+ if e in article:
+ article["uuid"] = "%s-%s" % \
+ (article["uuid"],\
+ article[e])
+
+ def mkuuid(s):
+ return hashlib.sha256(str(s).\
+ encode("utf8")).hexdigest()
+ if len(article["uuid"]) == 0:
+ article["uuid"] = mkuuid(now)
+ else:
+ article["uuid"] = mkuuid(article["uuid"])
+
+ # sanity checks
+ if "title" not in article and "text" not in article \
+ and "file" not in article:
+ continue
+
+ articles.append(article)
+
+ # Will not process feeds with more than 64 entries. Can you hear me
+ # Richard Stallman?
+ feed["articles"] = sorted(articles, key=lambda article: \
+ article["updated"])[-64:]
+
+ return feed
+
+class feedopener(urllib.request.FancyURLopener):
+ version = "Zeitungsschau/1.0"
+urllib.request._urlopener = feedopener
+
+def fetch(uri):
+ return parse(urllib.request.urlopen(uri, timeout=5).read())
+
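
The parser can be exercised without the network by handing parse() raw
feed bytes; fetch() does the same via urllib with a five-second timeout.
A small sketch with a made-up RSS document:

    import zeitungsschau.feed as feed

    rss = b"""<rss version="2.0"><channel>
    <title>Example</title>
    <item><title>Hello</title><link>http://example.org/1</link>
    <pubDate>Sun, 17 Aug 2014 17:53:53 +0200</pubDate></item>
    </channel></rss>"""

    f = feed.parse(rss)
    print(f["title"])                # Example
    print(len(f["articles"]))        # 1; capped at the newest 64
    print(f["articles"][0]["uuid"])  # sha256 hexdigest of date-id-title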
diff --git a/zeitungsschau/feeddb.py b/zeitungsschau/feeddb.py
@@ -0,0 +1,275 @@
+#
+# See LICENSE for licensing details.
+#
+# Copy me if you can.
+# by 20h
+#
+
+import pickle
+import os
+import os.path
+import fcntl
+from subprocess import Popen
+
+class feeddb(object):
+ feeds = {}
+ cfg = {}
+ dbpath = ""
+ lpath = ""
+ lockarr = ""
+ locks = {}
+
+ def lock(self, fpath):
+ if fpath not in self.locks:
+ self.lpath = "%s.lck" % (fpath)
+ self.locks[fpath] = open(self.lpath, "w")
+ fcntl.lockf(self.locks[fpath].fileno(), fcntl.LOCK_EX)
+
+ def unlock(self, fpath, doremove=True):
+ if fpath in self.locks:
+ fcntl.flock(self.locks[fpath].fileno(), fcntl.LOCK_UN)
+ self.locks[fpath].close()
+ lpath = "%s.lck" % (fpath)
+ os.remove(lpath)
+ if doremove == True:
+ del self.locks[fpath]
+
+ def pickleread(self, fi):
+ fpath = "%s/%s" % (self.dbpath, fi)
+ path = os.path.abspath(os.path.dirname(fpath))
+ if not os.path.exists(path):
+ os.makedirs(path, 0o750)
+
+ self.lock(fpath)
+
+ try:
+ fd = open(fpath, "rb")
+ db = pickle.load(fd)
+ fd.close()
+ except FileNotFoundError:
+ db = {}
+
+ return db
+
+ def picklewrite(self, fi, db):
+ fpath = "%s/%s" % (self.dbpath, fi)
+ path = os.path.abspath(os.path.dirname(fpath))
+ if not os.path.exists(path):
+ os.makedirs(path, 0o750)
+
+ fd = open(fpath, "wb+")
+ pickle.dump(db, fd)
+ fd.close()
+
+ def unlockall(self):
+ for key in self.locks.keys():
+ self.unlock(key, doremove=False)
+ self.locks = []
+
+ def __init__(self, path="~/.zs", email=None):
+ self.dbpath = os.path.abspath(os.path.expanduser(path))
+ self.feeds = self.pickleread("feeds.db")
+ self.cfg = self.pickleread("cfg.db")
+
+ if not "email" in self.cfg:
+ print("You need to specify the default email. Please "\
+ "run 'zs cfg email me@me.com' to "\
+ "set it.")
+
+ if not "smtphost" in self.cfg:
+ self.cfg["smtphost"] = "localhost"
+ if not "smtpport" in self.cfg:
+ self.cfg["smtpport"] = None
+ if not "smtpssl" in self.cfg:
+ self.cfg["smtpssl"] = "False"
+ if not "smtpuser" in self.cfg:
+ self.cfg["smtpuser"] = None
+ if not "smtppassword" in self.cfg:
+ self.cfg["smtppassword"] = None
+ if not "smtpstarttls" in self.cfg:
+ self.cfg["smtpstarttls"] = "False"
+ if not "smtpcmd" in self.cfg:
+ self.cfg["smtpcmd"] = None
+ if not "smtpuselocal" in self.cfg:
+ self.cfg["smtpuselocal"] = "False"
+
+ def sync(self):
+ if self.cfg != None:
+ self.picklewrite("cfg.db", self.cfg)
+ if self.feeds != None:
+ self.picklewrite("feeds.db", self.feeds)
+
+ def __del__(self):
+ self.sync()
+ self.unlockall()
+
+ def readfeed(self, uri):
+ if not uri in self.feeds:
+ return None
+ return self.feeds[uri]
+
+ def writefeed(self, uri, feed):
+ self.feeds[uri] = feed
+
+ def sethook(self, uri, hookfile):
+ feed = self.readfeed(uri)
+ if feed == None:
+ return
+ feed["hook"] = hookfile
+ self.writefeed(uri, feed)
+
+ def runhook(self, uri):
+ feed = self.readfeed(uri)
+ if feed == None:
+ return
+ if not "hook" in feed:
+ return
+
+ cmd = os.path.expanduser(feed["hook"])
+ if not os.path.exists(cmd):
+ return
+
+ fd = open("/dev/null")
+ if os.fork() == 0:
+ p = Popen(cmd, shell=True, stdout=fd, stderr=fd)
+ p.wait()
+
+ def setfeedval(self, uri, key, value):
+ feed = self.readfeed(uri)
+ if feed == None:
+ return
+ feed[key] = value
+ self.writefeed(uri, feed)
+
+ def getfeedval(self, uri, key):
+ feed = self.readfeed(uri)
+ if feed == None:
+ return None
+ if key not in feed:
+ return None
+ return feed[key]
+
+ def setretry(self, uri, retries):
+ self.setfeedval(uri, "retry", retries)
+
+ def getretry(self, uri):
+ retries = self.getfeedval(uri, "retry")
+ if retries == None:
+ return 0
+ else:
+ return retries
+
+ def pause(self, uri):
+ self.setfeedval(uri, "pause", True)
+
+ def unpause(self, uri):
+ self.setfeedval(uri, "pause", False)
+ self.setretry(uri, 0)
+
+ def ispaused(self, uri):
+ return self.getfeedval(uri, "pause")
+
+ def listfeeds(self):
+ return list(self.feeds.keys())
+
+ def addfeed(self, uri):
+ if not uri in self.listfeeds():
+ feed = {}
+ feed["uri"] = uri
+ feed["pause"] = False
+ feed["articles"] = []
+ self.writefeed(uri, feed)
+
+ def delfeed(self, uri):
+ if uri in self.listfeeds():
+ del self.feeds[uri]
+ return True
+ else:
+ return False
+
+ def listactivefeeds(self):
+ rfeeds = []
+ for f in self.feeds:
+ if self.feeds[f]["pause"] == False:
+ rfeeds.append(f)
+ return rfeeds
+
+ def mergefeed(self, uri, curfeed):
+ rarticles = []
+ feed = self.readfeed(uri)
+ if feed == None:
+ return curfeed
+
+ history = feed["articles"]
+ for article in curfeed["articles"]:
+ a = [art for art in history if art["uuid"] == \
+ article["uuid"]]
+ if len(a) == 0:
+ article["unread"] = True
+ history.append(article)
+ rarticles.append(article)
+ # Only keep track of the last 2048 articles.
+ feed["articles"] = history[-2048:]
+
+ for metakey in ("link", "title", "updated", "author", \
+ "email"):
+ if metakey in curfeed:
+ feed[metakey] = curfeed[metakey]
+
+ self.writefeed(uri, feed)
+ curfeed["articles"] = rarticles
+
+ return curfeed
+
+ def unreadarticles(self, uri):
+ rfeed = {}
+ rfeed["articles"] = []
+ feed = self.readfeed(uri)
+ if feed == None:
+ return rfeed
+
+ for metakey in ("link", "title", "updated", "author", \
+ "email", "toemail"):
+ if metakey in feed:
+ rfeed[metakey] = feed[metakey]
+
+ history = feed["articles"]
+ for article in history:
+ if article["unread"] == True:
+ rfeed["articles"].append(article)
+
+ return rfeed
+
+ def setarticleunread(self, uri, ids):
+ feed = self.readfeed(uri)
+ if feed == None:
+ return
+
+ for article in feed["articles"]:
+ a = [art for art in feed["articles"] if art["uuid"] == \
+ ids]
+ if len(a) > 0:
+ for aa in a:
+ aa["unread"] = True
+ self.writefeed(uri, feed);
+
+ def setreadarticles(self, uri, curfeed=None):
+ feed = self.readfeed(uri)
+ if feed == None:
+ return
+
+ for article in curfeed["articles"]:
+ a = [art for art in curfeed["articles"] if art["uuid"] == \
+ article["uuid"]]
+ if len(a) > 0:
+ for aa in a:
+ aa["unread"] = False
+ self.writefeed(uri, feed);
+
+ def resetarticles(self, uri):
+ feed = self.readfeed(uri)
+ if feed == None:
+ return
+ feed["articles"] = []
+ self.writefeed(uri, feed)
+
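
The database now lives in two pickles, feeds.db and cfg.db, each guarded
by its own .lck file under the db directory. A short usage sketch; the
path here is made up, the default is ~/.zs:

    from zeitungsschau.feeddb import feeddb

    db = feeddb(path="/tmp/zs-demo")  # warns until the cfg email is set
    db.addfeed("http://example.org/rss")
    print(db.listfeeds())             # ['http://example.org/rss']
    db.pause("http://example.org/rss")
    print(db.ispaused("http://example.org/rss"))  # True
    del db  # __del__() syncs both pickles and releases the locks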
diff --git a/zeitungsschau/feedemail.py b/zeitungsschau/feedemail.py
@@ -0,0 +1,127 @@
+#
+# See LICENSE for licensing details.
+#
+# Copy me if you can.
+# by 20h
+#
+
+import smtplib
+from email.mime.text import MIMEText
+from email.mime.multipart import MIMEMultipart
+from email.utils import formataddr, formatdate, parseaddr
+from email.header import Header
+import time
+import subprocess
+
+import html2text
+
+def normalizeheader(hstr):
+ return hstr.replace("\n", " ").strip()
+
+class LocalSendmail(object):
+ cmd="/usr/sbin/sendmail -f \"%s\" \"%s\""
+
+ def __init__(self, cmd=None):
+ if cmd != None:
+ self.cmd = cmd
+
+ def sendmail(self, faddr, taddr, msg):
+ cmd = self.cmd % (faddr, taddr)
+ p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE)
+ p.communicate(input=msg.encode("utf8"))
+
+def send(feed, to, smtphost="localhost", smtpport=None, ssl="False",\
+ starttls="True", user=None, password=None, smtpcmd=None,\
+ smtpuselocal=False):
+ articles = feed["articles"]
+
+ for article in articles:
+ if "text" in article:
+ h2t = html2text.HTML2Text()
+ h2t.body_width = 0
+ h2t.unicode_snob = 1
+ h2t.escape_snob = 1
+ h2t.inline_links = 0
+ h2t.links_each_paragraph = 0
+
+ text = "%s\n" % (h2t.handle(article["text"]))
+
+ del h2t
+ else:
+ text = ""
+
+ # Larger than 10 MiB, something is wrong.
+ if len(text) > 10 * 1024 * 1024:
+ continue
+
+ if "title" in article:
+ subject = Header( \
+ normalizeheader(article["title"]),\
+ "utf-8")
+ else:
+ subject = Header(normalizeheader(text[:70]),\
+ "utf-8")
+
+ # Append metadata.
+ if "link" in article:
+ text = "%sURL: %s\n" % (text, article["link"])
+ if "file" in article:
+ text = "%sEnclosure: %s\n" % (text, article["file"])
+
+ msg = MIMEText(text, "plain", "utf-8")
+
+ if "email" in feed:
+ faddr = feed["email"]
+ else:
+ faddr = "none@none.no"
+ if "title" in feed:
+ if "author" in article:
+ fname = "%s: %s" % (feed["title"], \
+ article["author"])
+ else:
+ fname = feed["title"]
+
+ msg["From"] = formataddr((fname, faddr))
+ msg["To"] = formataddr(parseaddr(to))
+ if "updated" in article:
+ msg["Date"] = formatdate(time.mktime(\
+ article["updated"].timetuple()))
+ else:
+ msg["Date"] = formatdate()
+ msg["Subject"] = subject
+
+ if "link" in article:
+ msg["X-RSS-URL"] = article["link"]
+ if "link" in feed:
+ msg["X-RSS-Feed"] = feed["link"]
+ if "id" in article:
+ msg["X-RSS-ID"] = article["id"]
+ if "tags" in article:
+ msg["X-RSS-TAGS"] = Header(",".join(article["tags"]),\
+ "utf-8")
+ msg["User-Agent"] = "Zeitungsschau"
+
+ if smtpuselocal == "True":
+ s = LocalSendmail(smtpcmd)
+ s.sendmail(faddr, to, msg.as_string())
+ else:
+ if ssl == "True":
+ s = smtplib.SMTP_SSL()
+ else:
+ s = smtplib.SMTP()
+ if smtpport != None:
+ s.connect(smtphost, smtpport)
+ else:
+ s.connect(smtphost)
+
+ s.ehlo()
+ if ssl == "False" and starttls == "True":
+ s.starttls()
+ s.ehlo()
+
+ if user != None and password != None:
+ s.login(user, password)
+
+ s.sendmail(faddr, to, msg.as_string())
+ s.quit()
+
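
All cfg values are stored as strings, so send() compares ssl, starttls
and smtpuselocal against the literal "True". A hedged sketch of the new
local delivery path with a made-up article; note that this really pipes
the message through /usr/sbin/sendmail:

    import zeitungsschau.feedemail as feedemail

    f = {
        "title": "Example Feed",
        "link": "http://example.org/",
        "articles": [{
            "title": "Hello",
            "link": "http://example.org/1",
            "id": "http://example.org/1",
            "text": "<p>Hi there</p>",
        }],
    }
    # smtpuselocal is matched against the string "True"; the default
    # smtpcmd is '/usr/sbin/sendmail -f "<from>" "<to>"'.
    feedemail.send(f, "me@example.org", smtpuselocal="True")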
diff --git a/zeitungsschau/opml.py b/zeitungsschau/opml.py
@@ -0,0 +1,53 @@
+#
+# See LICENSE for licensing details.
+#
+# Copy me if you can.
+# by 20h
+#
+
+from lxml import etree
+from datetime import datetime
+
+def read(ostr):
+ parser = etree.XMLParser(recover=True, encoding='utf-8')
+ xml = etree.fromstring(ostr, parser)
+
+ rssfeeds = []
+
+ feeds = xml.xpath("//outline")
+ for feed in feeds:
+ if "xmlUrl" in feed.attrib:
+ rssfeeds.append(feed.attrib["xmlUrl"])
+ elif "text" in feed.attrib:
+ rssfeeds.append(feed.attrib["text"])
+
+ return rssfeeds
+
+def write(rssfeeds):
+ opmle = etree.Element("opml")
+
+ heade = etree.SubElement(opmle, "head")
+ titlee = etree.SubElement(heade, "title")
+
+ daten = datetime.now().strftime("%Y-%m-%dT%H:%M:%S%Z")
+ datece = etree.SubElement(heade, "dateCreated")
+ datece.text = daten
+ dateme = etree.SubElement(heade, "dateModified")
+ dateme.text = daten
+ ownerne = etree.SubElement(heade, "ownerName")
+ ownerne.text = "Me"
+ docse = etree.SubElement(heade, "docs")
+ docse.text = "http://dev.opml.org/spec2.html"
+
+ bodye = etree.SubElement(opmle, "body")
+
+ for rss in rssfeeds:
+ outlinee = etree.SubElement(bodye, "outline")
+ outlinee.attrib["type"] = "rss"
+ outlinee.attrib["text"] = rss
+ outlinee.attrib["xmlUrl"] = rss
+
+ return etree.tostring(opmle, encoding="utf-8", \
+ pretty_print=True, \
+ xml_declaration=True).decode("utf-8")
+
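
read() and write() round-trip, as long as read() gets bytes: its
XMLParser declares an encoding, and lxml refuses str input in that case.
A minimal sketch:

    import zeitungsschau.opml as opml

    feeds = ["http://example.org/rss", "http://example.com/atom.xml"]
    doc = opml.write(feeds)  # str, with XML declaration
    assert opml.read(doc.encode("utf-8")) == feeds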
diff --git a/zs b/zs
@@ -0,0 +1,241 @@
+#!/usr/bin/env python
+# coding=utf-8
+#
+# Copy me if you can.
+# by 20h
+#
+
+import sys
+import os
+import zeitungsschau.feed as feed
+import zeitungsschau.feeddb as feeddb
+import zeitungsschau.opml as opml
+import zeitungsschau.feedemail as feedemail
+import urllib.error
+import socket
+import http.client
+import ssl
+import getopt
+
+dodebug = False
+
+def debug(msg):
+ global dodebug
+ if dodebug == True:
+ print("debug: %s" % (msg))
+
+def sendfeed(db, ufeed):
+ feedemail.send(ufeed, db.cfg["email"], db.cfg["smtphost"], \
+ db.cfg["smtpport"], db.cfg["smtpssl"], \
+ db.cfg["smtpstarttls"], db.cfg["smtpuser"], \
+ db.cfg["smtppassword"], db.cfg["smtpcmd"], \
+ db.cfg["smtpuselocal"])
+
+def run(db, selfeed=None, dryrun=False):
+ feeduris = db.listfeeds()
+
+ if feeduris != None and selfeed in feeduris:
+ feeduris = [selfeed]
+
+ for feeduri in feeduris:
+ if db.ispaused(feeduri):
+ print("pause %s" % (feeduri))
+ continue
+
+ retries = db.getretry(feeduri)
+ estr = None
+ print("fetch %s" % (feeduri))
+ curfeed = None
+ try:
+ curfeed = feed.fetch(feeduri)
+ except urllib.error.HTTPError as err:
+ if err.code == 404:
+ estr = "404"
+ retries += 1
+ except socket.gaierror:
+ continue
+ except socket.timeout:
+ continue
+ except urllib.error.URLError:
+ continue
+ except TimeoutError:
+ continue
+ except ConnectionResetError:
+ estr = "connreset"
+ retries += 1
+ except http.client.IncompleteRead:
+ estr = "incompleteread"
+ continue
+ except http.client.BadStatusLine:
+ estr = "badstatusline"
+ continue
+
+ if curfeed == None:
+ continue
+
+ # retry handling
+ if estr != None:
+ if retries > 2:
+ sys.stderr.write("pause %s %s\n" % \
+ (estr, feeduri))
+ db.pause(feeduri)
+ db.setretry(feeduri, retries)
+ continue
+ elif retries > 0:
+ db.setretry(feeduri, 0)
+
+ clen = len(curfeed["articles"])
+ if clen == 0:
+ print("0 articles -> pause %s" % (feeduri))
+ db.pause(feeduri)
+ continue
+
+ db.mergefeed(feeduri, curfeed)
+ ufeed = db.unreadarticles(feeduri)
+ if len(ufeed["articles"]) > 0:
+ print("cur %d unread %d" % (clen, \
+ len(ufeed["articles"])))
+ debug(ufeed)
+ if dryrun == False:
+ sendfeed(db, ufeed)
+ db.setreadarticles(feeduri, ufeed)
+
+def usage(app):
+ app = os.path.basename(app)
+ sys.stderr.write("usage: %s [-dh] cmd\n" % (app))
+ sys.exit(1)
+
+def main(args):
+ global dodebug
+ retval = 0
+
+ try:
+ opts, largs = getopt.getopt(args[1:], "hd")
+ except getopt.GetoptError as err:
+ print(str(err))
+ usage(args[0])
+
+ for o, a in opts:
+ if o == "-h":
+ usage(args[0])
+ elif o == "-d":
+ dodebug = True
+ else:
+ usage(args[0])
+
+ if len(largs) < 1:
+ usage(args[0])
+
+ db = feeddb.feeddb()
+
+ if largs[0] == "run":
+ if len(largs) > 1:
+ run(db, largs[1])
+ else:
+ run(db)
+
+ elif largs[0] == "dryrun":
+ if len(largs) > 1:
+ run(db, largs[1], dryrun=True)
+ else:
+ run(db, dryrun=True)
+
+ elif largs[0] == "cfg":
+ if len(largs) < 2:
+ for k in db.cfg:
+ print("%s = '%s'" % (k, db.cfg[k]))
+ elif len(args) < 3:
+ if largs[1] in db.cfg:
+ print("%s = '%s'" % (largs[1], \
+ db.cfg[largs[1]]))
+ else:
+ retval = 1
+ else:
+ db.cfg[largs[1]] = largs[2]
+ print("%s = '%s'" % (largs[1], db.cfg[largs[1]]))
+
+ elif largs[0] == "cfgdel":
+ if len(largs) < 2:
+ usage(args[0])
+ if largs[1] in db.cfg:
+ del db.cfg[largs[1]]
+
+ elif largs[0] == "add":
+ if len(largs) < 2:
+ usage(args[0])
+ db.addfeed(largs[1])
+
+ elif largs[0] == "list":
+ print("\n".join(db.listfeeds()))
+
+ elif largs[0] == "listuuids":
+ if len(largs) < 2:
+ usage(args[0])
+ feed = db.readfeed(largs[1])
+ for art in feed["articles"]:
+ print("%s: %s: %s" % (art["uuid"], art["link"],\
+ art["title"]))
+
+ elif largs[0] == "unread":
+ if len(largs) < 3:
+ usage(args[0])
+ db.setarticleunread(largs[1], largs[2])
+
+ elif largs[0] == "resend":
+ if len(largs) < 2:
+ usage(args[0])
+ ufeed = db.unreadarticles(largs[1])
+ sendfeed(db, ufeed)
+ db.setreadarticles(largs[1], ufeed)
+
+ elif largs[0] == "del":
+ if len(largs) < 2:
+ usage(args[0])
+ if db.delfeed(largs[1]) == True:
+ print("'%s' has been deleted." % (largs[1]))
+
+ elif largs[0] == "reset":
+ if len(largs) < 2:
+ usage(args[0])
+ db.resetarticles(largs[1])
+
+ elif largs[0] == "retry":
+ if len(largs) < 3:
+ usage(args[0])
+ db.setretry(largs[1], int(largs[2]))
+
+ elif largs[0] == "pause":
+ if len(largs) < 2:
+ usage(args[0])
+ db.pause(largs[1])
+
+ elif largs[0] == "unpause":
+ if len(largs) < 2:
+ usage(args[0])
+ db.unpause(largs[1])
+
+ elif largs[0] == "opmlexport":
+ if len(largs) > 1:
+ filen = open(largs[1], "w")
+ else:
+ filen = sys.stdout
+ filen.write(opml.write(db.listfeeds()))
+
+ elif largs[0] == "opmlimport":
+ if len(largs) > 1:
+ filen = open(largs[1], "r")
+ else:
+ filen = sys.stdin
+ feedlist = db.listfeeds()
+ nfeedlist = opml.read(filen.read().encode("utf-8"))
+ for f in nfeedlist:
+ if not f in feedlist:
+ print("import feed: %s" % (f))
+ db.addfeed(f)
+
+ del db
+ return retval
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv))
+
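
The zs script is now the only entry point and is installed via setup.py's
scripts list, so it is normally driven from a shell or cron. A sketch of
the equivalent from Python, assuming zs is on PATH:

    import subprocess

    # "dryrun" fetches and merges without sending mail; the new -d flag
    # from the getopt handling additionally prints the unread feed dicts.
    subprocess.call(["zs", "-d", "dryrun"])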
diff --git a/zs.py b/zs.py
@@ -1,215 +0,0 @@
-#!/usr/bin/env python
-# coding=utf-8
-#
-# Copy me if you can.
-# by 20h
-#
-
-import sys
-import os
-import feed
-import feeddb
-import opml
-import feedemail
-import urllib.error
-import socket
-import http.client
-import ssl
-
-def sendfeed(db, ufeed):
- feedemail.send(ufeed, db.cfg["email"], db.cfg["smtphost"], \
- db.cfg["smtpport"], db.cfg["smtpssl"], \
- db.cfg["smtpuser"], db.cfg["smtppassword"])
-
-def run(db, selfeed=None, dryrun=False):
- feeduris = db.listfeeds()
-
- if feeduris != None and selfeed in feeduris:
- feeduris = [selfeed]
-
- for feeduri in feeduris:
- if db.ispaused(feeduri):
- print("pause %s" % (feeduri))
- continue
-
- retries = db.getretry(feeduri)
- estr = None
- print("fetch %s" % (feeduri))
- curfeed = None
- try:
- curfeed = feed.fetch(feeduri)
- except urllib.error.HTTPError as err:
- if err.code == 404:
- estr = "404"
- retries += 1
- except socket.gaierror:
- continue
- except socket.timeout:
- continue
- except urllib.error.URLError:
- continue
- except TimeoutError:
- continue
- except ConnectionResetError:
- estr = "connreset"
- retries += 1
- except http.client.IncompleteRead:
- estr = "incompleteread"
- continue
- except http.client.BadStatusLine:
- estr = "badstatusline"
- continue
-
- if curfeed == None:
- continue
-
- # retry handling
- if estr != None:
- if retries > 2:
- sys.stderr.write("pause %s %s\n" % \
- (estr, feeduri))
- db.pause(feeduri)
- db.setretry(feeduri, retries)
- continue
- elif retries > 0:
- db.setretry(feeduri, 0)
-
- clen = len(curfeed["articles"])
- if clen == 0:
- print("0 articles -> pause %s" % (feeduri))
- db.pause(feeduri)
- continue
-
- db.mergefeed(feeduri, curfeed)
- ufeed = db.unreadarticles(feeduri)
- if len(ufeed["articles"]) > 0:
- print("cur %d unread %d" % (clen, \
- len(ufeed["articles"])))
- if dryrun == False:
- sendfeed(db, ufeed)
- db.setreadarticles(feeduri, ufeed)
-
-def usage(app):
- app = os.path.basename(app)
- sys.stderr.write("usage: %s [-h] cmd\n" % (app))
- sys.exit(1)
-
-def main(args):
- retval = 0
-
- if len(args) < 2:
- usage(args[0])
-
- db = feeddb.feeddb()
-
- if args[1] == "run":
- if len(args) > 2:
- run(db, args[2])
- else:
- run(db)
-
- elif args[1] == "dryrun":
- if len(args) > 2:
- run(db, args[2], dryrun=True)
- else:
- run(db, dryrun=True)
-
- elif args[1] == "cfg":
- if len(args) < 3:
- for k in db.cfg:
- print("%s = '%s'" % (k, db.cfg[k]))
- elif len(args) < 4:
- if args[2] in db.cfg:
- print("%s = '%s'" % (args[2], \
- db.cfg[args[2]]))
- else:
- retval = 1
- else:
- db.cfg[args[2]] = args[3]
- print("%s = '%s'" % (args[2], db.cfg[args[2]]))
-
- elif args[1] == "cfgdel":
- if len(args) < 3:
- usage(args[0])
- if args[2] in db.cfg:
- del db.cfg[args[2]]
-
- elif args[1] == "add":
- if len(args) < 3:
- usage(args[0])
- db.addfeed(args[2])
-
- elif args[1] == "list":
- print("\n".join(db.listfeeds()))
-
- elif args[1] == "listuuids":
- if len(args) < 3:
- usage(args[0])
- feed = db.readfeed(args[2])
- for art in feed["articles"]:
- print("%s: %s: %s" % (art["uuid"], art["link"],\
- art["title"]))
-
- elif args[1] == "unread":
- if len(args) < 4:
- usage(args[0])
- db.setarticleunread(args[2], args[3])
-
- elif args[1] == "resend":
- if len(args) < 3:
- usage(args[0])
- ufeed = db.unreadarticles(args[2])
- sendfeed(db, ufeed)
- db.setreadarticles(args[2], ufeed)
-
- elif args[1] == "del":
- if len(args) < 3:
- usage(args[0])
- if db.delfeed(args[2]) == True:
- print("'%s' has been deleted." % (args[2]))
-
- elif args[1] == "reset":
- if len(args) < 3:
- usage(args[0])
- db.resetarticles(args[1])
-
- elif args[1] == "retry":
- if len(args) < 4:
- usage(args[0])
- db.setretry(args[1], int(args[2]))
-
- elif args[1] == "pause":
- if len(args) < 3:
- usage(args[0])
- db.pause(args[2])
-
- elif args[1] == "unpause":
- if len(args) < 3:
- usage(args[0])
- db.unpause(args[2])
-
- elif args[1] == "opmlexport":
- if len(args) > 2:
- filen = open(args[2], "w")
- else:
- filen = sys.stdout
- filen.write(opml.write(db.listfeeds()))
-
- elif args[1] == "opmlimport":
- if len(args) > 2:
- filen = open(args[2], "r")
- else:
- filen = sys.stdin
- feedlist = db.listfeeds()
- nfeedlist = opml.read(filen.read().encode("utf-8"))
- for f in nfeedlist:
- if not f in feedlist:
- print("import feed: %s" % (f))
- db.addfeed(f)
-
- del db
- return retval
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv))
-