commit ac54587c59ad0bdd7c84681f295d924f27019644
parent 63fe7a682af0938334c13a7132b52b933dafec13
Author: Christoph Lohmann <20h@r-36.net>
Date: Mon, 22 May 2017 19:29:14 +0200
Add JSON Feed support.
Diffstat:
zeitungsschau/feed.py | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 94 insertions(+), 2 deletions(-)
diff --git a/zeitungsschau/feed.py b/zeitungsschau/feed.py
@@ -16,6 +16,7 @@ import codecs
import html
import urllib.parse
import socket
+import json
def parseiso(dstr, now):
try:
@@ -39,7 +40,84 @@ def parsexml(astr):
# Throw XML parsing errors so we can blame the feed authors.
return xml
-def parse(astr):
def parsejson(astr):
	"""Parse a JSON Feed document into the common feed dict.

	astr: raw JSON text of the feed.
	Returns a dict with optional "title", "description", "link" and
	"author" keys, an "updated" timestamp and an "articles" list of
	per-item dicts ("file", "title", "id", "text", "updated", "uuid").
	Raises json.JSONDecodeError on malformed input (blame the feed
	author, as with the XML path).
	"""
	js = json.loads(astr)

	feed = {}
	articles = []
	# Sentinel timestamp: items without their own date get this
	# value, which also marks "no real date" for the uuid logic.
	now = datetime.now(pytz.utc)
	now = now.replace(hour=20, minute=20, second=20, microsecond=20)

	if "title" in js:
		feed["title"] = js["title"]
	if "description" in js:
		feed["description"] = js["description"]
	if "home_page_url" in js:
		feed["link"] = js["home_page_url"]
	if "feed_url" in js:
		# feed_url deliberately overrides home_page_url.
		feed["link"] = js["feed_url"]
	if "author" in js and "name" in js["author"]:
		feed["author"] = js["author"]["name"]
	feed["updated"] = now

	def mkuuid(s):
		return hashlib.sha256(str(s).encode("utf8")).hexdigest()

	for item in js.get("items", []):
		article = {}
		if "url" in item:
			article["file"] = item["url"]
		if "title" in item:
			article["title"] = item["title"]

		# Extract the text first: the most specific key wins
		# (content_text > content_html > summary).
		if "summary" in item:
			article["text"] = html.unescape(item["summary"])
		if "content_html" in item:
			article["text"] = html.unescape(item["content_html"])
		if "content_text" in item:
			article["text"] = html.unescape(item["content_text"])

		# Id fallback chain.  This must run after the text has
		# been extracted: the old code consulted
		# article["text"][:30] before any text existed and
		# raised KeyError.  (The former "link" fallback was
		# dead code — article["link"] is never set.)
		if "id" in item:
			article["id"] = item["id"]
		elif "file" in article:
			article["id"] = article["file"]
		else:
			article["id"] = article.get("text", "")[:30]

		if "date_published" in item:
			article["updated"] = \
				dateutil.parser.parse(item["date_published"])
		else:
			article["updated"] = now

		# Build a stable uuid: the real date (when present)
		# plus the identifying fields, hashed.
		if article["updated"] == now:
			article["uuid"] = ""
		else:
			article["uuid"] = "%s" % (article["updated"])
		for e in ("id", "title", "file"):
			if e in article:
				article["uuid"] = "%s-%s" % \
					(article["uuid"], article[e])
		if len(article["uuid"]) == 0:
			article["uuid"] = mkuuid(now)
		else:
			article["uuid"] = mkuuid(article["uuid"])

		# Sanity check: drop items with nothing usable in them.
		if "title" not in article and "text" not in article \
				and "file" not in article:
			continue

		articles.append(article)

	# Bug fix: the parsed items were collected but never attached
	# to the returned feed, so every JSON feed came back empty.
	feed["articles"] = articles
	return feed
+
+def parseatom(astr):
xml = parsexml(astr)
if xml == None:
return None
@@ -246,6 +324,7 @@ def parse(astr):
return feed
def fetch(uri):
+ ftype = "xml"
if "file://" in uri:
fd = codecs.open(uri[7:], "r", "utf-8")
fval = fd.read().encode("utf-8")
@@ -280,5 +359,18 @@ def fetch(uri):
fval = fd.content
rcode = fd.status_code
- return (rcode, parse(fval))
+ if "Content-Type" in fd.headers:
+ if "application/json" in fd.headers["Content-Type"]:
+ ftype = "json"
+
+ if ftype == "xml":
+ suri = uri.lower().rsplit(".", 1)
+ if len(suri) > 1:
+ if suri[-1] == "json":
+ ftype = "json"
+
+ if ftype == "xml":
+ return (rcode, parsexml(fval))
+ else:
+ return (rcode, parsejson(fval))