commit 0aabb3ed2c6f959540a9bf48c7c8371778d008f0
parent 662c2a923b6c78163febd94eee17da36ed14242e
Author: Christoph Lohmann <20h@r-36.net>
Date: Sat, 8 Aug 2020 11:59:11 +0200
Update objectify code to work with encoding declaration.
Diffstat:
2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/zeitungsschau/feed.py b/zeitungsschau/feed.py
@@ -5,8 +5,9 @@
# by 20h
#
-from lxml import objectify
-from lxml import etree
+import lxml
+import lxml.objectify
+import html
from datetime import datetime
import dateutil.parser
from dateutil.tz import gettz
@@ -14,7 +15,6 @@ import requests
import hashlib
import pytz
import codecs
-import html
import urllib.parse
import socket
import json
@@ -44,9 +44,10 @@ def removenamespaces(xml):
elem.tag = elem.tag[nsl:]
def parsexml(astr):
- xml = objectify.fromstring(astr)
+ xml = lxml.objectify.fromstring(html.unescape(astr.decode("utf-8")).encode("utf-8"))
removenamespaces(xml)
# Throw XML parsing errors so we can blame the feed authors.
+ #print(lxml.objectify.dump(xml))
return xml
def parsetwtxtfeed(astr, uri):
@@ -399,7 +400,6 @@ def fetch(uri):
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect((host, port))
s.send(("%s\r\n" % (selector)).encode("utf-8"))
- s.shutdown(1)
fd = s.makefile("r")
fval = fd.read().encode("utf-8")
s.close()
diff --git a/zs b/zs
@@ -52,6 +52,11 @@ def run(db, selfeed=None, dryrun=False, onlychanges=False):
print("fetch %s" % (feeduri))
curfeed = None
rcode = 0
+
+ """
+ # All errors.
+ (rcode, curfeed) = feed.fetch(feeduri)
+ """
try:
(rcode, curfeed) = feed.fetch(feeduri)
except socket.gaierror: