zs

Zeitungsschau rss to email converter
git clone git://r-36.net/zs
Log | Files | Refs | README | LICENSE

feedemail.py (3777B)


      1 #
      2 # See LICENSE for licensing details.
      3 #
      4 # Copy me if you can.
      5 # by 20h
      6 #
      7 
      8 import smtplib
      9 from email.mime.text import MIMEText
     10 from email.mime.multipart import MIMEMultipart
     11 from email.utils import formataddr, formatdate, parseaddr
     12 from email.header import Header
     13 import time
     14 import subprocess
     15 import lxml.html
     16 import lxml.etree
     17 import urllib.parse
     18 
     19 import html2text
     20 
     21 def normalizeheader(hstr):
     22 	if len(hstr) == 0:
     23 		return ""
     24 	try:
     25 		return lxml.html.fromstring(hstr).text_content().\
     26 				replace(u"\xa0", "").\
     27 				replace("\n", " ").strip()
     28 	except lxml.etree.ParserError:
     29 		return ""
     30 
     31 
     32 class LocalSendmail(object):
     33 	cmd="/usr/sbin/sendmail -f \"%s\" \"%s\""
     34 
     35 	def __init__(self, cmd=None):
     36 		if cmd != None:
     37 			self.cmd = cmd
     38 
     39 	def sendmail(self, faddr, taddr, msg):
     40 		cmd = self.cmd % (faddr, taddr)
     41 		p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE)
     42 		p.communicate(input=msg.encode("utf8"))
     43 
     44 def send(feed, to, smtphost="localhost", smtpport=None, ssl="False",\
     45 		starttls="True", user=None, password=None, smtpcmd=None,\
     46 		smtpuselocal=False):
     47 	articles = feed["articles"]
     48 
     49 	for article in articles:
     50 		if "text" in article:
     51 			h2t = html2text.HTML2Text()
     52 			h2t.body_width = 0
     53 			h2t.unicode_snob = 1
     54 			h2t.inline_links = 0
     55 			h2t.links_each_paragraph = 0
     56 
     57 			try:
     58 				text = "%s\n" % (h2t.handle(article["text"]))
     59 			except:
     60 				text = article["text"]
     61 
     62 			del h2t
     63 		else:
     64 			text = ""
     65 
     66 		# Larger than 10 MiB, something is wrong.
     67 		if len(text) > 10 * 1024 * 1024:
     68 			continue
     69 
     70 		if "title" in article:
     71 			subject = Header( \
     72 					normalizeheader(article["title"]),\
     73 					"utf-8")
     74 		else:
     75 			subject = Header(normalizeheader(text[:20]),\
     76 					"utf-8")
     77 
     78 		# Append metadata.
     79 		if "link" in article:
     80 			text = "%sURL: %s\n" % (text, article["link"])
     81 		if "file" in article:
     82 			text = "%sEnclosure: %s\n" % (text, article["file"])
     83 
     84 		msg = MIMEText(text, "plain", "utf-8")
     85 
     86 		if "email" in feed:
     87 			faddr = feed["email"]
     88 		else:
     89 			faddr = "none@none.no"
     90 		if "title" in feed:
     91 			if "author" in article:
     92 				fname = "%s: %s" % (feed["title"], \
     93 						article["author"])
     94 			else:
     95 				fname = feed["title"]
     96 
     97 		msg["From"] = formataddr((fname, faddr))
     98 		msg["To"] = formataddr(parseaddr(to))
     99 		if "updated" in article:
    100 			msg["Date"] = formatdate(time.mktime(\
    101 					article["updated"].timetuple()))
    102 		else:
    103 			msg["Date"] = formatdate()
    104 		msg["Subject"] = subject
    105 
    106 		if "link" in article:
    107 			if "://" not in article["link"]:
    108 				aurl = urllib.parse.urljoin(feed["feeduri"],\
    109 					article["link"])
    110 				if "gopher://" in aurl:
    111 					urls = urllib.parse.urlparse(aurl, \
    112 							allow_fragments=False)
    113 					if urls.path.startswith("/0"):
    114 						aurl = "%s://%s%s" % \
    115 						(urls.scheme, urls.netloc, \
    116 							urls.path.replace(\
    117 							"/0", "/1", 1))
    118 						if len(urls.query) > 0:
    119 							aurl = "%s?%s" % \
    120 							(aurl, urls.query)
    121 			else:
    122 				aurl = article["link"]
    123 			msg["X-RSS-URL"] = aurl
    124 		if "link" in feed:
    125 			msg["X-RSS-Feed"] = feed["link"]
    126 		else:
    127 			msg["X-RSS-Feed"] = feed["feeduri"]
    128 		if "id" in article:
    129 			msg["X-RSS-ID"] = article["id"]
    130 		if "uuid" in article:
    131 			msg["X-RSS-UUID"] = article["uuid"]
    132 		if "tags" in article:
    133 			msg["X-RSS-TAGS"] = Header(",".join(article["tags"]),\
    134 					"utf-8")
    135 		msg["User-Agent"] = "Zeitungsschau"
    136 
    137 		if smtpuselocal == "True":
    138 			s = LocalSendmail(smtpcmd)
    139 			s.sendmail(faddr, to, msg.as_string())
    140 		else:
    141 			if ssl == "True":
    142 				s = smtplib.SMTP_SSL(smtphost)
    143 			else:
    144 				s = smtplib.SMTP(smtphost)
    145 			if smtpport != None:
    146 				s.connect(smtphost, smtpport)
    147 			else:
    148 				s.connect(smtphost)
    149 
    150 			s.ehlo()
    151 			if ssl == "False" and starttls == "True":
    152 				s.starttls()
    153 				s.ehlo()
    154 
    155 			if user != None and password != None:
    156 				s.login(user, password)
    157 
    158 			s.sendmail(faddr, to, msg.as_string())
    159 			s.quit()
    160