commit 0d6a6fd8675019cf67d7aed081f250e7030611c8
parent 91c5bcef0e7000d7d57d10745cc936fdc837b9a4
Author: Christoph Lohmann <20h@r-36.net>
Date:   Sat, 25 Aug 2018 12:37:04 +0200
Add xpath utility.
Diffstat:
| xpath | | | 67 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | 
1 file changed, 67 insertions(+), 0 deletions(-)
diff --git a/xpath b/xpath
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+#
+# Copy me if you can.
+# by 20h
+#
+
+import os
+import sys
+import getopt
+from lxml import etree
+
+def getxpath(fd, xpath, attribute=None, encoding=None):
+	try:
+		parser = etree.HTMLParser(encoding=encoding)
+		xml = etree.parse(fd, parser)
+		sels = xml.xpath(xpath)
+	except AssertionError:
+		return None
+
+	if attribute != None:
+		return "\n".join(["".join(i.attrib[attribute]) for i in sels \
+			if attribute in i.attrib])
+
+	try:
+		return "\n".join([("".join(i.itertext())).strip() for i in sels])
+	except AttributeError:
+		return "\n".join(sels)
+
+def usage(app):
+	app = os.path.basename(app)
+	sys.stderr.write("usage: %s [-h] [-e encoding] "\
+			"[-a attribute] xpath\n" % (app))
+	sys.exit(1)
+
+def main(args):
+	try:
+		opts, largs = getopt.getopt(args[1:], "he:a:")
+	except getopt.GetoptError as err:
+		print(str(err))
+		usage(args[0])
+
+	encoding = None 
+	attribute = None
+	for o, a in opts:
+		if o == "-h":
+			usage(args[0])
+		elif o == "-e":
+			encoding = a
+		elif o == "-a":
+			attribute = a
+		else:
+			assert False, "unhandled option"
+	
+	if len(largs) < 1:
+		usage(args[0])
+
+	rpath = getxpath(sys.stdin, largs[0], attribute, encoding)
+	if rpath == None:
+		return 1
+
+	print(rpath)
+
+	return 0
+
+if __name__ == "__main__":
+	sys.exit(main(sys.argv))
+