commit 0d6a6fd8675019cf67d7aed081f250e7030611c8
parent 91c5bcef0e7000d7d57d10745cc936fdc837b9a4
Author: Christoph Lohmann <20h@r-36.net>
Date: Sat, 25 Aug 2018 12:37:04 +0200
Add xpath utility.
Diffstat:
xpath | | | 67 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
1 file changed, 67 insertions(+), 0 deletions(-)
diff --git a/xpath b/xpath
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+#
+# Copy me if you can.
+# by 20h
+#
+
+import os
+import sys
+import getopt
+from lxml import etree
+
+def getxpath(fd, xpath, attribute=None, encoding=None):
+    """Evaluate an XPath expression against an HTML document.
+
+    Arguments:
+        fd: document source handed unchanged to etree.parse().
+        xpath: XPath expression evaluated on the parsed tree.
+        attribute: if not None, return the value of this attribute for
+            every result that carries it instead of the result text.
+        encoding: passed to etree.HTMLParser(); None uses its default.
+
+    Returns one string with a line per result, or None when parsing or
+    evaluation raises AssertionError.
+    """
+    try:
+        parser = etree.HTMLParser(encoding=encoding)
+        xml = etree.parse(fd, parser)
+        sels = xml.xpath(xpath)
+    except AssertionError:
+        # NOTE(review): presumably what lxml raises for a document
+        # without a root element (e.g. empty input) -- confirm.
+        return None
+
+    # xpath() only returns a list for node-set results; expressions such
+    # as string(...) or count(...) yield a bare str, float or bool.
+    # Wrap those so a string is not iterated character by character.
+    if not isinstance(sels, list):
+        sels = [sels]
+
+    if attribute is not None:
+        # Results that have no attributes (text, numbers) are skipped.
+        return "\n".join([i.attrib[attribute] for i in sels
+                if hasattr(i, "attrib") and attribute in i.attrib])
+
+    lines = []
+    for i in sels:
+        if hasattr(i, "itertext"):
+            # Element: all text below it, surrounding whitespace removed.
+            lines.append("".join(i.itertext()).strip())
+        elif isinstance(i, (bool, float)):
+            lines.append(str(i))
+        else:
+            # String result (text node or attribute value): kept as is.
+            lines.append(i)
+    return "\n".join(lines)
+
+def usage(app):
+    """Write the usage line to stderr and exit with status 1.
+
+    app is the program path; only its base name is shown.
+    """
+    name = os.path.basename(app)
+    synopsis = "usage: %s [-h] [-e encoding] [-a attribute] xpath\n"
+    sys.stderr.write(synopsis % (name,))
+    sys.exit(1)
+
+def main(args):
+    """Run an XPath query on the HTML read from stdin and print the result.
+
+    args is the full argument vector including the program name.
+    Options: -h shows the usage, -e sets the parser encoding and
+    -a selects an attribute to print instead of the text.
+
+    Returns 0 on success and 1 when getxpath() yields None. Usage
+    errors do not return: usage() exits the process.
+    """
+    try:
+        opts, largs = getopt.getopt(args[1:], "he:a:")
+    except getopt.GetoptError as err:
+        # Diagnostics go to stderr so stdout only carries query results.
+        sys.stderr.write("%s\n" % (err,))
+        usage(args[0])
+
+    encoding = None
+    attribute = None
+    for o, a in opts:
+        if o == "-h":
+            usage(args[0])
+        elif o == "-e":
+            encoding = a
+        elif o == "-a":
+            attribute = a
+        else:
+            assert False, "unhandled option"
+
+    if len(largs) < 1:
+        usage(args[0])
+
+    rpath = getxpath(sys.stdin, largs[0], attribute, encoding)
+    if rpath is None:
+        return 1
+
+    print(rpath)
+
+    return 0
+
+if __name__ == "__main__":
+    # Script entry point: the return value of main() is the exit status.
+    exit_status = main(sys.argv)
+    sys.exit(exit_status)
+