xpath (1268B)
#!/usr/bin/env python
#
# Copy me if you can.
# by 20h
#
"""Read HTML from stdin, evaluate an XPath expression and print the matches.

usage: xpath [-h] [-e encoding] [-a attribute] xpath
"""

import os
import sys
import getopt
from lxml import etree


def _node_text(node):
    """Return the printable text of a single XPath result item.

    Element-like items (anything offering ``itertext``) yield their
    concatenated, whitespace-stripped text content.  Items that are already
    strings (results of ``text()`` or ``@attr`` steps) are returned as-is.
    """
    if hasattr(node, "itertext"):
        return "".join(node.itertext()).strip()
    return node


def getxpath(fd, xpath, attribute=None, encoding=None):
    """Evaluate *xpath* against the HTML document read from *fd*.

    Parameters:
        fd: file name or file-like object handed to ``etree.parse``.
        xpath: the XPath expression to evaluate.
        attribute: if given, return the value of this attribute for every
            matched node that carries it, instead of the node text.
        encoding: optional input encoding passed to ``etree.HTMLParser``.

    Returns:
        The results joined by newlines, or ``None`` when parsing or
        evaluation trips an lxml ``AssertionError``.  Other lxml exceptions
        (for instance an invalid XPath expression) propagate to the caller.
    """
    try:
        parser = etree.HTMLParser(encoding=encoding)
        xml = etree.parse(fd, parser)
        sels = xml.xpath(xpath)
    except AssertionError:
        return None

    # Expressions such as string(...), count(...) or boolean(...) return a
    # single scalar rather than a list.  Iterating it would split a string
    # into characters or raise TypeError for numbers and booleans.
    if not isinstance(sels, list):
        if attribute is not None:
            # A scalar carries no attributes, so nothing can match.
            return ""
        return "%s" % (sels,)

    if attribute is not None:
        # Items without an attribute mapping (plain string results) are
        # skipped, just like elements that lack the requested attribute.
        return "\n".join([i.attrib[attribute] for i in sels
                          if attribute in getattr(i, "attrib", ())])

    # Handle every item on its own so lists mixing elements and strings
    # (e.g. "//a | //a/text()") work as well.
    return "\n".join([_node_text(i) for i in sels])


def usage(app):
    """Write the usage line for *app* to stderr and exit with status 1."""
    app = os.path.basename(app)
    sys.stderr.write("usage: %s [-h] [-e encoding] "
                     "[-a attribute] xpath\n" % (app))
    sys.exit(1)


def main(args):
    """Command-line entry point; *args* is the full argv including argv[0].

    Returns the process exit status: 0 on success, 1 when the document
    could not be evaluated.  Usage errors exit via ``usage``.
    """
    try:
        opts, largs = getopt.getopt(args[1:], "he:a:")
    except getopt.GetoptError as err:
        # Diagnostics belong on stderr so they never mix with results.
        sys.stderr.write("%s\n" % (err))
        usage(args[0])

    encoding = None
    attribute = None
    for o, a in opts:
        if o == "-h":
            usage(args[0])
        elif o == "-e":
            encoding = a
        elif o == "-a":
            attribute = a
        else:
            # getopt only yields declared options; treat anything else as
            # a usage error instead of relying on assert (stripped by -O).
            usage(args[0])

    if len(largs) < 1:
        usage(args[0])

    # Hand lxml the raw bytes when available (Python 3): a text-mode stdin
    # would already be decoded with the locale codec, defeating -e and
    # failing on input that is not valid in that codec.
    source = getattr(sys.stdin, "buffer", sys.stdin)
    rpath = getxpath(source, largs[0], attribute, encoding)
    if rpath is None:
        return 1

    print(rpath)

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))