xpath (1191B)
#!/usr/bin/env python
#
# Copy me if you can.
# by 20h
#
"""Print the text or an attribute of the nodes an XPath selects in HTML.

The HTML document is read from standard input; the result is written to
standard output.  See usage() for the command line.
"""

import os
import sys
import getopt
from lxml import etree


def getxpath(fd, xpath, attribute=None, encoding=None):
    """Evaluate ``xpath`` against the HTML document read from ``fd``.

    Args:
        fd: Source handed to ``etree.parse`` (main() passes ``sys.stdin``).
        xpath: XPath expression to evaluate on the parsed document.
        attribute: When given, collect this attribute from every selected
            element instead of the element text.
        encoding: Encoding passed to ``etree.HTMLParser``; ``None`` leaves
            the choice to the parser.

    Returns:
        With ``attribute``: the attribute values joined by newlines;
        selected items that are not elements, or that lack the attribute,
        are skipped.  Without ``attribute``: the stripped text of every
        selected item, concatenated without a separator.  ``None`` when
        parsing or evaluation raises ``AssertionError``.
    """
    try:
        parser = etree.HTMLParser(encoding=encoding)
        xml = etree.parse(fd, parser)
        sels = xml.xpath(xpath)
    except AssertionError:
        # NOTE(review): presumably raised by lxml when the input yields no
        # usable document -- confirm against the lxml version in use.
        return None

    # Expressions such as count(...) or string(...) evaluate to a bare
    # scalar rather than a list; normalise so the loops below work.
    if not isinstance(sels, list):
        sels = [sels]

    if attribute is not None:
        values = []
        for sel in sels:
            # Text and attribute results are plain strings without
            # attributes of their own, so only elements are inspected.
            if not etree.iselement(sel):
                continue
            value = sel.get(attribute)
            if value is not None:
                values.append(value)
        return "\n".join(values)

    texts = []
    for sel in sels:
        if etree.iselement(sel):
            text = "".join(sel.itertext())
        else:
            # String, number or boolean result: use its textual form.
            text = "%s" % (sel,)
        texts.append(text.strip())
    return "".join(texts)


def usage(app):
    """Write the usage line for ``app`` to stderr and exit with status 1."""
    app = os.path.basename(app)
    sys.stderr.write("usage: %s [-h] [-e encoding] "
                     "[-a attribute] xpath\n" % (app))
    sys.exit(1)


def main(args):
    """Run the command line tool.

    Args:
        args: Full argument vector, program name first (``sys.argv``).

    Returns:
        0 after writing the result to stdout, 1 when getxpath() returned
        ``None``.  Invalid options, ``-h`` or a missing xpath argument
        end the process through usage().
    """
    try:
        opts, largs = getopt.getopt(args[1:], "he:a:")
    except getopt.GetoptError as err:
        # Diagnostics belong on stderr; stdout carries only the result.
        sys.stderr.write("%s\n" % (err,))
        usage(args[0])

    encoding = None
    attribute = None
    # getopt only yields options named in the spec above, so no
    # catch-all branch is needed.
    for o, a in opts:
        if o == "-h":
            usage(args[0])
        elif o == "-e":
            encoding = a
        elif o == "-a":
            attribute = a

    if not largs:
        usage(args[0])

    rpath = getxpath(sys.stdin, largs[0], attribute, encoding)
    if rpath is None:
        return 1

    sys.stdout.write(rpath)

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))