网上有通过 Python 调用 Google Translate 的例子，不过翻译结果不够详细，于是又写了一个抓取 Google 词典释义的脚本。
需要 lxml 支持；用 XPath 解析文档比原始的 SGMLParser 要舒服很多。
import urllib2
import urllib
from StringIO import StringIO
from lxml import etree
def grabData(queryWord):
    """Fetch the Google Dictionary (en -> zh-CN) result page for a word.

    Args:
        queryWord: The word or phrase to look up. A ``unicode`` value is
            encoded as UTF-8 before it is placed in the URL.

    Returns:
        The raw response body read from the HTTP response.

    Raises:
        urllib2.URLError: If the request cannot be completed.
    """
    # urllib.quote (Python 2) fails on non-ASCII unicode objects, so hand it
    # UTF-8 bytes instead.
    if isinstance(queryWord, unicode):
        queryWord = queryWord.encode("utf-8")
    # Percent-encode the term so spaces, '&', '#', etc. cannot break or
    # inject into the query string.
    requestUrl = ("http://www.google.cn/dictionary?langpair=en|zh-CN&q="
                  + urllib.quote(queryWord, safe="")
                  + "&hl=zh-CN&aq=f")
    response = urllib2.urlopen(urllib2.Request(requestUrl))
    try:
        return response.read()
    finally:
        # Release the connection even if read() raises.
        response.close()
def parseData(htmlStr):
    """Extract headings and definitions from a dictionary result page.

    Args:
        htmlStr: HTML source of the result page (e.g. the value returned
            by grabData).

    Returns:
        A flat list in document order. For every entry matched by
        ``//div[@class='dct-srch-rslt']/ul[@class='dfnt']/li`` it contains
        the text of the entry's ``h4`` (None when there is no ``h4``),
        and then, for each ``ol/li`` sense of that entry:

        * the sense's ``span`` text, only when the sense has a nested
          ``ul``;
        * the concatenated text of every node under that ``ul`` (or under
          the sense itself when there is no ``ul``).

        Newlines are removed from the sense strings. An empty or
        whitespace-only ``htmlStr`` yields an empty list.
    """
    result = []
    # Nothing to parse; avoid handing an empty document to the parser.
    if not htmlStr or not htmlStr.strip():
        return result

    tree = etree.parse(StringIO(htmlStr), etree.HTMLParser())
    entries = tree.xpath("//div[@class='dct-srch-rslt']/ul[@class='dfnt']/li")
    for item in entries:
        result.append(item.findtext("h4"))
        for subitem in item.xpath("ol/li"):
            detailItems = subitem.find("ul")  # nested list: multi-item sense
            # Compare against None explicitly: the truth value of an lxml
            # element reflects whether it has children, not whether
            # find() located it.
            if detailItems is not None:
                # Default to "" so a sense without a <span> does not crash.
                result.append(subitem.findtext("span", "").replace("\n", ""))
            else:
                detailItems = subitem
            # Join the text as-is; wrapping it in str() raises
            # UnicodeEncodeError for non-ASCII text under Python 2.
            pieces = [node.text for node in detailItems.iter() if node.text]
            result.append("".join(pieces).replace("\n", ""))
    return result
if __name__ == "__main__":
    # Demo run: look up "love" and print each extracted line.
    rawHtml = grabData("love")
    for entry in parseData(rawHtml):
        print(entry)