Python 2.7 lxml xpath
1:下载setuptools: https://pypi.python.org/pypi/setuptools,在这里下载Windows(simplified)下边的ez_setup.py
2:安装:在cmd下执行 python ez_setup.py,如果你安装了很多版本的python,则进入相应版本的文件夹,用对应的python.exe ez_setup.py
3:下载想要的lxml安装包
https://pypi.python.org/simple/lxml/
按系统位数选择对应的安装包(32位 / 64位):
lxml-2.3-py2.7-win-amd64.egg(64位)
lxml-2.3-py2.7-win32.egg(32位)
4:安装
进入C:\Python27\Scripts
使用命令行:
easy_install D:\Downloads\lxml-2.3-py2.7-win-amd64.egg
例子:
#coding:utf-8
import urllib
import urllib2
from lxml import etree as etree
if __name__ == "__main__":
    # Minimal example: fetch a page, parse it with lxml and print the text of
    # every node matched by an XPath expression.

    # urllib2 needs an explicit scheme; a bare host name such as
    # 'www.baidu.com' raises "ValueError: unknown url type".
    req_url = 'http://www.baidu.com'
    headers = {'User-Agent': '"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:26.0)Gecko/20100101 Firefox/26.0"'}
    req = urllib2.Request(req_url, headers=headers)

    # Close the HTTP response explicitly instead of leaving it to the
    # garbage collector.
    response = urllib2.urlopen(req, timeout=60)
    try:
        content = response.read()
    finally:
        response.close()

    # Decode raw bytes before handing them to the HTML parser; the example
    # assumes the page is GBK-encoded.
    if not isinstance(content, unicode):
        content = content.decode('GBK')

    htmlSource = etree.HTML(content)
    names = htmlSource.xpath(
        u'//*[@id="buildhistory"]/div[2]/table/*[@class="finish_mousenone"]/td[2]')
    for cell in names:
        print(cell.text)
# -*- coding: cp936 -*-
import urllib
import urllib2
from lxml import etree as etree
def _first_cell_text(row, cell_xpath):
    """Return the text of the first node matching cell_xpath under row, or ""."""
    cells = row.xpath(cell_xpath)
    if cells and cells[0].text is not None:
        return cells[0].text
    return ""


def InitSogouBranchInfo(req_url=None):
    """Fetch a build-history page and collect one entry per finished build row.

    The page at ``req_url`` is downloaded, decoded as GBK (unless it is
    already unicode) and parsed with lxml.  Every element with
    ``class="finish_mousenone"`` under ``#buildhistory/div[2]/table`` is
    treated as one build row.

    Args:
        req_url: URL of the build-history page.

    Returns:
        A dict with the keys ``buildTime``, ``buildVersion``,
        ``buildBranch``, ``buildAuthor`` and ``buildDownloadUrl``.  Each value
        is a list with one string per row, in document order; a missing or
        empty cell is stored as ``""`` so all lists have the same length.
        Returns ``None`` (after printing a message) when ``req_url`` is
        ``None`` or empty.
    """
    if req_url is None or req_url == '':
        print("req_url == none,return")
        return None

    headers = {'User-Agent': '"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:26.0)Gecko/20100101 Firefox/26.0"'}
    req = urllib2.Request(req_url, headers=headers)

    # Close the HTTP response explicitly instead of leaving it to the
    # garbage collector.
    response = urllib2.urlopen(req, timeout=60)
    try:
        content = response.read()
    finally:
        response.close()

    # Decode raw bytes before parsing; the page is assumed to be GBK-encoded.
    if not isinstance(content, unicode):
        content = content.decode('GBK')

    htmlSource = etree.HTML(content)
    buildResultRaw = htmlSource.xpath(
        u'//*[@id="buildhistory"]/div[2]/table/*[@class="finish_mousenone"]')

    # Result key -> XPath of the cell (relative to the row) holding its text.
    text_columns = (
        ('buildTime', u'td[2]'),
        ('buildVersion', u'td[3]'),
        ('buildBranch', u'td[6]'),
        ('buildAuthor', u'td[7]'),
    )

    result = {
        'buildTime': [],
        'buildVersion': [],
        'buildBranch': [],
        'buildAuthor': [],
        'buildDownloadUrl': [],
    }

    for row in buildResultRaw:
        for key, cell_xpath in text_columns:
            result[key].append(_first_cell_text(row, cell_xpath))

        # The download link is an attribute value, not element text.
        hrefs = row.xpath(u'td[12]/a/@href')
        result['buildDownloadUrl'].append(hrefs[0] if hrefs else "")

    return result
if __name__ == "__main__":
    # Print the download URL of every finished build on the branch page and
    # save the same list to d:/t.txt.

    # The backslash belongs to the query string; it is escaped explicitly so
    # the literal does not rely on "\P" being an unrecognised escape sequence.
    result = InitSogouBranchInfo('http://build.sogou-inc.com/system_build/common_module/project.php?project=ime&class=ime&branch=branch\\PinyinDev_R_7_4_Update_Kernel50')

    # NOTE(review): the file used to be opened for writing but was never
    # written to or closed.  It is assumed the URLs were meant to be saved
    # there; the with-block guarantees the handle is closed.
    with open(r'd:/t.txt', 'w') as f:
        for url in result['buildDownloadUrl']:
            print(url)
            # Encode to bytes so that writing does not depend on the default
            # ASCII codec if an href turns out to be a unicode string.
            f.write(url.encode('utf-8') + '\n')
参考:
http://www.cnblogs.com/zhuyp1015/archive/2012/07/17/2596495.html
http://blog.sina.com.cn/s/blog_641289eb0100yf84.html
http://blog.csdn.net/zhaokuo719/article/details/8209496
http://blog.csdn.net/shirdrn/article/details/7030026
http://bbs.csdn.net/topics/390823000
http://www.cnblogs.com/bluescorpio/archive/2010/05/31/1748503.html