下面这种绝对路径的写法解析不出数据(XPath 里的 tbody 是浏览器渲染时自动补上的,页面源码中并没有 tbody,所以匹配不到任何 tr):
from lxml import etree
import requests
url = "http://xxxx"
# Without a timeout, requests waits indefinitely on an unresponsive server.
res = requests.post(url, timeout=10)
tree = etree.HTML(res.text)
# NOTE: this absolute path is kept on purpose as the non-working example.
# It requires a <tbody> child under the second table. Browsers add <tbody>
# to their DOM, but the raw page source has none, so nothing is selected.
trs = tree.xpath('/html/body/table[2]/tbody/tr')
# The first <tr> is the header row. Slicing an empty result just yields an
# empty list, so no separate emptiness check is needed.
for tr in trs[1:]:
    first_cell = tr.xpath('./td[1]/text()')
    second_cell = tr.xpath('./td[2]/text()')
    if not first_cell or not second_cell:
        # A cell without a text node returns [], and [0] would raise
        # IndexError; skip such rows instead of crashing.
        continue
    a = first_cell[0]
    b = second_cell[0]
    print(a, b)
改用下面这种相对路径的写法(按 class 定位表格,不依赖 tbody)就可以解析出数据:
from lxml import etree
import requests
url = "http://xxxx"
# Without a timeout, requests waits indefinitely on an unresponsive server.
res = requests.post(url, timeout=10)
tree = etree.HTML(res.text)
# Locate the table by its class attribute and take every <tr> below it,
# whether or not a <tbody> sits in between.
trs = tree.xpath('//table[@class="tablelist"]//tr')
# The first <tr> is the header row. Slicing an empty result just yields an
# empty list, so no separate emptiness check is needed.
for tr in trs[1:]:
    first_cell = tr.xpath('./td[1]/text()')
    second_cell = tr.xpath('./td[2]/text()')
    if not first_cell or not second_cell:
        # A cell without a text node returns [], and [0] would raise
        # IndexError; skip such rows instead of crashing.
        continue
    a = first_cell[0]
    b = second_cell[0]
    print(a, b)
Scrapy 自带 XPath 选择器,不需要再通过 etree.HTML 解析。还有一点:Scrapy 的 xpath() 返回的是选择器列表(SelectorList),要再调用 .extract() 才会变成字符串列表,这一点和 etree 有点区别。
def parse(self, response):
    """Yield one record per data row of the ``tablelist`` table in *response*.

    ``index_id``, ``index_name`` and ``statistic_date`` are read from
    ``response.meta``. Each yielded dict contains those three fields plus
    ``time_horizon`` (text of the row's first cell) and ``yield_return``
    (text of the row's second cell).

    The first ``<tr>`` is treated as the header and skipped. Rows in which
    either cell has no text node are skipped as well. Because this is a
    generator, nothing (including the debug print) runs until the result
    is iterated.

    Raises:
        KeyError: if one of the three keys is missing from ``response.meta``.
    """
    meta = response.meta
    data = {
        "index_id": meta['index_id'],
        "index_name": meta['index_name'],
        "statistic_date": meta['statistic_date'],
    }
    print(data)
    trs = response.xpath('//table[@class="tablelist"]//tr')
    for tr in trs[1:]:
        # .extract() converts the selector list into a plain list of strings.
        horizons = tr.xpath('./td[1]/text()').extract()
        returns = tr.xpath('./td[2]/text()').extract()
        if not horizons or not returns:
            # An empty cell extracts to [], and [0] would raise IndexError.
            continue
        time_horizon = horizons[0]
        yield_return = returns[0]
        # Copy the shared fields so every yielded record is independent.
        yield dict(data, time_horizon=time_horizon, yield_return=yield_return)
# This is the HTML text of the page; it can be used to test the XPath
# expressions without requesting the site.
# Note that the <tr> rows sit directly under <table>: there is no <tbody>.
html_text="""
<script type="text/javascript">
var ctx="/cbweb-mn";
var ctxStatics="/cbweb-mn/statics";
</script>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title></title>
<script type="text/javascript">
function bzqxYearQuery(ycDefId,wrjxFlg){
var flg = wrjxFlg;
var url = "/cbweb-mn/yc/downYearBzqxList?wrjxCBFlag="+flg+"&&zblx=txy&&ycDefId="+ycDefId+"&&locale=zh_CN";
window.open(url,null,"height=400,width=650,status=yes,scrollbars=yes,menubar=no,location=no,resizable=yes");
}
</script>
</head>
<body>
<table border="0" cellspacing="0" cellpadding="0" class="t1">
<tr>
<td width="4%" align="right"><img
src="/cbweb-mn/statics/images/dot5.jpg" align="absmiddle" /></td>
<td width="95%" style="text-align:left;"><span>中债企业债收益率曲线(AA)(到期)</span></td>
<td width="0%" style="text-align:right;"></td>
<td width="0%"> </td>
</tr>
</table>
<table border="0" cellspacing="1" cellpadding="0" class="tablelist">
<tr>
<td>标准期限</td>
<td>收益率(%)</td>
</tr>
<tr>
<td>0.0y</td>
<td>2.515</td>
</tr>
<tr>
<td>0.08y</td>
<td>2.5348</td>
</tr>
<tr>
<td>0.25y</td>
<td>2.4493</td>
</tr>
<tr>
<td>0.5y</td>
<td>2.4984</td>
</tr>
<tr>
<td>0.75y</td>
<td>2.5554</td>
</tr>
<tr>
<td>1.0y</td>
<td>2.671</td>
</tr>
</table>
<span> </span>
</body>
</html>"""