from lxml import etree import requests def get_bus(page): ls = [] url = 'http://www.lzbus.com/www/BusLineSearchResult.asp?page={}'.format(page) r = requests.get(url) r.encoding = 'gb2312' print(r.status_code) et = etree.HTML(r.text) txt = et.xpath('//td[@class="FontBig"]//tr')[2::3] for i in txt: print(txt) for i in txt: row = [] num = i.xpath('//td[@valign="top"]/strong/text()')[0].replace("\xa0", "") time = i.xpath('//td[@valign="top"]/text()')[2].replace("\xa0", "") station = i.xpath('//td[@valign="top"]/text()')[3].replace("\xa0", "") company = i.xpath('//td[@valign="top"]/text()')[4].replace("\xa0", "") row.append(num) row.append(time) row.append(station) row.append(company) ls.append(row) print(ls) return ls # for i in range(1,17): # pass get_bus(1)
结果循环出来的值都一样,我用
for i in txt:
print(txt)
查看了一下,咋把这个列表重复了八次 ,这是啥情况