Python爬虫实战-爬取城市每条公交详细信息并保存至本地(方法、XPath)
需求:通过8684城市公交列表URL,爬取所有路线的详细信息及站点名称
import requests
from lxml import etree
import time
class GongJiao(object):
"""docstring for GongJiao"""
# HTTP headers handed to requests.get() in get_line_list. The only entry is
# a desktop Chrome 71 User-Agent string.
# NOTE(review): presumably sent so the site treats the scraper like a normal
# browser -- confirm that the target site actually requires it.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/71.0.3578.98 Safari/537.36'
    ),
}
def __init__(self, line_url):
super(GongJiao, self).__init__()
self.line_url = line_url
self.line_list=[]
self.buslins=[]
# url='https://xian.8684.cn/line1'
#通过url获取公交列表
def get_line_list(self):
url=self.line_url+'/line1'
r=requests.get(url=url,headers=self.headers)
tree=etree.HTML(r.text)
href_list=tree.xpath('//div[@id="con_site_1"]/a')
for lt in href_list:
title=lt.xpath('.//@title')[