# Disclaimer: this content is for sharing purposes only...
import requests
from lxml import etree
import csv
# Scrape coach listings from jiakaobaodian.com and append them to a CSV file.
#
# Flow: ask the user how many listing pages to fetch, download each page,
# pull up to ROWS_PER_PAGE coach entries out of the HTML with XPath, echo
# each entry to the console and append it as one CSV row.

ROWS_PER_PAGE = 20    # list items probed on every page (li[1] .. li[20])
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get can block forever


def _first_text(tree, path):
    """Return the first result of evaluating XPath *path* on *tree*.

    Returns an empty string when the expression matches nothing, so a short
    page or an entry with a missing field does not raise IndexError.
    """
    matches = tree.xpath(path)
    return matches[0] if matches else ''


print("爬取驾校老师数据 输入页数--->:")
yeshu = int(input("输入页数:"))  # raises ValueError on non-numeric input
headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.5735.289 Safari/537.36"}

# The context manager guarantees the file is flushed and closed even if a
# request or a parse step raises part-way through the run.
with open('D:\\\\驾校数据.csv', mode='a', encoding="gbk", newline='') as f:
    writer = csv.DictWriter(f, fieldnames=['老师姓名', '教龄', '驾校名称', '学员数量', ])
    # The file is opened in append mode: only emit the header when the file
    # is still empty, otherwise every re-run would insert a header row in
    # the middle of the existing data.
    if f.tell() == 0:
        writer.writeheader()

    for i in range(1, yeshu + 1):
        url_list = "https://www.jiakaobaodian.com/coach/?page=" + str(i)
        response = requests.get(url_list, headers=headers, timeout=REQUEST_TIMEOUT)
        # Fail loudly on HTTP errors instead of parsing an error page.
        response.raise_for_status()
        reques = response.text
        html = etree.HTML(reques)

        for s in range(1, ROWS_PER_PAGE + 1):
            # XPath expressions are kept exactly as in the original script.
            # NOTE(review): the name path starts at '//div[5]' while the other
            # three start at '//div/div[5]' - confirm whether that is intended.
            name = _first_text(html, '//div[5]/div/div[2]/div[1]/div[2]/ul/li['+str(s)+']/div[1]/p[1]/a/text()')
            jiao = _first_text(html, '//div/div[5]/div/div[2]/div[1]/div[2]/ul/li['+str(s)+']/div[1]/p[1]/span/text()')
            jiax = _first_text(html, '//div/div[5]/div/div[2]/div[1]/div[2]/ul/li['+str(s)+']/div[1]/p[3]/a/text()')
            xuey = _first_text(html, '//div/div[5]/div/div[2]/div[1]/div[2]/ul/li['+str(s)+']/div[1]/div/span[2]/text()')

            # Nothing matched at this index (e.g. the page holds fewer than
            # ROWS_PER_PAGE entries): skip it rather than write a blank row.
            if not (name or jiao or jiax or xuey):
                continue

            print("当前页数:",i," *** 教练姓名:",name,"教龄:",jiao,"驾校:",jiax,"学员数:",xuey,"------")
            dit = {'老师姓名': name,'教龄': jiao,'驾校名称': jiax,'学员数量': xuey, }
            writer.writerow(dit)

# Printed only after the with-block has closed (and therefore flushed) the file.
print("已全部保存到D盘---注意查收---")
# -->csv