用selenium库爬取优信二手车信息
上代码
#引入selenium、 pandas、openpyxl库
import openpyxl
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
# Scrape listing cards from pages i1..i5 under /hangzhou/baoma/ on xin.com and
# export them to 'youxin.xlsx'.
#
# Accumulators for the three exported columns; they map to the sheet headers
# '名称' (name), '公里' (kilometres) and '地区' (region) respectively.
mingcheng = []
gongli = []
diqu = []

# Reuse a single Chrome session for every page instead of launching a new
# browser per page, and always shut it down so no driver process is leaked.
browser = webdriver.Chrome()
try:
    for page in range(1, 6):
        url = 'https://www.xin.com/hangzhou/baoma/i' + str(page)
        browser.get(url)
        # Each element with class 'across' is treated as one listing card.
        for card in browser.find_elements(By.CLASS_NAME, 'across'):
            pads = card.find_elements(By.CLASS_NAME, 'pad')
            if not pads:
                # No 'pad' container: nothing to extract from this card.
                continue
            # Look the spans up once; the first three hold name, mileage, region.
            spans = pads[0].find_elements(By.TAG_NAME, 'span')
            if len(spans) < 3:
                # Skip incomplete cards so one malformed entry neither aborts
                # the run with IndexError nor leaves the columns misaligned.
                continue
            mingcheng.append(spans[0].text)
            gongli.append(spans[1].text)
            diqu.append(spans[2].text)
finally:
    browser.quit()

# Assemble the table and write it to the '爬虫数据' sheet. The context manager
# saves and closes the workbook (ExcelWriter.save() no longer exists in
# pandas 2.x).
data = pd.DataFrame({'名称': mingcheng, '公里': gongli, '地区': diqu})
with pd.ExcelWriter('youxin.xlsx') as writer:
    data.to_excel(writer, sheet_name='爬虫数据')