本项目从淘车网爬取数据,并将爬取到的数据生成 Excel 表格
安装 lxml 和 XlsxWriter 库的时候飘红:从 Terminal 和 Python interpreter 安装都不行,最后试了试 cmd。好像是先从 cmd 用清华镜像安装,再从 Python interpreter 安装(刚开始 Python interpreter 也装不上)
# -*- coding: utf-8 -*-
# @Time : 2022/5/6 21:42 下午
# @Author : ywx
# @File : 爬虫2.py
# @Software: PyCharm
import requests
import xlrd
import xlwt
from lxml import etree
import xlsxwriter
# Listing page on taoche.com that this script downloads.
url = 'https://beijing.taoche.com/landrover/'
# Request headers sent with every HTTP call; the User-Agent mimics a desktop browser.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/100.0.4896.127 Safari/537.36 Edg/100.0.1185.50'
    ),
}
# Judge from the HTTP status code whether the page request succeeded.
def get(uel):
    """Request a page and print whether it answered with HTTP 200.

    Args:
        uel: URL to request. The parameter name is kept unchanged so that
            existing keyword callers keep working.

    Prints "success!" when the status code is 200 and "false" otherwise.
    Exceptions raised by ``requests.get`` propagate to the caller.
    """
    # Use the argument rather than the module-level ``url`` so that any page
    # can be checked, not only the default listing page.
    response = requests.get(uel, headers=headers)
    if response.status_code == 200:
        print("success!")
    else:
        print("false")
# Scrape the listing page and export the results to an Excel workbook.
def parse(url):
    """Scrape names and prices from ``url`` into ``taoche_spider.xlsx``.

    Args:
        url: Address of the page to download.

    The page is fetched with the module-level ``headers``. The text nodes
    matched by the two XPath queries are printed, then written to the first
    worksheet: names in the first column, prices in the second, starting at
    the first row. The two columns are filled independently of each other,
    so they may end up with different lengths.
    """
    response = requests.get(url, headers=headers)
    # Build a selector over the parsed HTML.
    selector = etree.HTML(response.text)
    name = selector.xpath('//a/span/text()')
    originalprice = selector.xpath('//i[@class="onepaynor"]/text()')
    print(name, originalprice)

    # The context manager closes the workbook (which is what writes the file)
    # even if one of the cell writes raises part-way through.
    with xlsxwriter.Workbook('taoche_spider.xlsx') as workbook:
        worksheet = workbook.add_worksheet()
        for row, item_name in enumerate(name):
            worksheet.write(row, 0, item_name)
        for row, price in enumerate(originalprice):
            worksheet.write(row, 1, price)
# Script entry: first check that the page responds, then scrape it and write the spreadsheet.
for step in (get, parse):
    step(url)