import requests
from bs4 import BeautifulSoup
import traceback
# exception handling
import xlwt
# write to .xls spreadsheets
# Cookies record the login information; session-based requests
def get_content(url, headers=None, proxy=None, timeout=None):
    """Fetch ``url`` with an HTTP GET and return the raw response body.

    Args:
        url: Address to request.
        headers: Optional mapping of HTTP headers, passed to requests as-is.
        proxy: Optional proxy to route the request through. Either a
            requests-style mapping (``{"http": ..., "https": ...}``) or a
            single proxy URL string, which is then used for both schemes.
        timeout: Optional timeout in seconds forwarded to ``requests.get``.
            ``None`` (the default) means no timeout is applied.

    Returns:
        The undecoded response body (``Response.content``, bytes).

    Raises:
        requests.RequestException: Propagated from ``requests.get`` on
            connection problems (and on timeout when ``timeout`` is set).
    """
    # requests expects a scheme -> proxy URL mapping; expand a bare URL
    # string so both call styles work. None/mappings are passed through.
    if isinstance(proxy, str):
        proxies = {"http": proxy, "https": proxy}
    else:
        proxies = proxy
    response = requests.get(
        url,
        headers=headers,
        proxies=proxies,
        timeout=timeout,
    )
    return response.content
def get_url(html):
    """Collect the link targets found inside ``<div class="tit">`` elements.

    Args:
        html: Page markup (str or bytes) to parse with the lxml parser.

    Returns:
        A list holding, for each ``<div class="tit">`` in document order,
        the ``href`` of its first ``<a>`` that has one. Divs without such
        an anchor are skipped; the list is empty when nothing matches.
    """
    soup = BeautifulSoup(html, 'lxml')
    # ``class`` is a Python keyword, so BeautifulSoup takes ``class_``.
    # find_all() returns every match as a list (empty when there is none),
    # whereas find() returns only the first match, or None.
    title_blocks = soup.find_all('div', class_='tit')
    links = []
    for block in title_blocks:
        # href=True limits the search to anchors that actually carry an
        # href, so a div with no usable link is skipped instead of raising
        # TypeError (no <a>) or KeyError (no href attribute).
        anchor = block.find('a', href=True)
        if anchor is not None:
            links.append(anchor['href'])
    return links
def get_detail_content(html):
try:
soup=BeautifulSoup(html,'lxml')
price=soup.find('span',id='avgPriceTitle').text
evaluation=soup.find('sp