import csv

import requests
from bs4 import BeautifulSoup
# import io
# import sys
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='gb18030') # change the default encoding of standard output
# HTTP request headers passed to requests.get() by get_html for every page
# download: a desktop Chrome User-Agent string plus a Cookie header.
# NOTE(review): the Cookie value looks like a session captured from a browser;
# presumably it expires and has to be refreshed by hand — confirm.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36',
'Cookie': 'antipas=9e2f7r9Zdt73Aa5Np1551282931; uuid=6ad2765a-20f5-4965-fa16-775aaa16a524; cityDomain=qinyan; clueSourceCode=%2A%2300; user_city_id=1001984; Hm_lvt_936a6d5df3f3d309bda39e92da3dd52f=1589072257; ganji_uuid=3010858154925758839406; sessionid=d16a17f2-bbb1-4796-a60d-78754a64fd89; lg=1; close_finance_popup=2020-05-10; lng_lat=112.897858_35.042336; gps_type=1; cainfo=%7B%22ca_a%22%3A%22-%22%2C%22ca_b%22%3A%22-%22%2C%22ca_s%22%3A%22seo_baidu%22%2C%22ca_n%22%3A%22default%22%2C%22ca_medium%22%3A%22-%22%2C%22ca_term%22%3A%22-%22%2C%22ca_content%22%3A%22-%22%2C%22ca_campaign%22%3A%22-%22%2C%22ca_kw%22%3A%22-%22%2C%22ca_i%22%3A%22-%22%2C%22scode%22%3A%22-%22%2C%22keyword%22%3A%22-%22%2C%22ca_keywordid%22%3A%22-%22%2C%22display_finance_flag%22%3A%22-%22%2C%22platform%22%3A%221%22%2C%22version%22%3A1%2C%22client_ab%22%3A%22-%22%2C%22guid%22%3A%226ad2765a-20f5-4965-fa16-775aaa16a524%22%2C%22ca_city%22%3A%22jiaozuo%22%2C%22sessionid%22%3A%22d16a17f2-bbb1-4796-a60d-78754a64fd89%22%7D; preTime=%7B%22last%22%3A1589072287%2C%22this%22%3A1589072255%2C%22pre%22%3A1589072255%7D; Hm_lpvt_936a6d5df3f3d309bda39e92da3dd52f=1589072289',
}
# Fetch the page HTML
def get_html(url, page=None):
    """Download *url* and return its body decoded as UTF-8 text.

    Args:
        url: Address of the listing page to fetch.
        page: Page number shown in the progress message. Optional; when
            omitted, a module-level ``page`` (set by the script's main loop)
            is used if one exists, otherwise the URL itself is shown.

    Returns:
        The decoded HTML, or the sentinel string '产生异常' when the request
        fails, the server answers with an HTTP error status, or the body is
        not valid UTF-8.
    """
    if page is None:
        # The progress message used to read an undeclared global directly,
        # which raised NameError whenever this function was called outside
        # the script's page loop. Look it up safely instead.
        page = globals().get('page', url)
    print("======正在保存{}页数据======".format(page))
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(url, headers=headers, timeout=10)
        # Treat 4xx/5xx answers as failures rather than as page content.
        response.raise_for_status()
        return response.content.decode('utf-8')
    except (requests.RequestException, UnicodeDecodeError):
        # Only network/HTTP/decoding problems map to the sentinel; anything
        # else (KeyboardInterrupt, programming errors) now propagates.
        return '产生异常'
# Extract the data
def parse_html(html, path=r'C:\Users\DELL\Desktop\python_wd\瓜子二手车.csv'):
    """Extract the car listings from *html* and append them to a CSV file.

    Each listing becomes one row: name, year, mileage, price, and the
    struck-through price ('null' when the listing has none).

    Args:
        html: Markup of one listing page.
        path: CSV file to append to. Defaults to the location this script
            has always written to.

    Returns:
        None. Rows are appended to *path* and each saved name is printed.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # Smallest parent element that contains every car on the page.
    container = soup.find('ul', {'class': 'carlist clearfix js-top'})
    if container is None:
        # No listing container: e.g. get_html returned its failure sentinel.
        # Report it and skip the page instead of dying with AttributeError.
        # (Message: "car list not found, skipping this page".)
        print("\t未找到车辆列表,跳过本页")
        return

    # newline='' hands line-ending control to the csv module, as its
    # documentation requires for files passed to csv.writer.
    with open(path, 'a', encoding='utf-8', newline='') as f:
        # csv.writer quotes fields containing commas, quotes or newlines,
        # which the previous hand-built "a,b,c" line did not.
        writer = csv.writer(f)
        for item in container.find_all('li'):
            name_tag = item.find('h2', class_="t")
            info_tag = item.find('div', class_="t-i")
            price_tag = item.find('p')  # asking price
            if name_tag is None or info_tag is None or price_tag is None:
                # Not a complete car entry; nothing usable to record.
                continue

            name = name_tag.get_text()
            # The info text is split on '|': first part is used as the year,
            # second as the mileage.
            info = info_tag.get_text().split('|')
            year = info[0]
            km = info[1] if len(info) > 1 else ''
            price = price_tag.get_text()

            # The struck-through (previous) price is optional.
            old_price_tag = item.find('em', class_="line-through")
            old_price = 'null' if old_price_tag is None else old_price_tag.get_text()

            writer.writerow([name, year, km, price, old_price])
            print("\t保存信息车名:", name)
# Main function (entry point)
def main(page):
    """Process one listing page: download it, then hand it to parse_html.

    Args:
        page: Page number substituted into the listing URL.
    """
    url_template = 'https://www.guazi.com/qinyan/buy/o{}/#bread'
    page_markup = get_html(url_template.format(page))
    parse_html(page_markup)
if __name__ == '__main__':
    # Walk result pages 1 through 5, one main() call per page.
    # The loop variable is deliberately named ``page``: while the script
    # runs it is a module-level name, and get_html looks it up for its
    # progress message.
    first_page, last_page = 1, 5
    for page in range(first_page, last_page + 1):
        main(page)
# Guazi used cars: pagination, CSV export
# Latest recommended article published on 2023-09-16 22:51:18