瓜子二手车 翻页 csv

import csv
import re

import requests
from bs4 import BeautifulSoup

# import io
# import sys
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='gb18030')  #  改变标准输出的默认编码

# HTTP request headers passed to requests.get() for every page fetch.
# The User-Agent identifies the client as desktop Chrome 80 on Windows 10 and
# the Cookie is a captured browser cookie string for guazi.com.
# NOTE(review): the cookie embeds session ids and timestamps, so it presumably
# expires and has to be refreshed from a real browser session - confirm.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36',
    'Cookie': 'antipas=9e2f7r9Zdt73Aa5Np1551282931; uuid=6ad2765a-20f5-4965-fa16-775aaa16a524; cityDomain=qinyan; clueSourceCode=%2A%2300; user_city_id=1001984; Hm_lvt_936a6d5df3f3d309bda39e92da3dd52f=1589072257; ganji_uuid=3010858154925758839406; sessionid=d16a17f2-bbb1-4796-a60d-78754a64fd89; lg=1; close_finance_popup=2020-05-10; lng_lat=112.897858_35.042336; gps_type=1; cainfo=%7B%22ca_a%22%3A%22-%22%2C%22ca_b%22%3A%22-%22%2C%22ca_s%22%3A%22seo_baidu%22%2C%22ca_n%22%3A%22default%22%2C%22ca_medium%22%3A%22-%22%2C%22ca_term%22%3A%22-%22%2C%22ca_content%22%3A%22-%22%2C%22ca_campaign%22%3A%22-%22%2C%22ca_kw%22%3A%22-%22%2C%22ca_i%22%3A%22-%22%2C%22scode%22%3A%22-%22%2C%22keyword%22%3A%22-%22%2C%22ca_keywordid%22%3A%22-%22%2C%22display_finance_flag%22%3A%22-%22%2C%22platform%22%3A%221%22%2C%22version%22%3A1%2C%22client_ab%22%3A%22-%22%2C%22guid%22%3A%226ad2765a-20f5-4965-fa16-775aaa16a524%22%2C%22ca_city%22%3A%22jiaozuo%22%2C%22sessionid%22%3A%22d16a17f2-bbb1-4796-a60d-78754a64fd89%22%7D; preTime=%7B%22last%22%3A1589072287%2C%22this%22%3A1589072255%2C%22pre%22%3A1589072255%7D; Hm_lpvt_936a6d5df3f3d309bda39e92da3dd52f=1589072289',
}

# Download the HTML of one listing page
def get_html(url, page=None, timeout=10):
    """Fetch *url* and return its body decoded as UTF-8.

    Args:
        url: Address of the listing page to download.
        page: Page number shown in the progress message. When omitted it is
            taken from the ``/o<N>/`` segment of *url* (falling back to the
            URL itself), so the function does not depend on a module-level
            ``page`` variable being defined by the caller.
        timeout: Seconds to wait for the server before giving up; without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The decoded HTML text, or the sentinel string ``'产生异常'`` when the
        request fails, the server answers with an HTTP error status, or the
        body is not valid UTF-8. The sentinel is kept for backward
        compatibility with existing callers.
    """
    if page is None:
        match = re.search(r'/o(\d+)/', url)
        page = match.group(1) if match else url
    print("======正在保存{}页数据======".format(page))
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        # Treat 4xx/5xx answers as failures instead of parsing an error page.
        response.raise_for_status()
        return response.content.decode('utf-8')
    except (requests.RequestException, UnicodeDecodeError) as exc:
        # Only network/HTTP/decoding problems are handled here, so that
        # KeyboardInterrupt and programming errors still propagate.
        print("请求失败: {} ({})".format(url, exc))
        return '产生异常'


# Extract the car records from a listing page and append them to the CSV
def _tag_text(tag, default=''):
    """Return the text of *tag*, or *default* when the lookup found nothing."""
    return tag.get_text() if tag is not None else default


def parse_html(html, path=r'C:\Users\DELL\Desktop\python_wd\瓜子二手车.csv'):
    """Parse one listing page and append one CSV row per car to *path*.

    Each row holds: name, year, mileage, asking price, original price
    (``'null'`` when the listing shows no struck-through price).

    Args:
        html: HTML text of a listing page.
        path: CSV file to append to. Defaults to the location the script has
            always used.

    Returns:
        None. Rows are appended to *path* and each saved car name is printed.

    Notes:
        * If the page has no car-list container (for example because the
          download failed), a message is printed and nothing is written.
        * ``<li>`` elements without a title are skipped instead of raising.
        * Rows are written with ``csv.writer`` so that commas, quotes or line
          breaks inside a field are quoted rather than corrupting the file.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # Smallest common parent of all car entries on the page.
    container = soup.find('ul', {'class': 'carlist clearfix js-top'})
    if container is None:
        print("未找到车辆列表,跳过该页")
        return

    # newline='' lets the csv module control line endings itself.
    with open(path, 'a', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        for item in container.find_all('li'):
            title = item.find('h2', class_="t")
            if title is None:
                # Not a car entry (no title element) - nothing to record.
                continue
            name = title.get_text()

            # The info line is expected to look like "<year>|<mileage>|...".
            details = _tag_text(item.find('div', class_="t-i")).split('|')
            year = details[0]
            km = details[1] if len(details) > 1 else ''

            price = _tag_text(item.find('p'))  # asking price
            original_price = _tag_text(
                item.find('em', class_="line-through"), 'null')

            writer.writerow([name, year, km, price, original_price])
            print("\t保存信息车名:", name)




#   Entry point for scraping a single listing page
def main(page):
    """Scrape the listing page with number *page*.

    Builds the page URL, downloads it with ``get_html`` and passes the
    result to ``parse_html``.
    """
    url_template = 'https://www.guazi.com/qinyan/buy/o{}/#bread'
    parse_html(get_html(url_template.format(page)))


if __name__ == '__main__':
    # Scrape listing pages 1 through 5, one after another.
    # NOTE: the loop variable is deliberately named `page`; it is a
    # module-level name here, so keep it stable in case other code in this
    # file refers to it.
    page_numbers = range(1, 6)
    for page in page_numbers:
        main(page)





  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值