爬虫实战项目--优信二手车--天眼

本文介绍了一个使用 Python 编写网络爬虫的实际案例，包括抓取汽车信息和企业信息的方法。文章利用 requests、lxml 等库，详细展示了如何设置代理、解析网页并提取所需数据。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

import requests, time, random
from fake_useragent import UserAgent
from lxml.html import etree, HTMLParser

from requests.packages.urllib3.exceptions import InsecureRequestWarning, InsecurePlatformWarning
from multiprocessing import Pool


# Silence the urllib3 warnings raised for unverified HTTPS requests.
for _warning_category in (InsecureRequestWarning, InsecurePlatformWarning):
    requests.packages.urllib3.disable_warnings(_warning_category)
del _warning_category  # keep the module namespace free of the loop variable

# Shared source of random User-Agent header values.
ua = UserAgent()

# Module-level state shared by the crawler functions.
number = 1
new_session_xin = 'k8935l0tr72p6dfngdfnuiukoo4n6jfn'
anti_uid = '8F932282-2E08-FA10-DDDC-841EEF3E0BF3'


def get_proxy():
    """Fetch one proxy address from the local proxy pool.

    Queries the pool service at ``http://localhost:5010/get/`` and wraps the
    address it returns in a ``proxies`` mapping usable by ``requests``.

    Returns:
        dict: Mapping with ``'http'`` and ``'https'`` keys, both pointing at
        the fetched proxy.

    Raises:
        requests.exceptions.RequestException: If the pool service cannot be
            reached or does not answer within the timeout.
    """
    # Bound the wait so a hung pool service cannot stall the crawler forever.
    response = requests.get('http://localhost:5010/get/', timeout=10)
    # Assumes the pool answers with a bare "host:port" string, as the original
    # concatenation did -- TODO confirm. Strip stray whitespace/newlines so
    # they do not end up inside the proxy URL.
    address = response.text.strip()
    proxy_url = 'http://' + address
    # requests picks the proxy by the scheme of the target URL. The pages this
    # file requests are https://, so a mapping with only an 'http' key would
    # be bypassed and the request sent directly.
    return {'http': proxy_url, 'https': proxy_url}


def get_session_xin():
    """Request the wishlist-token endpoint and return its ``session_xin`` cookie.

    Sends a GET to ``https://www.xin.com/search/get_wishlist_token`` with a
    random User-Agent, a cookie header carrying the module-level ``anti_uid``,
    and a proxy obtained from ``get_proxy()``. Certificate verification is
    disabled for the request.

    Returns:
        str: The value of the ``session_xin`` cookie set by the response, or
        the literal ``'没有'`` when the response carries no such cookie. The
        value is also printed.
    """
    # Cookie header template; the single ``{}`` placeholder takes anti_uid.
    cookie_template = 'XIN_bhv_oc=1233; XIN_anti_uid={}; XIN_LOCATION_CITY=%7B%22cityid%22%3A%221001%22%2C%22areaid%22%3A%224%22%2C%22big_areaid%22%3A%222%22%2C%22provinceid%22%3A%2210%22%2C%22cityname%22%3A%22%5Cu90d1%5Cu5dde%22%2C%22ename%22%3A%22zhengzhou%22%2C%22shortname%22%3A%22ZN%22%2C%22service%22%3A%221%22%2C%22near%22%3A%22201%2C501%2C2101%2C2117%2C1010%2C1002%2C601%2C2401%2C901%2C1201%22%2C%22tianrun_code%22%3A%220371%22%2C%22zhigou%22%3A%221%22%2C%22longitude%22%3A%22113.6253680%22%2C%22latitude%22%3A%2234.7465990%22%2C%22direct_rent_support%22%3A%221%22%2C%22salvaged_support%22%3A%221%22%2C%22isshow_c%22%3A%221%22%7D; uid=rBAKEls5vG1giwDiR4LWAg==; NSC_20.eqppmxfc.yjo.dpn=ffffffffaf18140345525d5f4f58455e445a4a423660; XIN_UID_CK=5e21beea-146c-a405-2a32-2df07fc0eac9'
    request_headers = {
        'User-Agent': ua.random,
        'Host': 'www.xin.com',
        'Referer': 'https://www.xin.com/zhengzhou/baoma/',
        'Cookie': cookie_template.format(anti_uid),
    }

    token_response = requests.get(
        'https://www.xin.com/search/get_wishlist_token',
        headers=request_headers,
        proxies=get_proxy(),
        verify=False,
    )
    # session_xin arrives via the response's Set-Cookie header.
    session_value = token_response.cookies.get('session_xin', '没有')
    print(session_value)
    return session_value


def get_list_page(page_num):
    global number, new_session_xin, anti_uid
    tm = str(time.time()).split('.')[0]
    url = 'https://www.xin.com/zhengzhou/baoma/i{}'.format(page_num)
    headers = {
        'User-Agent': ua.random,
        'Host': 'www.xin.com',
        'Referer': 'https://www.xin.com/zhengzhou/baoma/',
        'Cookie': 'RELEASE_KEY=; XIN_bhv_oc=1233; XIN_anti_uid={}; XIN_LOCATION_CITY=%7B%22cityid%22%3A%221001%22%2C%22areaid%22%3A%224%22%2C%22big_areaid%22%3A%222%22%2C%22provinceid%22%3A%2210%22%2C%22cityname%22%3A%22%5Cu90d1%5Cu5dde%22%2C%22ename%22%3A%22zhengzhou%22%2C%22shortname%22%3A%22ZN%22%2C%22service%22%3A%221%22%2C%22near%22%3A%22201%2C501%2C2101%2C2117%2C1010%2C1002%2C601%2C2401%2C901%2C1201%22%2C%22tianrun_code%22%3A%220371%22%2C%22zhigou%22%3A%221%22%2C%22longitude%22%3A%22113.6253680%22%2C%22latitude%22%3A%2234.7465990%22%2C%22direct_rent_support%22%3A%221%22%2C%22salvaged_support%22%3A%221%22%2C%22isshow_c%22%3A%221%22%7D; uid=rBAKEls5vG1giwDiR4LWAg==; NSC_20.eqppmxfc.yjo.dpn=ffffffffaf18140345525d5f4f58455e445a4
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值