雪球爬取数据

爬取数据

from urllib import request
import json
# URL template for the public-timeline JSON endpoint. The two `{}` placeholders
# are filled, in order, with the `max_id` cursor and the `count` value.
url = (
    'https://xueqiu.com/v4/statuses/public_timeline_by_category.json'
    '?since_id=-1&max_id={}&count={}&category=111'
)

def Snowball(page,max_id=None,count=None):
    """Fetch and print successive pages of the timeline JSON feed.

    Starting at page number ``page``, one request is issued per page while
    the page number is below 4. Every entry of the response's ``list`` is
    printed as ``id data``, and the ``id`` of the last entry becomes the
    ``max_id`` cursor of the next request.

    Args:
        page: Number of the first page to fetch. Nothing is requested when
            it is 4 or greater.
        max_id: Cursor for the first request. When ``None``, the first
            request uses ``max_id=-1`` with 10 items and ``count`` is
            ignored.
        count: Item count for the first request when ``max_id`` is given;
            falls back to 10 when omitted. Follow-up requests always ask
            for 15 items.

    Raises:
        urllib.error.URLError: If a request cannot be completed.
        ValueError: If a response body is not valid JSON.
        KeyError: If a response lacks ``list`` or an entry lacks ``id`` or
            ``data``.
    """
    # Request headers.
    # NOTE(review): the session cookie is hard-coded; presumably it expires
    # and has to be refreshed by hand -- confirm before relying on it.
    headers = {
        'Cookie': 'aliyungf_tc=AQAAAFIZtRD3fgIAUhVFeaXEDQJdCP+S; xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; _ga=GA1.2.1087044515.1534297298; _gid=GA1.2.1740003293.1534297298; u=151534297299198; device_id=0c55f25e05c68b0c012feaab18b5d894; Hm_lvt_1db88642e346389874251b5a1eded6e3=1534297300,1534297331; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534297331',
        'Referer': 'https://xueqiu.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36',
    }
    if max_id is None:
        max_id, count = -1, 10
    elif count is None:
        # Without a fallback, a missing count would be formatted into the
        # URL as the literal text "None".
        count = 10

    # Iterate instead of recursing: one loop pass per page.
    while page < 4:
        # Request object
        req = request.Request(url.format(max_id, count), headers=headers)
        page += 1
        # The context manager closes the connection even if decoding fails.
        with request.urlopen(req) as response:
            res_dict = json.loads(response.read().decode('utf-8'))
        entries = res_dict['list']
        for item in entries:
            print(item['id'], item['data'])
        if not entries:
            # An empty page provides no last id to continue from.
            break
        max_id = entries[-1]['id']
        # NOTE(review): every page after the first requests 15 items,
        # whatever `count` the caller passed -- confirm this is intended.
        count = 15
# Script entry point: start at page 1 with cursor -1 and 10 items requested.
if __name__ == '__main__':
    Snowball(page=1, max_id=-1, count=10)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值