爬虫--爬取雪球网数据

 

import requests
import json
import pymysql


#雪球网

# Scrape the Xueqiu public timeline (category 111) page by page through its
# ajax endpoint and store every post in the MySQL table `user`.
#
# Pagination: the first request is sent with max_id=-1; every response carries
# a `next_max_id` value that is passed as max_id of the following request.

# Endpoint template; the two placeholders are max_id and count.
timeline_url = 'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id={}&count={}&category=111'

# Request headers are identical for every page, so build them once.
# NOTE(review): the Cookie looks like it was copied from a browser session and
# presumably expires -- confirm and refresh it before running.
headers = {
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Cookie': 'aliyungf_tc=AQAAAOtGklSxGQwAUhVFeTaV3wKjdope; xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; _ga=GA1.2.557243314.1534335292; _gid=GA1.2.1222731268.1534335292; _gat_gtag_UA_16079156_4=1; Hm_lvt_1db88642e346389874251b5a1eded6e3=1534335293; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534335293; u=121534335293404; device_id=4633cb10d0c99f1a3733f5feb4427c50',
    'Host': 'xueqiu.com',
    'Referer': 'https://xueqiu.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
}

# Placeholders instead of string formatting: the driver escapes the values, so
# quotes inside a scraped title/description can neither break nor inject SQL.
# A value of None is now stored as NULL rather than as the text 'None'.
insert_sql = "insert into user(uid,title,description,target) values(%s,%s,%s,%s)"

max_id = -1  # value sent as max_id; replaced by next_max_id after each page
count = 10   # page size of the first request; later requests use 15

# One connection for the whole run instead of one per inserted row.
db = pymysql.connect(host='127.0.0.1', user='root', password='XXXX', port=3306, database='XXX')
try:
    for i in range(1, 21):  # 20 ajax requests in total
        url = timeline_url.format(str(max_id), str(count))

        # Fail fast on a hung connection or an HTTP error instead of hitting
        # an opaque JSON/KeyError further down.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        res_dict = json.loads(response.text)
        print(res_dict)
        max_id = res_dict['next_max_id']
        print(max_id)

        with db.cursor() as cursor:
            # Slice rather than index 0..count-1: a page holding fewer than
            # `count` items no longer raises IndexError.
            for item in res_dict['list'][:count]:
                # Each entry's 'data' field is itself a JSON-encoded string.
                data = json.loads(item['data'])
                print(data)
                uid = data['id']
                title = data['title']
                print(title)
                desc = data['description']
                target = data['target']

                cursor.execute(insert_sql, (uid, title, desc, target))

        # Commit once per page; rows of a page are stored together.
        db.commit()

        count = 15
finally:
    # Always release the connection, even when a request or insert fails.
    db.close()

 

  • 3
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值