爬虫day3 (爬取雪球网n页数据)

爬取雪球网 n 页数据
用到与 MySQL 数据库的交互

 

# 爬取雪球网n页数据
# 用到 与mysql数据库的交互

import requests
import json
import pymysql

class mysql_conn(object):
    """Thin wrapper around one pymysql connection and one cursor.

    The connection is opened in the constructor and closed when the
    instance is destroyed.
    """

    def __init__(self, host='127.0.0.1', user='root', password='123456',
                 port=3306, database='py666'):
        """Open the connection and create a cursor.

        The defaults reproduce the settings that used to be hard-coded, so
        ``mysql_conn()`` behaves as before.

        NOTE(review): the default password is embedded in source; consider
        passing credentials in from configuration instead.
        """
        self.db = pymysql.connect(host=host, user=user, password=password,
                                  port=port, database=database)
        self.cursor = self.db.cursor()

    def execute_modify_mysql(self, sql, args=None):
        """Run one data-modifying statement and commit it.

        Args:
            sql: The statement to execute.
            args: Optional parameters handed to ``cursor.execute`` so the
                driver can quote them. When left as ``None`` the statement
                is sent exactly as given.

        Raises:
            Whatever the driver raises. The transaction is rolled back
            before the exception is re-raised, so a failed statement does
            not leave work pending on the connection.
        """
        try:
            self.cursor.execute(sql, args)
            self.db.commit()
        except Exception:
            self.db.rollback()
            raise

    def __del__(self):
        """Close the cursor and the connection, if they were ever created."""
        # __init__ may have failed before either attribute was assigned;
        # look them up defensively so teardown never raises AttributeError.
        cursor = getattr(self, 'cursor', None)
        db = getattr(self, 'db', None)
        try:
            if cursor is not None:
                cursor.close()
        finally:
            # Close the connection even when closing the cursor failed.
            if db is not None:
                db.close()

# Build the row dict for one timeline entry
def godata(total):
    """Pick the stored fields out of one decoded entry and SQL-escape the text.

    Args:
        total: Mapping with the keys ``id``, ``title``, ``description`` and
            ``target``.

    Returns:
        A dict with the same four keys. ``id`` is copied unchanged; the
        three text fields are escaped so they can be embedded in a quoted
        SQL string literal.

    Raises:
        KeyError: If one of the four keys is missing from ``total``.
    """
    # The top-level alias pymysql.escape_string was removed in PyMySQL 1.0;
    # the function itself lives in pymysql.converters in old and new
    # releases, so import it from there.
    from pymysql.converters import escape_string

    data = {'id': total['id']}
    for key in ('title', 'description', 'target'):
        value = total[key]
        # A JSON null decodes to None, which the escaper cannot handle;
        # store it as an empty string rather than crashing.
        data[key] = escape_string('' if value is None else value)
    return data

# Store every entry of one result page in MySQL
def listgomysql(res_list):
    """Insert each entry of ``res_list`` into table ``biao``.

    Args:
        res_list: Iterable of dicts whose ``'data'`` value is a JSON string
            describing one entry.

    All rows of the batch share a single connection. The previous version
    opened a new connection for every row and left closing it to garbage
    collection. An empty batch opens no connection at all.
    """
    if not res_list:
        return

    mc = mysql_conn()
    for item in res_list:
        # Each entry carries its payload as a nested JSON string.
        total = json.loads(item['data'])
        data = godata(total)
        # The text fields come back from godata() already escaped, which is
        # what makes embedding them in quoted literals acceptable here.
        sql = "insert into biao(uid,title,description,target) values ('%s','%s','%s','%s')" % (
            data['id'], data['title'], data['description'], data['target'])
        mc.execute_modify_mysql(sql)

# Fetch n pages of the public timeline and store each page in MySQL
def add(n, timeout=10):
    """Crawl ``n`` consecutive pages and hand each page to ``listgomysql``.

    Args:
        n: Number of pages (int) to fetch; zero or a negative value fetches
            nothing.
        timeout: Seconds to wait for each HTTP request. Without a timeout
            ``requests`` may block indefinitely on a stalled connection.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (for example when the hard-coded cookie is rejected).
        KeyError: If the response lacks ``list`` or ``next_max_id``.
    """
    # The headers never change between pages, so build them once.
    # NOTE(review): the cookie is a captured session value and will
    # presumably expire; confirm before relying on it.
    headers = {
        'Cookie': 'aliyungf_tc=AQAAAPxRByb/ngsAuACIdd7Bn0Lxsxdh; xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; u=141534315699746; device_id=fb51fe05bac8b87de1e093e72f85bd6b; Hm_lvt_1db88642e346389874251b5a1eded6e3=1534315700,1534315713; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534315713',
        'Referer': 'https://xueqiu.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    }

    # '-1' requests the first page; every response names the cursor of the next.
    next_max_id = '-1'
    for _ in range(n):
        url = 'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=' + next_max_id + '&count=10&category=-1'
        response = requests.get(url, headers=headers, timeout=timeout)
        # Fail loudly on an HTTP error instead of a confusing KeyError below.
        response.raise_for_status()
        res_dict = response.json()

        # Store this page's entries.
        listgomysql(res_dict['list'])

        # The cursor is concatenated into the next URL, so keep it a string.
        next_max_id = str(res_dict['next_max_id'])

# Script entry point: runs at module level, fetching and storing 10 pages.
add(10)

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值