爬取雪球网案例

1.首先我们封装一段向数据库添加数据的代码,方便后面调用

import pymysql

class mysql_conn(object):
    """Thin wrapper around one PyMySQL connection and one cursor.

    The connection is opened in ``__init__`` and released in ``__del__``.
    Calling ``mysql_conn()`` with no arguments connects with the same
    settings the original hard-coded.
    """

    def __init__(self, host="localhost", user="root", password="******",
                 database="xueqiu__text", charset="utf8mb4"):
        """Open the connection and create a cursor.

        Args:
            host: MySQL server host.
            user: MySQL user name.
            password: Password for ``user``.
            database: Schema to select after connecting.
            charset: Connection character set; set explicitly so non-ASCII
                text does not depend on the installed PyMySQL's default.
        """
        # Keyword arguments: PyMySQL 1.0+ no longer accepts positional ones.
        self.db = pymysql.connect(
            host=host,
            user=user,
            password=password,
            database=database,
            charset=charset,
        )
        # Cursor used by every statement executed through this object.
        self.cursor = self.db.cursor()

    def execute__mysql(self, k, args=None):
        """Execute one SQL statement and commit it.

        Args:
            k: SQL text. Use ``%s`` placeholders for values.
            args: Optional tuple/list/dict of values bound to the
                placeholders by the driver (avoids manual quoting).

        Raises:
            Exception: whatever the driver raises; the transaction is
                rolled back before the exception propagates.
        """
        try:
            self.cursor.execute(k, args)
        except Exception:
            # Do not leave a half-finished transaction on the connection.
            self.db.rollback()
            raise
        self.db.commit()

    def __del__(self):
        """Release the cursor and the connection when the object is collected."""
        # __init__ may have failed before these attributes were assigned.
        cursor = getattr(self, "cursor", None)
        if cursor is not None:
            cursor.close()
        db = getattr(self, "db", None)
        if db is not None:
            db.close()

if __name__ == '__main__':
    # Manual smoke test: open a connection and insert one fixed row into
    # the `zhang` table to confirm the wrapper works end to end.
    demo_conn = mysql_conn()
    demo_conn.execute__mysql(
        "insert into zhang(uid,title,target,description) values (1,'df','ds','dds')"
    )

2.分析url

#1
# https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=-1&count=10&category=111
# next_id:184263
# next_max_id:184275
# tip:null
#
# #2
# https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=184275&count=15&category=111
# next_id:184082
# next_max_id:184086
# tip:null
#
#
# #3
# https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=184086&count=15&category=111
# next_id:183682
# next_max_id:183687
# tip:null

 

3. 进入正题,爬取数据

import requests
import json
# 调用上面封装好的执行数据库添加
from MySQL__text import mysql_conn




# Crawl 15 AJAX pages of the Xueqiu public timeline and store each post in
# the `zhang` table.
#
# Paging: every response carries `next_max_id`, which is sent back as the
# `max_id` of the following request. The first request asks for 10 items,
# all later ones for 15.

# Request headers are the same for every page, so build them once.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36',
    'Cookie': 'aliyungf_tc=AQAAANYIWD45fAgAUhVFedAJ1g52dN1J; xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; _ga=GA1.2.1201105619.1534335404; _gid=GA1.2.839495955.1534335404; u=711534335406418; device_id=11784be644def4e388466a52197bbf16; Hm_lvt_1db88642e346389874251b5a1eded6e3=1534335407,1534335456,1534340424,1534340452; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534341631',
}

# Values are bound by the driver through %s placeholders. Scraped titles and
# descriptions routinely contain quotes, which broke (and could inject into)
# the previous string-formatted statement.
insert_sql = "insert into zhang(uid,title,target,description) values (%s,%s,%s,%s)"

# One connection for the whole crawl instead of a new one per row.
mc1 = mysql_conn()

max_id = -1
count = 10
for i in range(1, 16):  # i is the 1-based index of the AJAX request
    url = 'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id={}&count={}&category=111'.format(max_id, count)

    # A timeout keeps a stalled connection from hanging the crawl forever;
    # raise_for_status surfaces HTTP errors before JSON parsing is attempted.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()

    # response.text is already a str, which is what json.loads expects.
    res__dict = json.loads(response.text)

    # Cursor for the next page.
    max_id = res__dict['next_max_id']

    # Iterate over what the server actually returned; a page may hold fewer
    # than `count` entries, which made index-based access raise IndexError.
    for entry in res__dict['list']:
        # Each entry's 'data' field is itself a JSON-encoded string.
        data = json.loads(entry['data'])

        uid = data['id']
        title = data['title']
        target = data['target']
        description = data['description']
        print(description)

        mc1.cursor.execute(insert_sql, (uid, title, target, description))
        # Commit per row, as before, so an interrupted crawl keeps its progress.
        mc1.db.commit()

    # Every request after the first asks for 15 items.
    count = 15

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值