1. 首先我们封装一段往数据库添加数据的代码,方便后面调用
import pymysql
class mysql_conn(object):
    """Thin wrapper around one PyMySQL connection and one cursor.

    The connection is opened in ``__init__`` and released in ``__del__``;
    ``execute__mysql`` runs a single statement and commits it.
    """

    def __init__(self):
        # Pre-set both attributes so __del__ is safe even if connect() raises.
        self.db = None
        self.cursor = None
        # Keyword arguments: PyMySQL 1.0+ no longer accepts positional
        # connection parameters.  utf8mb4 keeps non-ASCII text (e.g. Chinese)
        # intact regardless of the driver's default charset.
        self.db = pymysql.connect(
            host="localhost",
            user="root",
            password="******",
            database="xueqiu__text",
            charset="utf8mb4",
        )
        # Cursor object used for every statement issued through this wrapper.
        self.cursor = self.db.cursor()

    def execute__mysql(self, k, args=None):
        """Execute one SQL statement and commit it.

        Args:
            k: SQL text.  May contain ``%s`` placeholders when ``args`` is given.
            args: Optional sequence/mapping of values bound to the placeholders
                by the driver (which escapes them).  ``None`` sends ``k`` as-is.

        Raises:
            Whatever the driver raises; the transaction is rolled back first.
        """
        try:
            self.cursor.execute(k, args)
            self.db.commit()
        except Exception:
            # Do not leave a half-applied transaction open on the connection.
            self.db.rollback()
            raise

    def __del__(self):
        """Release the cursor and the connection when the object is reclaimed."""
        cursor = getattr(self, "cursor", None)
        db = getattr(self, "db", None)
        # Clear the references first so a second call is a no-op.
        self.cursor = None
        self.db = None
        if cursor is not None:
            cursor.close()
        if db is not None:
            db.close()
if __name__ == "__main__":
    # Manual smoke test: push one hard-coded row through the wrapper.
    demo_insert = "insert into zhang(uid,title,target,description) values (1,'df','ds','dds')"
    demo_conn = mysql_conn()
    demo_conn.execute__mysql(demo_insert)
2. 分析 URL(每次响应里的 next_max_id 就是下一次请求的 max_id;第一次 count=10,之后 count=15)
#1
# https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=-1&count=10&category=111
# next_id:184263
# next_max_id:184275
# tip:null
#
# #2
# https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=184275&count=15&category=111
# next_id:184082
# next_max_id:184086
# tip:null
#
#
# #3
# https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=184086&count=15&category=111
# next_id:183682
# next_max_id:183687
# tip:null
3. 进入正题,爬取数据:
import requests
import json
# 调用上面封装好的执行数据库添加
from MySQL__text import mysql_conn
# Crawl the Xueqiu public timeline page by page and store every post in MySQL.
#   1. Build the request URL for each page.
#   2. Pull the wanted fields out of every item of the AJAX response.

TIMELINE_URL = (
    'https://xueqiu.com/v4/statuses/public_timeline_by_category.json'
    '?since_id=-1&max_id={}&count={}&category=111'
)
# Placeholders are filled in by the driver, which escapes quotes in the
# scraped text instead of splicing it into the statement.
INSERT_SQL = "insert into zhang(uid,title,target,description) values (%s,%s,%s,%s)"
TOTAL_REQUESTS = 15  # number of AJAX pages to fetch

# Request headers never change, so build them once outside the loop.
# NOTE(review): the session cookie is hard-coded and presumably expires;
# refresh it from a browser session when requests start failing.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36',
    'Cookie': 'aliyungf_tc=AQAAANYIWD45fAgAUhVFedAJ1g52dN1J; xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; _ga=GA1.2.1201105619.1534335404; _gid=GA1.2.839495955.1534335404; u=711534335406418; device_id=11784be644def4e388466a52197bbf16; Hm_lvt_1db88642e346389874251b5a1eded6e3=1534335407,1534335456,1534340424,1534340452; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534341631',
}

# One connection for the whole crawl instead of a new one per inserted row.
db_conn = mysql_conn()

max_id = -1  # the first request is sent with max_id=-1
count = 10   # the first request asks for 10 items, later ones for 15

for _ in range(TOTAL_REQUESTS):
    url = TIMELINE_URL.format(max_id, count)
    # A timeout keeps a stalled connection from hanging the crawl forever.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()

    # response.text is a str, which is what json.loads expects.
    res__dict = json.loads(response.text)
    posts = res__dict['list']
    if not posts:
        # Nothing left to page through.
        break

    # Iterate over what the server actually returned; a page may hold fewer
    # than `count` items.
    rows = []
    for post in posts:
        # Each item's 'data' field is itself a JSON-encoded string.
        data = json.loads(post['data'])
        print(data['description'])
        rows.append(
            (data['id'], data['title'], data['target'], data['description'])
        )

    # Use the wrapper's cursor directly so the driver escapes the values and
    # the whole page is inserted in one batch, then commit once per page.
    db_conn.cursor.executemany(INSERT_SQL, rows)
    db_conn.db.commit()

    # The response's next_max_id is the max_id of the following request.
    max_id = res__dict['next_max_id']
    count = 15