import json
import requests
import pymysql
# mysql_conn wraps the database access so that the connection is opened only
# once and then reused for every statement.
class mysql_conn(object):
    """Open one pymysql connection plus cursor and reuse them for every statement.

    The connection is created in ``__init__`` and released in ``__del__``.
    """

    def __init__(self, host='127.0.0.1', user='root', password='123456',
                 port=3306, database='py1011'):
        """Connect to MySQL and create the cursor used by later statements.

        The defaults are the values that used to be hard-coded here, so
        ``mysql_conn()`` still connects to the same local database.

        Args:
            host: MySQL server address.
            user: Account name used to log in.
            password: Password for ``user``.
            port: TCP port of the server.
            database: Schema selected after connecting.
        """
        self.db = pymysql.connect(host=host, user=user, password=password,
                                  port=port, database=database)
        self.cursor = self.db.cursor()

    def execute_modify_mysql(self, sql, args=None):
        """Run a data-modifying statement and commit it.

        Args:
            sql: The statement to execute. Use ``%s`` placeholders when
                ``args`` is given.
            args: Optional values bound to the placeholders by the driver,
                which escapes them; prefer this over formatting values into
                ``sql`` by hand.

        Raises:
            Exception: Whatever the cursor or the commit raised; the
                transaction is rolled back before the error is re-raised.
        """
        try:
            self.cursor.execute(sql, args)
            self.db.commit()
        except Exception:
            # Do not leave a half-applied transaction on the shared connection.
            self.db.rollback()
            raise

    def __del__(self):
        """Close the cursor and the connection if they were ever created."""
        # __init__ may have failed before either attribute was assigned, so
        # look them up defensively instead of assuming they exist.
        cursor = getattr(self, 'cursor', None)
        if cursor is not None:
            cursor.close()
        db = getattr(self, 'db', None)
        if db is not None:
            db.close()
# The endpoint could not be accessed with a bare request, so a session cookie
# and a browser User-Agent are sent along.  The other browser headers (Accept,
# Accept-Encoding, Accept-Language, Connection, Host, Referer,
# X-Requested-With) are left disabled.
headers = {
    # Cookie pairs captured from a browser session, joined back into one value.
    'Cookie': '; '.join((
        'aliyungf_tc=AQAAALoQF3p02gsAUhVFebQ3uBBNZn+H',
        'xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4',
        'xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA',
        'xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a',
        'xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs',
        '_ga=GA1.2.516718356.1534295265',
        '_gid=GA1.2.1050085592.1534295265',
        'u=301534295266356',
        'device_id=f5c21e143ce8060c74a2de7cbcddf0b8',
        'Hm_lvt_1db88642e346389874251b5a1eded6e3=1534295265,1534295722',
        'Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534295722',
    )),
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/67.0.3396.99 Safari/537.36'
    ),
}
# Fetch one page of the Xueqiu public timeline with requests and store every
# entry in the `xueqiu` table.
url = 'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=-1&count=10&category=111'
# Follow-up pages recorded for this endpoint keep since_id=-1 and category=111
# but use count=15 and a concrete max_id (184275, 184086, 183687, 183404,
# 183100, 182767, 182496, 182271, ...).

# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, timeout=10)
# Stop on an HTTP error instead of trying to parse an error page as JSON.
response.raise_for_status()
res_dict = json.loads(response.text)
list_list = res_dict['list']

# Placeholders instead of string formatting: scraped titles and descriptions
# may contain quotes, which would break (or inject into) a hand-built statement.
insert_sql = 'insert into xueqiu(ids,title,description,target) values(%s,%s,%s,%s)'

# Open the database connection once and reuse it for every row.
mc = mysql_conn()

for list_item_dict in list_list:
    # Each list entry is a dict whose 'data' value is itself a JSON string.
    data_dict = json.loads(list_item_dict['data'])
    # Build a fresh record per item so nothing leaks over from the previous one.
    data = {
        'ids': data_dict['id'],
        'title': data_dict['title'],
        'description': data_dict['description'],
        'target': data_dict['target'],
    }
    print(data['ids'])
    print(data['title'])
    print(data['description'])
    print(data['target'])
    try:
        # mogrify lets the driver escape the values and returns the final
        # statement, which is then run through the existing helper.
        sql = mc.cursor.mogrify(
            insert_sql,
            (data['ids'], data['title'], data['description'], data['target']),
        )
        mc.execute_modify_mysql(sql)
    except Exception as exc:
        # Best effort per item: report the failure and continue with the next
        # entry, without swallowing KeyboardInterrupt/SystemExit.
        print('以上内容出错,没有存到数据库')
        print(repr(exc))
    else:
        print('以上内容爬取成功')
    print('-' * 50)
# Xueqiu (xueqiu.com) crawler
# Latest recommended article published on 2024-08-08 08:10:37