"""Fetch the Xueqiu public timeline (category 111) and store each post in MySQL.

The script requests five consecutive pages of the timeline through an HTTP
proxy, decodes the JSON payload embedded in every list entry, and inserts the
``id``, ``title``, ``description`` and ``target`` fields into the ``xueqiu``
table via the project's ``mysql_conn`` helper.
"""
import json

import requests

from mysq1 import mysql_conn

# Proxy used for both HTTP and HTTPS traffic.
# NOTE(review): the proxy credentials here and the session cookie in `headers`
# are hard-coded in source; consider loading them from configuration.
proxy = {
    'http': 'http://alice:123456@120.78.166.84:6666',
    'https': 'http://alice:123456@120.78.166.84:6666',
}

# Seconds to wait on the proxy/server before giving up; without a timeout
# requests waits indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# The two placeholders are max_id and count, in that order.
URL_TEMPLATE = (
    "https://xueqiu.com/v4/statuses/public_timeline_by_category.json"
    "?since_id=-1&max_id={}&count={}&category=111"
)

# Request headers are identical for every page, so they are built once.
# Accept-Encoding is not sent explicitly (it was disabled in the original
# script) -- presumably so requests negotiates an encoding it can decode.
headers = {
    "Accept": "*/*",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Connection": "keep-alive",
    "Cookie": "aliyungf_tc=AQAAAAdi2UdiYA4AUhVFeXTbcZ5baMu3; xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; _ga=GA1.2.1998827256.1534334826; _gid=GA1.2.1317994468.1534334826; u=681534334827015; device_id=8a50282e3ae31674a7ebdd38cdab60b6; Hm_lvt_1db88642e346389874251b5a1eded6e3=1534334829,1534334859; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534334859",
    "Host": "xueqiu.com",
    "Referer": "https://xueqiu.com/",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest",
}


def _sql_literal(value):
    """Render *value* as a literal that can be inlined into a MySQL statement.

    ``None`` becomes ``NULL``, numbers are written as-is, and everything else
    is converted to text and wrapped in single quotes with backslashes, NUL
    bytes and single quotes escaped.

    NOTE(review): the escaping assumes the server's default sql_mode
    (backslash escapes enabled) -- confirm. A parameterized query would be
    preferable, but ``execute_modify_mysql`` is only seen here being called
    with a single SQL string, so values are still inlined.
    """
    if value is None:
        return 'NULL'
    if isinstance(value, (int, float)):
        return repr(value)
    text = str(value)
    # Backslashes must be doubled first so the escapes added below survive.
    text = text.replace('\\', '\\\\').replace('\0', '\\0').replace("'", "''")
    return "'" + text + "'"


# Initial values of the max_id and count query parameters.
n = -1
page = 10

# One shared connection helper for the whole run, so the database is not
# reopened for every inserted row.
mc = mysql_conn()

for _ in range(5):
    # Build the URL for the current page from the current max_id and count.
    url = URL_TEMPLATE.format(n, page)

    response = requests.get(
        url, headers=headers, proxies=proxy, timeout=REQUEST_TIMEOUT
    )
    # Fail with a clear HTTP error instead of a confusing JSON/KeyError later.
    response.raise_for_status()

    # Decode the response body into a dictionary.
    html_dict = json.loads(response.content.decode('utf-8'))
    list_list = html_dict['list']

    # next_max_id is the max_id value to use in the next page's URL.
    n = int(html_dict['next_max_id'])
    # From the second request onwards each page is requested with count=15.
    page = 15

    for entry in list_list:
        # Each entry carries its payload as a JSON-encoded string under 'data'.
        user_dict = json.loads(entry['data'])

        sql = 'insert into xueqiu(uid,title,description,target) values ({},{},{},{})'.format(
            _sql_literal(user_dict['id']),
            _sql_literal(user_dict['title']),
            _sql_literal(user_dict['description']),
            _sql_literal(user_dict['target']),
        )
        mc.execute_modify_mysql(sql)
雪球网 存入mysql
最新推荐文章于 2021-01-14 17:12:33 发布