话不多说,直接上能用的代码
import requests
import datetime
import pymysql
from requests.adapters import HTTPAdapter
import time
def conn():
connect = pymysql.connect(host='', user='', password='', database='shipinformation',charset='utf8')
if connect:
print("连接成功!")
return connect
conn = conn()
# cursor = conn.cursor()
##爬虫获取页面数据
url = "https://flash-api.jin10.com/get_flash_list"
header = {
"x-app-id": "SO1EJGmNgCtmpcPF",
"x-version": "1.0.0",
}
queryParam = {
"max_time": "2021-05-25 9:47:02",
"channel": "-8200",
}
#循环爬取并插入数据:结束条件是爬不到数据为止
totalCount = 0
Data = requests.get(url, queryParam, headers=header).json()['data']
length = len(Data)
while (length > 0):
for i in range(length):
try:
id = Data[i]['id']
time1 = Data[i]['time']
create_time = datetime.datetime.strptime(time1, "%Y-%m-%d %H:%M:%S")
type = Data[i]['type']
if type == 0:
if len(Data[i]['data']) > 2:
pic = Data[i]['data']['pic']
content = Data[i]['data']['content'].replace('<b>','').replace('</b>','').replace('<br />','').replace('<br/>','')
title = Data[i]['data']['title']
elif len(Data[i]['data']) == 1:
pic = None
content = Data[i]['data']['content'].replace('<b>','').replace('</b>','').replace('<br />','').replace('<br/>','')
title = None
else:
pic = Data[i]['data']['pic']
content = Data[i]['data']['content'].replace('<b>','').replace('</b>','').replace('<br />','').replace('<br/>','')
title = None
update_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
print(id, time1, type, pic, content, title,update_time)
except Exception as e:
print(e)
continue
totalCount += length
# 修正下一个查询时间
queryParam['max_time'] = Data[length - 1]['time']
print('next queryParam is', queryParam['max_time'])
# 再请求一次数据
try:
s = requests.Session()
s.mount('http://', HTTPAdapter(max_retries=3))
s.mount('https://', HTTPAdapter(max_retries=3))
Data = requests.get(url, queryParam,timeout=5, headers=header).json()['data']
length = len(Data)
except Exception as e:
print(e)
print('all ok,totalCount is:', totalCount)