Python 爬金十数据

话不多说,直接上能用的代码 

import requests
import  datetime
import pymysql
from requests.adapters import HTTPAdapter
import time

def conn():
    """Open and return a MySQL connection to the `shipinformation` database.

    Returns:
        pymysql.connections.Connection: a live connection object.

    Note:
        `pymysql.connect` raises on failure and never returns a falsy value,
        so the original `if connect:` success check was always true; reaching
        the print below already means the connection succeeded.
        Credentials are blank here — fill in host/user/password before use.
    """
    connect = pymysql.connect(host='', user='', password='', database='shipinformation', charset='utf8')
    print("连接成功!")
    return connect
# NOTE(review): this call rebinds the module-level name `conn` from the
# function to the connection object, so conn() cannot be called twice.
# Kept as-is because the (commented-out) cursor line refers to this name.
conn = conn()
# cursor = conn.cursor()

# Scrape flash-news pages from the jin10 API.
url = "https://flash-api.jin10.com/get_flash_list"
header = {
    "x-app-id": "SO1EJGmNgCtmpcPF",
    "x-version": "1.0.0",
}
queryParam = {
    # Paging cursor: each request returns items older than max_time.
    "max_time": "2021-05-25 9:47:02",
    "channel": "-8200",
}

# One session with retry adapters, used for EVERY request. The original
# code mounted the adapters on a session but then called requests.get
# directly, so max_retries=3 was never actually applied.
session = requests.Session()
session.mount('http://', HTTPAdapter(max_retries=3))
session.mount('https://', HTTPAdapter(max_retries=3))

# Loop until the API returns an empty page; totalCount tallies items seen.
totalCount = 0
# timeout added here too — the original's first request could hang forever.
Data = session.get(url, params=queryParam, timeout=5, headers=header).json()['data']
length = len(Data)
while length > 0:
    for item in Data:
        try:
            item_id = item['id']          # renamed: `id` shadows the builtin
            time1 = item['time']
            create_time = datetime.datetime.strptime(time1, "%Y-%m-%d %H:%M:%S")
            msg_type = item['type']       # renamed: `type` shadows the builtin
            if msg_type == 0:
                payload = item['data']
                # 'content' is present in every payload shape; 'pic' and
                # 'title' depend on how many fields the payload carries
                # (>2 fields: pic+title; exactly 1: content only; else: pic).
                content = payload['content'].replace('<b>', '').replace('</b>', '').replace('<br />', '').replace('<br/>', '')
                pic = payload.get('pic') if len(payload) != 1 else None
                title = payload.get('title') if len(payload) > 2 else None
                update_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                print(item_id, time1, msg_type, pic, content, title, update_time)
        except Exception as e:
            # Best-effort per item: log and move on to the next record.
            print(e)
            continue

    totalCount += length
    # Advance the paging cursor to the oldest item's timestamp.
    queryParam['max_time'] = Data[length - 1]['time']
    print('next queryParam is', queryParam['max_time'])

    # Fetch the next page.
    try:
        Data = session.get(url, params=queryParam, timeout=5, headers=header).json()['data']
        length = len(Data)
    except Exception as e:
        # On a failed request, stop. The original left Data/length unchanged
        # and re-entered the loop, re-printing the same page forever.
        print(e)
        break


print('all ok,totalCount is:', totalCount)

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值