"""Crawl earthquake records from CEIC (www.ceic.ac.cn) for the past year.

Results are stored in a SQLite database; incremental updates fetch the
records published within the last 48 hours.
"""
import json
import os
import sqlite3

import requests
# Earthquake data within the past year
def glob_add(page):
    """Fetch earthquake records from CEIC pages 1..page-1 and insert them.

    Each page is requested from the ``speedsearch`` AJAX endpoint; the
    response is a JSONP-style payload (JSON wrapped in parentheses).
    Rows are inserted via the module-level ``cursor`` and committed once
    on the module-level ``conn`` after all pages are processed.

    :param page: exclusive upper bound of the page range to crawl.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'}
    count = 0
    for i in range(1, page):
        url = 'http://www.ceic.ac.cn/ajax/speedsearch?num=6&&page={}'.format(i)
        # requests interprets timeout in *seconds*; the original 10000 was
        # presumably intended as milliseconds, so use 10 seconds.
        html = requests.get(url, headers=headers, timeout=10)
        response = html.text
        # Strip the surrounding "(" ")" of the JSONP wrapper, then parse as
        # JSON — never eval() untrusted network data (arbitrary-code risk).
        data = json.loads(response[1:-1])
        for result in data['shuju']:
            count += 1
            print(count)
            # Parameterized insert: avoids SQL injection and quoting bugs
            # (location names may legitimately contain quote characters).
            cursor.execute(
                "insert into earthquake values (?,?,?,?,?,?,?,?);",
                (count, result['id'], result['O_TIME'], result['M'],
                 result['EPI_LAT'], result['EPI_LON'],
                 result['EPI_DEPTH'], result['LOCATION_C']))
    conn.commit()
# Incremental update (records from the last 48 hours)
def local_add():
url = 'http://www.ceic.ac.cn/ajax/speedsearch?num=2&&page=1&&'
headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'}
html = requests.get(url, headers=headers, timeout=10000)
response = html.text
# 去除首尾 ( )
data = eval(response[1:-1])
records = data['shuju']
count=0
for result in records:
did = result['id']
on_time = result['O_TIME']
m = result['M']
epi_lat =