第一次爬虫————爬取地震数据

最新推荐文章于 2024-06-12 13:51:15 发布

Hérisson1997

最新推荐文章于 2024-06-12 13:51:15 发布

阅读量4k

点赞数

分类专栏： python 文章标签： python 爬虫

本文链接：https://blog.csdn.net/alvarogimenez/article/details/77191107

版权

python 专栏收录该内容

3 篇文章 0 订阅

订阅专栏

import urllib.request
import requests
import turtle
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
def readhtml(url, timeout=30):
    """Download *url* and return its body decoded as UTF-8.

    The request is a plain GET that carries a desktop-browser ``User-Agent``
    header.  This is a best-effort helper: network, HTTP, malformed-URL and
    decoding failures all yield an empty string instead of an exception.

    Args:
        url: Absolute URL to fetch.
        timeout: Seconds to wait on blocking socket operations before the
            attempt is abandoned.

    Returns:
        The decoded page text, or ``""`` if the page could not be retrieved.
    """
    import http.client  # local import so the file's import block is untouched

    head = {
        'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
    }
    try:
        # No ``data`` argument: supplying one (even an empty dict) makes
        # urllib issue a POST, while this page only needs a GET.
        req = urllib.request.Request(url, headers=head)
        # The context manager closes the connection even if read/decode fails.
        with urllib.request.urlopen(req, timeout=timeout) as response:
            return response.read().decode('utf-8')
    except (OSError, ValueError, http.client.HTTPException):
        # OSError covers URLError/HTTPError and socket timeouts; ValueError
        # covers malformed URLs and UnicodeDecodeError.
        return ""
def analyze(html, alist, datalinks):
    """Extract the cell text of every table row in *html* into *alist*.

    Args:
        html: HTML document text to parse.
        alist: List that receives one list of cell strings for each ``<tr>``
            containing at least one ``<td>``, in document order.
        datalinks: List that is overwritten in place with every ``<tr>`` tag
            found.  A value that is not a list is left alone.
    """
    soup = BeautifulSoup(html, 'html.parser')
    rows = soup.find_all('tr')
    # Fill the caller's list in place; rebinding the parameter name would
    # leave the caller's object empty.
    if isinstance(datalinks, list):
        datalinks[:] = rows
    for row in rows:
        cells = row.find_all('td')
        if not cells:
            # Rows without <td> cells (e.g. header rows) carry no data.
            continue
        texts = []
        for cell in cells:
            text = cell.string
            if text is None:
                # ``.string`` is None when a cell has several children (or
                # none); fall back to the concatenated text so later string
                # joins never receive None.
                text = cell.get_text()
            # Store plain ``str`` objects rather than parse-tree nodes.
            texts.append(str(text))
        alist.append(texts)
def datacollect(alist, ilist, limit=30):
    """Append column 4 of the leading rows of *alist* to *ilist* as ints.

    Column 4 is the one this file's export headers label as focal depth.

    Args:
        alist: Sequence of rows, each indexable up to position 4.
        ilist: List that receives the converted values.
        limit: Maximum number of leading rows to read.  Fewer rows are
            accepted; the function then reads as many as exist.

    Raises:
        IndexError: A row has fewer than five columns.
        ValueError: A value cannot be converted with ``int``.
    """
    # Slicing clamps to the available rows, so a short table no longer
    # raises IndexError the way a fixed ``range(30)`` did.
    for row in alist[:limit]:
        ilist.append(int(row[4]))
def filedeal(alist, path=r'C:\Users\Administrator\Desktop\earthquake.xls',
             limit=30, encoding=None):
    """Write the leading rows of *alist* to a tab-separated text file.

    The output starts with a fixed six-column header line, followed by one
    line per row.  The default file name ends in ``.xls`` but the content is
    plain tab-separated text.

    Args:
        alist: Sequence of rows, each with at least six columns.
        path: Destination file; defaults to the original desktop location.
        limit: Maximum number of leading rows to write.  Fewer rows are
            accepted.
        encoding: Text encoding passed to ``open``; ``None`` keeps the
            platform default, as before.

    Raises:
        IndexError: A row has fewer than six columns.
        OSError: The file cannot be opened or written.
    """
    header = ["震级大小", "时间", "经度", "纬度", "震源深度", "地点"]
    lines = ['\t'.join(header) + '\n']
    for row in alist[:limit]:
        # str() keeps the join from failing on non-string cells.
        lines.append('\t'.join(str(row[col]) for col in range(6)) + '\n')
    # ``with`` guarantees the handle is closed even if writing fails.
    with open(path, 'w', encoding=encoding) as file:
        file.writelines(lines)
def collecttime(alist, plist, olist, limit=30):
    """Collect row timestamps and derive a numeric id string from each.

    Column 1 of each of the leading rows is appended to *plist* as a string.
    Then, for every entry of *plist* (including any that were already there),
    spaces, dashes and colons are removed, the remaining digits are read as
    an integer, one is added, and the result is appended to *olist* as a
    string.  Elsewhere in this file those strings become part of the detail
    page links.

    Args:
        alist: Sequence of rows, each indexable at position 1.
        plist: List that receives the raw timestamp strings.
        olist: List that receives the derived id strings.
        limit: Maximum number of leading rows to read.  Fewer rows are
            accepted.

    Raises:
        IndexError: A row has fewer than two columns.
        ValueError: A timestamp does not reduce to an integer.
    """
    for row in alist[:limit]:
        plist.append(str(row[1]))
    for stamp in plist:
        digits = stamp.replace(' ', '').replace('-', '').replace(':', '')
        # NOTE(review): the +1 presumably matches the site's page-id scheme;
        # confirm against live links.
        olist.append(str(int(digits) + 1))
def filetohtml(alist, olist,
               path=r'C:\Users\Administrator\Desktop\earthquake.html',
               limit=30):
    """Render the leading rows of *alist* as an HTML table and save it.

    The page holds a heading, a six-column table whose last cell links to
    the event's detail page (built from the matching entry of *olist*), and
    a short list of links to earthquake web sites.  The file is written as
    UTF-8 and declares that charset.

    Args:
        alist: Sequence of rows, each with at least six columns.
        olist: Page-id strings, parallel to *alist*.
        path: Destination file; defaults to the original desktop location.
        limit: Maximum number of leading rows to render.  Rendering also
            stops at the shorter of *alist* and *olist*.

    Raises:
        IndexError: A row has fewer than six columns.
        OSError: The file cannot be opened or written.
    """
    from html import escape  # local import so the file's import block is untouched

    headers = ["震级大小", "时间", "经度", "纬度", "震源深度", "地点"]
    # Collect the pieces in a list and join once instead of repeated string
    # concatenation.
    parts = [
        '<meta charset="utf-8">',
        # The heading sits before the table: <h1> is not valid inside <table>.
        '<h1 style="text-align:center">最新地震信息</h1>',
        '<table border="1">',
        '<tr>',
    ]
    parts.extend("<td>" + title + "</td>" for title in headers)
    parts.append('</tr>')
    for row, page_id in zip(alist[:limit], olist):
        parts.append('<tr>')
        for col in range(6):
            # Cell text comes from a scraped page, so escape it.
            cell = escape(str(row[col]))
            if col < 5:
                parts.append("<td>" + cell + "</td>")
            else:
                # The href is quoted on both sides; the earlier form emitted
                # an opening-quote-less attribute with a stray trailing quote.
                href = escape('http://news.ceic.ac.cn/CD' + str(page_id) + '.html')
                parts.append('<td><a href="' + href + '">' + cell + '</a></td>')
        parts.append('</tr>')
    parts.append('</table>')
    parts.append('<h2 style="text-align:center">地震网站链接</h2>')
    parts.append('<a href="http://www.ceic.ac.cn/">中国地震台网</a>')
    parts.append('<a href="https://earthquake.usgs.gov/earthquakes/map/">美国地质勘探局</a>')
    parts.append('<a href="https://www.emsc-csem.org/#2">欧洲地中海地震观测中心</a>')
    # ``with`` guarantees the handle is closed even if writing fails.
    with open(path, 'w', encoding='utf-8') as fileh:
        fileh.write('\n'.join(parts))
def draw(ilist):
    """Draw a turtle line chart of the first values of *ilist*.

    Two axes are drawn from the origin, then up to seven consecutive values
    are plotted as blue dots joined by lines (40 units apart horizontally,
    15 units per value vertically).  Tick marks and text labels are added
    afterwards.

    Args:
        ilist: Sequence of numeric values.  Lists shorter than seven values
            are plotted as far as they go; an empty list draws only the axes.
    """
    yValues = ilist
    t = turtle.Turtle()
    t.hideturtle()
    drawLine(t, 0, 0, 300, 0)   # horizontal axis
    drawLine(t, 0, 0, 0, 175)   # vertical axis
    # Six segments join at most seven points; clamp to the data available so
    # a short list does not raise IndexError.
    segments = min(6, len(yValues) - 1)
    for i in range(segments):
        drawLineWithDots(t,
                         40 + (40 * i), 15 * yValues[i],
                         40 + (40 * (i + 1)), 15 * yValues[i + 1],
                         "blue")
    if yValues:
        # The tick and label helpers take max()/min() of the whole list,
        # which is undefined for empty input.
        drawTickMarks(t, yValues)
        displayText(t, yValues)
def drawLine(t, x1, y1, x2, y2, colorP="black"):
    """Draw a straight line from (x1, y1) to (x2, y2) with turtle *t*.

    Args:
        t: Turtle-like object providing ``up``, ``down``, ``goto`` and
            ``pencolor``.
        x1, y1: Start point.
        x2, y2: End point.
        colorP: Pen colour used for the line.
    """
    t.up()              # move to the start point without drawing
    t.goto(x1, y1)
    t.down()
    t.pencolor(colorP)
    t.goto(x2, y2)
def drawLineWithDots(t, x1, y1, x2, y2, colorP="black"):
    """Draw a line from (x1, y1) to (x2, y2) with a dot at each end.

    Args:
        t: Turtle-like object providing ``pencolor``, ``up``, ``down``,
            ``goto`` and ``dot``.
        x1, y1: Start point.
        x2, y2: End point.
        colorP: Pen colour used for the line and both dots.
    """
    t.pencolor(colorP)
    t.up()              # move to the start point without drawing
    t.goto(x1, y1)
    t.dot(5)
    t.down()
    t.goto(x2, y2)
    t.dot(5)
def drawTickMarks(t, yValues):
    """Draw the axis tick marks for the chart.

    Seven short vertical ticks are drawn along the horizontal axis at
    multiples of 40, followed by one short horizontal tick on the vertical
    axis at the largest value and one at the smallest (15 units per value).

    Args:
        t: Turtle-like object forwarded to ``drawLine``.
        yValues: Sequence of numeric values; when empty, only the horizontal
            axis ticks are drawn.
    """
    for i in range(1, 8):
        drawLine(t, 40 * i, 0, 40 * i, 10)
    if not yValues:
        # max()/min() are undefined for an empty sequence.
        return
    # Largest value first, then smallest; each extreme is computed once.
    for extreme in (max(yValues), min(yValues)):
        drawLine(t, 0, 15 * extreme, 10, 15 * extreme)
def displayText(t, yValues):
    """Write the chart's text: extreme-value labels, axis labels and title.

    The largest and smallest values are written left of the vertical axis at
    their scaled heights, ten numeric labels (0, 2, ..., 18) are written
    under the horizontal axis 40 units apart, and the chart title is written
    below them.

    Args:
        t: Turtle-like object providing ``pencolor``, ``up``, ``goto`` and
            ``write``.
        yValues: Sequence of numeric values; when empty, the extreme-value
            labels are skipped.
    """
    t.pencolor("blue")
    t.up()
    if yValues:
        # Largest value first, then smallest; max()/min() would fail on an
        # empty sequence.
        for extreme in (max(yValues), min(yValues)):
            t.goto(-10, (15 * extreme) - 8)
            t.write(extreme, align="center")
    x = 40
    # NOTE(review): labels advance by 2 while plotted points are consecutive
    # rows, and there are more labels than tick marks - confirm intent.
    for i in range(0, 20, 2):
        t.goto(x, -20)
        t.write(str(i), align="center")
        x += 40
    t.goto(0, -50)
    t.write("地震震源深度分析表", font=("Arial", 16, "normal"))
def getxy(alist, xlist, ylist, limit=30):
    """Append columns 2 and 3 of the leading rows as truncated integers.

    Each value is parsed as a float and then truncated toward zero.  These
    are the columns this file's export headers label as longitude and
    latitude.

    Args:
        alist: Sequence of rows, each indexable up to position 3.
        xlist: List that receives the column 2 values.
        ylist: List that receives the column 3 values.
        limit: Maximum number of leading rows to read.  Fewer rows are
            accepted.

    Raises:
        IndexError: A row has fewer than four columns.
        ValueError: A value cannot be parsed as a number.
    """
    for row in alist[:limit]:
        # Convert both values before appending either, so a bad row cannot
        # leave the two lists with different lengths.
        x_value = int(float(row[2]))
        y_value = int(float(row[3]))
        xlist.append(x_value)
        ylist.append(y_value)
def drawaddress(xlist, ylist):
    """Show a scatter plot of the given coordinates with matplotlib.

    Points are drawn as black triangles on fixed axis limits, and the call
    blocks in ``plt.show()`` until the window is closed.

    Args:
        xlist: Horizontal coordinates.
        ylist: Vertical coordinates, parallel to *xlist*.
    """
    # Select the axes first so the title and limits are applied to it.
    plt.subplot()
    plt.title("地震信号分析表")
    # NOTE(review): the limits are ten times the +/-180 and +/-90 ranges of
    # longitude and latitude in degrees, so points cluster in the centre -
    # confirm whether the coordinates were meant to be scaled.
    plt.xlim(-1800, 1800)
    plt.ylim(-900, 900)
    plt.xlabel("x")
    plt.ylabel("y")
    plt.plot(xlist, ylist, 'k^')
    plt.show()
def main():
    """Run the whole pipeline: download, parse, export and plot.

    The earthquake table is fetched from the CEIC home page and parsed into
    rows.  The rows are exported to a tab-separated file and an HTML page,
    then the depth chart and the location scatter plot are shown.  When no
    rows could be obtained, a message is printed and nothing else happens.
    """
    print("地震信息收集系统")
    alist = []
    ilist = []
    datalinks = []
    xlist = []
    ylist = []
    olist = []
    plist = []
    url = "http://www.ceic.ac.cn/"
    html = readhtml(url)
    analyze(html, alist, datalinks)
    if not alist:
        # readhtml returns "" on failure, which parses to zero rows; every
        # later step needs at least one row, so stop here instead of
        # crashing further down.
        print("未获取到地震数据")
        return
    collecttime(alist, plist, olist)
    filedeal(alist)
    filetohtml(alist, olist)
    datacollect(alist, ilist)
    getxy(alist, xlist, ylist)
    draw(ilist)
    drawaddress(xlist, ylist)
# Run the collector only when the file is executed as a script.
if __name__ == '__main__':
    main()

Hérisson1997

关注

0
点赞
踩
17

收藏

觉得还不错? 一键收藏
0
评论
第一次爬虫————爬取地震数据

import urllib.request import requests import turtle import matplotlib.pyplot as plt from bs4 import BeautifulSoup def readhtml(url): try: head={} data={} head['Use
复制链接

扫一扫