网站:http://lishi.tianqi.com/
代码使用方式:
python Get_The_Tem.py
脚本名字:Get_The_Tem.py
脚本内容:
# -*- coding: utf-8 -*-
"""
@author: CC
"""
import re,csv
import urllib
import time
import urllib.request
#from urllib import request
"""
设置需要爬取的地区,使用该地区的拼音
设置需要爬取的年、月
"""
#city = 'liannan'
#years = ['2020']
#months = ['01']
city = input("please input the city name by pingying:")
years = input("please input the target year(split by ','),eg:2019,2020:").split(",")
months = input("please input the target month(split by ',',eg:01,02,11):").split(",")
def getHtml(city, year, month):
    """Download one monthly history page from lishi.tianqi.com.

    Args:
        city: City name used verbatim as the URL path segment (the prompts
            in this script ask for the pinyin spelling).
        year: Target year; rendered into the URL with ``str()``.
        month: Target month; rendered into the URL with ``str()`` exactly as
            given, so ``'1'`` and ``'01'`` produce different URLs.

    Returns:
        The raw, undecoded response body as ``bytes``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails (``HTTPError`` is a subclass).
    """
    url = 'http://lishi.tianqi.com/' + city + '/' + str(year) + str(month) + '.html'
    print (url)
    request1 = urllib.request.Request(url)
    # Send a browser User-Agent instead of urllib's default one.
    request1.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36')
    # The context manager closes the connection even if read() raises;
    # the script calls this once per (year, month) so leaks would add up.
    with urllib.request.urlopen(request1) as response:
        return response.read()
def _parse_temp_series(page, var_name):
    """Return the numbers of the ``var <var_name> = [...]`` array in *page* as floats."""
    found = re.search(r'var ' + re.escape(var_name) + r' = \[([^\]]*)\]', page)
    if found is None:
        raise ValueError('"var %s = [...]" not found in page' % var_name)
    body = found.group(1).strip()
    if not body:
        # An empty array literal means there is no data, not a parse error.
        return []
    # Items look like "12"; drop surrounding whitespace and double quotes.
    return [float(item.strip().strip('"')) for item in body.split(',')]


def getTemp(html,year,month):
    """Extract the daily high/low temperatures embedded in a history page.

    The page is expected to contain two JavaScript array literals,
    ``var hightemp = [...]`` and ``var lowtemp = [...]``, holding one
    (optionally double-quoted) number per day.

    Args:
        html: Page content, either ``bytes`` as returned by ``getHtml`` or an
            already decoded ``str``.
        year: Target year; converted with ``int()``.
        month: Target month; converted with ``int()``.

    Returns:
        A list with one ``[year, month, day, high, low]`` entry per element of
        the ``hightemp`` array, where ``day`` counts from 1 and ``high`` /
        ``low`` are floats. Empty arrays yield an empty list.

    Raises:
        ValueError: If either array is missing from the page, a value is not
            numeric, or ``lowtemp`` has fewer entries than ``hightemp``.
    """
    year = int(year)
    month = int(month)
    if isinstance(html, (bytes, bytearray)):
        # Decode instead of parsing the b'...' repr; the arrays themselves are
        # ASCII, so undecodable bytes elsewhere on the page are just replaced.
        page = html.decode('utf-8', errors='replace')
    else:
        page = str(html)

    highs = _parse_temp_series(page, 'hightemp')
    lows = _parse_temp_series(page, 'lowtemp')
    if len(lows) < len(highs):
        raise ValueError(
            'lowtemp has %d values but hightemp has %d' % (len(lows), len(highs)))

    # zip() stops at the last high value; any surplus low values are ignored.
    return [
        [year, month, day, high, low]
        for day, (high, low) in enumerate(zip(highs, lows), start=1)
    ]
if __name__ == "__main__":
    # Crawl every requested (year, month) page for the configured city, echo
    # the parsed rows to stdout and store them in "<city>.csv".
    # newline='' hands line-ending control to the csv module, as its
    # documentation requires; otherwise some platforms get blank rows.
    with open(city + '.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        # One header column per field of the rows returned by getTemp().
        writer.writerow(('year', 'month', 'day', 'high', 'low'))
        for year in years:
            for month in months:
                html = getHtml(city, year, month)
                rows = getTemp(html, year, month)
                # Persist the data; previously only the header reached the file.
                writer.writerows(rows)
                for row in rows:
                    # Tab-terminated integer rendering of every field
                    # (temperatures are truncated by %d), one row per line.
                    print ("".join("%d\t" % value for value in row))
                print (year + month + ' OK!')
                # Pause between requests.
                time.sleep(2)
欢迎交流:
1193226980@qq.com