抓取网站中的温度数据(最高温和最低温)v1(urllib.request)

数据来源网站:http://lishi.tianqi.com/
代码使用方式

python Get_The_Tem.py

脚本名字:Get_The_Tem.py
脚本内容:

# -*- coding: utf-8 -*-
"""
@author: CC
"""
 
import re,csv
import urllib
import time
import urllib.request
#from urllib import request
"""
设置需要爬取的地区,使用该地区的拼音
设置需要爬取的年、月
"""
#city = 'liannan'
#years = ['2020']
#months = ['01']
city = input("please input the city name by pingying:")
years = input("please input the target year(split by ','),eg:2019,2020:").split(",")
months = input("please input the target month(split by ',',eg:01,02,11):").split(",")
 
def getHtml(city, year, month):
    """Download the monthly history page for a city and return its raw body.

    The URL is built as ``http://lishi.tianqi.com/<city>/<year><month>.html``
    and is printed before the request is sent.

    Args:
        city: City name as it appears in the URL path.
        year: Year part of the page name; converted with ``str``.
        month: Month part of the page name; converted with ``str``.

    Returns:
        The response body as ``bytes``, exactly as read from the server.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` on failure.
    """
    url = 'http://lishi.tianqi.com/' + city + '/' + str(year) + str(month) + '.html'
    print (url)
    request1 = urllib.request.Request(url)
    # A browser-like User-Agent is sent with the request.
    request1.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36')
    # The context manager closes the connection even if read() raises.
    with urllib.request.urlopen(request1) as response:
        return response.read()

def _parse_temp_series(page, name):
    """Return the numbers of the ``var <name> = [...]`` array in *page*.

    Args:
        page: Page text to search.
        name: JavaScript variable name, e.g. ``'hightemp'``.

    Returns:
        A list of floats, one per non-empty array element.

    Raises:
        ValueError: If the array is not present or an element is not numeric.
    """
    # Non-greedy capture stops at the first closing bracket; DOTALL lets the
    # array span several lines.
    found = re.search(
        r'var\s+' + re.escape(name) + r'\s*=\s*\[(.*?)\]', page, re.DOTALL
    )
    if found is None:
        raise ValueError("could not find 'var %s = [...]' in the page" % name)
    values = []
    for token in found.group(1).split(','):
        # Elements may be quoted and may be surrounded by whitespace.
        token = token.strip().strip('"\'').strip()
        if token:
            values.append(float(token))
    return values


def getTemp(html,year,month):
    """Extract the daily high/low temperatures embedded in a page.

    The page is expected to contain two JavaScript arrays,
    ``var hightemp = [...]`` and ``var lowtemp = [...]``, whose n-th elements
    belong to day n of the month.

    Args:
        html: Page content as ``bytes`` (decoded as UTF-8) or ``str``.
        year: Year of the page; converted with ``int``.
        month: Month of the page; converted with ``int``.

    Returns:
        A list of ``[year, month, day, high, low]`` rows, with ``day`` counted
        from 1 and the temperatures as floats.

    Raises:
        ValueError: If either array is missing, holds a non-numeric value, or
            the two arrays differ in length.
    """
    year = int(year)
    month = int(month)
    # Decode bytes properly instead of parsing their repr().
    if isinstance(html, (bytes, bytearray)):
        page = html.decode('utf-8', errors='replace')
    else:
        page = str(html)
    highs = _parse_temp_series(page, 'hightemp')
    lows = _parse_temp_series(page, 'lowtemp')
    if len(highs) != len(lows):
        raise ValueError(
            "hightemp has %d values but lowtemp has %d" % (len(highs), len(lows))
        )
    return [
        [year, month, day, high, low]
        for day, (high, low) in enumerate(zip(highs, lows), start=1)
    ]

if __name__ == "__main__":
    # newline='' is required by the csv module for files passed to csv.writer;
    # without it, platforms that translate "\n" get extra blank lines.
    with open(city + '.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(('time', 'high', 'low'))
        for year in years:
            for month in months:
                html = getHtml(city, year, month)
                rows = getTemp(html,year,month)
                # Print each row tab-separated; %d shows every value,
                # including the float temperatures, as an integer.
                for row in rows:
                    print (''.join("%d\t" % value for value in row))
                # NOTE: the daily rows are only printed. Writing them to the
                # CSV is disabled below; the header has 3 columns while each
                # row has 5 fields (year, month, day, high, low).
                #writer.writerows(rows)
                print (year + month + ' OK!')
                # Pause between successive page requests.
                time.sleep(2)

欢迎交流:
1193226980@qq.com

评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值