立即学习:https://edu.csdn.net/course/play/24797/282219?utm_source=blogtoedu
# Download a page from m.tianqi.com for a user-supplied city and print the
# <li><a title=...> entries found inside its "w100 list jump" <ul>.
import datetime
import re
import urllib.request

# The prompt asks for the city name written in pinyin; the value is placed
# verbatim into the request URL.
city = input('请输入一个城市的拼音_____')


def get_tianqi(cit: str, yea: str, mon: str, da: str) -> str:
    """Download one m.tianqi.com page and return its HTML as text.

    The URL is built as
    ``https://m.tianqi.com/tianqi/<cit>/<yea><mon><da>.html`` and is printed
    before the request is sent.

    Args:
        cit: City path segment (the script passes the pinyin typed by the user).
        yea: Year part of the page name, e.g. ``'2020'``.
        mon: Month part of the page name, e.g. ``'04'``.
        da: Day part of the page name, e.g. ``'07'``.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for HTTP
            error statuses).
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    url = 'https://m.tianqi.com/tianqi/%s/%s%s%s.html' % (cit, yea, mon, da)
    print(url)
    request = urllib.request.Request(url)
    # Send a browser-like User-Agent instead of urllib's default one.
    request.add_header('User-Agent', 'Mozilla/5.0')
    # The context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('UTF-8')


# Accumulators for dates, daily highs and daily lows. Nothing in this part of
# the script fills them yet.
dates, heighD, lowD = [], [], []

year = '2020'
# Only April is requested; use ['%02d' % i for i in range(1, 13)] to cover
# every month of the year.
months = ['04']

# All whitespace is removed from the HTML before matching, so the patterns
# are written without spaces between tag name and attributes.
pattern = re.compile(r'<ulclass="w100listjump">(.*?)</ul>')
pattern1 = re.compile(r'<li><atitle="(.*?)</a></li>')

# The day component of every request is today's day of month, zero-padded.
today_day = '%02d' % datetime.date.today().day

for month in months:
    html = get_tianqi(city, year, month, today_day)
    # Dropping all whitespace (including newlines) lets the non-greedy '.'
    # patterns above match across what were originally separate lines.
    htmlText = ''.join(html.split())
    print(htmlText)

    find_ul = pattern.findall(htmlText)
    if not find_ul:
        # The page has no matching list (e.g. unknown city or changed layout);
        # report it and move on instead of failing with IndexError.
        print('页面中未找到匹配的<ul>列表: %s%s' % (year, month))
        continue
    print(find_ul[0])

    find_child = pattern1.findall(find_ul[0])
    print(len(find_child))