编程实现自动爬取中国气象网站 http://www.weather.com.cn/ 的气象数据,将中国主要城市(北京、上海、广州、杭州、武汉、南京、深圳、苏州、厦门、合肥和成都)周边地区当天的最高温度和最低温度爬取并显示,例如网页上显示的成都周边地区气候数据如下图:
# encoding=utf-8
import requests
from bs4 import BeautifulSoup
# One-day forecast page for each major city on weather.com.cn.
# The order must match `allfind` below: both lists are indexed by the same
# position when the pages are fetched and labelled.
city_url = [
    'http://www.weather.com.cn/weather1d/101010100.shtml#search',  # Beijing
    'http://www.weather.com.cn/weather1d/101020100.shtml#search',  # Shanghai
    'http://www.weather.com.cn/weather1d/101280101.shtml#search',  # Guangzhou
    'http://www.weather.com.cn/weather1d/101210101.shtml#search',  # Hangzhou
    'http://www.weather.com.cn/weather1d/101200101.shtml#search',  # Wuhan
    'http://www.weather.com.cn/weather1d/101190101.shtml#search',  # Nanjing
    'http://www.weather.com.cn/weather1d/101280601.shtml#search',  # Shenzhen
    # Suzhou: this entry previously repeated the Shenzhen URL, so the
    # "苏州" table showed Shenzhen's surrounding areas.
    'http://www.weather.com.cn/weather1d/101190401.shtml#search',  # Suzhou
    'http://www.weather.com.cn/weather1d/101230201.shtml#search',  # Xiamen
    'http://www.weather.com.cn/weather1d/101220101.shtml#search',  # Hefei
    'http://www.weather.com.cn/weather1d/101270101.shtml#search',  # Chengdu
]
# Display name of each major city, in the same order as `city_url`.
allfind = ['北京', '上海', '广州', '杭州', '武汉', '南京', '深圳', '苏州', '厦门', '合肥', '成都']
def getHTMLText(url):
    """Download the page at *url* and return its text decoded as UTF-8.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body as a string, or "" when the request fails
        (connection error, timeout, invalid URL, or an HTTP error status).
    """
    try:
        # A timeout keeps one unresponsive server from hanging the whole run.
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: callers treat "" as "no data available".
        return ""
    # Force UTF-8 instead of relying on the encoding guessed from headers.
    r.encoding = 'utf-8'
    return r.text
def fillList(soup, num):
    """Collect the surrounding-area temperatures from one parsed city page.

    Args:
        soup: Parsed page for a single city; it is searched for the
            ``<ul class="clearfix city">`` element.
        num: Index into ``allfind`` giving the display name of that city.

    Returns:
        A list of ``[city, area, low, high]`` rows, one per ``<li>`` whose
        link carries both an area name (``<span>``) and a temperature text
        (``<i>``) containing '/'. The list is empty when the ``<ul>`` is
        missing, e.g. because the download failed and the page is empty.
    """
    allCity = []
    ul = soup.find('ul', {'class': 'clearfix city'})
    if ul is None:
        return allCity
    for li in ul.find_all('li'):
        a = li.find('a')
        if a is None:
            continue
        cityname = a.find('span')
        temperature = a.find('i')
        if cityname is None or temperature is None:
            continue
        area = cityname.string
        temperature_str = temperature.string
        # `.string` is None for an empty tag or one with several children.
        if not area or not temperature_str:
            continue
        # Split the two temperatures apart.
        parts = temperature_str.split('/')
        if len(parts) < 2:
            continue
        # The part before '/' is reported as the low and gets the unit
        # appended; the part after it is used as-is for the high.
        # NOTE(review): presumably only the second part carries "°C" in the
        # page text, and the page order is low/high - confirm on a live page.
        temperature_l = parts[0] + '°C '
        temperature_h = parts[1]
        allCity.append([allfind[num], area, temperature_l, temperature_h])
    return allCity
def printCityW(city):
    """Print a header line followed by one centred row per surrounding area.

    Args:
        city: Sequence of rows, each holding four strings in the order
            city name, surrounding area, lowest temperature, highest
            temperature. May be empty, in which case only the header is
            printed.
    """
    # U+3000 (ideographic space) is used as the fill character so that
    # columns made of full-width CJK characters stay aligned.
    fill = chr(12288)
    print("{1:^8}{2:{0}^6}{3:{0}^8}{4:{0}^6}".format(fill, "城市名称", "周边地区", "最低温度", "最高温度"))
    # Iterate over the rows actually present: the number of surrounding
    # areas differs from page to page.
    for u in city:
        print("{1:^8}{2:{0}^8}{3:{0}^8}{4:{0}^8}".format(fill, u[0], u[1], u[2], u[3]))
def main():
    """Fetch every page in ``city_url`` and print its surrounding-area table.

    Tables are separated by a divider line. A city whose page cannot be
    downloaded is reported on its own line instead of aborting the run.
    """
    last = len(city_url) - 1
    for i, url in enumerate(city_url):
        html = getHTMLText(url)
        if html:
            soup = BeautifulSoup(html, "html.parser")
            City = fillList(soup, i)
            printCityW(City)
        else:
            # getHTMLText returns "" on any request failure.
            print(allfind[i] + ': 数据获取失败')
        # No divider after the final city.
        if i != last:
            print('-------------------**********---------------------')


# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
运行结果如下: