# Scrape scenic-area weather from the weather.com.cn forecast page
import requests
import re#正则表达式提取数据
# Page to scrape.
web = 'http://www.weather.com.cn/weather1d/101290101.shtml'

# Fetch the page. requests applies no timeout by default, so pass one
# explicitly to keep the script from hanging if the server never answers.
web_open = requests.get(web, timeout=10)

# Decode the body as UTF-8 so the Chinese text in .text comes out readable.
web_open.encoding = 'utf-8'

print(web_open.text)  # response body decoded to a string
print(dir(web_open))  # attributes available on the response object

# Extract the wanted fields from web_open.text with regular expressions.
# Example of the markup being matched:
#   <span class="name">大理</span>
#   <span class="weather">小雨</span>
#   <span class="wd">18/27℃</span>
#   <span class="zs">适宜</span>
#
# [\u4e00-\u9fa5]* matches zero or more characters in that CJK range; the
# parentheses form a group, so findall returns only the grouped text.
city = re.findall('<span class="name">([\u4e00-\u9fa5]*)</span>', web_open.text)
print(city)

weather = re.findall('<span class="weather">([\u4e00-\u9fa5]*)</span>', web_open.text)
print(weather)

# Non-greedy (.*?) stops at the first closing </span>; a greedy (.*) would
# run on to the last </span> on the same line when several spans share it.
temperature = re.findall('<span class="wd">(.*?)</span>', web_open.text)
print(temperature)

condition = re.findall('<span class="zs">([\u4e00-\u9fa5]*)</span>', web_open.text)
print(condition)

# Combine the four lists position by position into one tuple per entry.
# NOTE: zip stops at the shortest list, so a field that failed to match
# somewhere silently shortens (and may misalign) the result.
scenicarea_list = list(zip(city, weather, temperature, condition))
for item in scenicarea_list:
    print(item)
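# Quickly locating the target information:
# right-click the target page → Inspect → click "Select an element in the page to inspect it" → hover over the target information and read the matching markup in the inspector panel.
# Run output: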