# The code is as follows:
import requests
from bs4 import BeautifulSoup
class get_weather(object):
    """Scrape and print the current weather and 24-hour forecast for Beijing
    from m.tianqi.com."""

    def __init__(self):
        self.server = "https://m.tianqi.com/"
        self.target = "https://m.tianqi.com/beijing/"
        # A browser User-Agent disguises the scraper as a normal browser so the
        # site does not reject the request.
        self.header = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"}

    def get_it(self):
        """Fetch the target page and print the city name, current conditions,
        and the hourly (24h) forecast.

        Raises:
            requests.RequestException: on network failure, timeout, or an
                HTTP error status.
        """
        # timeout= prevents the call from hanging indefinitely on a dead server.
        req = requests.get(url=self.target, headers=self.header, timeout=10)
        req.raise_for_status()  # fail fast instead of parsing an error page
        req.encoding = 'UTF-8'
        html_bf = BeautifulSoup(req.text, 'lxml')
        # City name — note 'text' is a tag name used on this page, matched by
        # the CSS selector; it is not BeautifulSoup's .text attribute.
        city = html_bf.select('.hhx_index_newHead_l text')
        # Guard indexing: a site-layout change would otherwise raise IndexError.
        if city:
            print(city[0].get_text())
        # Current-conditions panels.
        for panel in html_bf.find_all('div', class_='city_weather cbg_b1 citydt'):
            print(panel.get_text())
        # 24-hour forecast: title paragraph plus the hourly entries list.
        titles = html_bf.find_all('p', class_='hhx_newAllDayTit')
        hours = html_bf.find_all('ul', class_='hhx_newAllDayUl')
        if titles and titles[0].a is not None:
            print(titles[0].a.get_text())
        for hour in hours:
            print(hour.get_text())
if __name__ == "__main__":
    # Script entry point: build the scraper and print the Beijing forecast.
    weather = get_weather()
    weather.get_it()