抓取需要的数据并保存到 Excel 表，需要大量数据做数据分析的可以看看。
话不多说，上代码。
import requests
import time
import bs4
import openpyxl
# Browser-like User-Agent sent with every request.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'}
diming = 'xiamen'  # full pinyin spelling of the place to scrape

BASE_URL = 'http://lishi.tianqi.com/'
START_YEAR = 2011      # first year to scrape
YEAR_COUNT = 10        # number of consecutive years (2011 through 2020)
MONTHS_PER_YEAR = 12   # set to 1 to scrape only January of each year
REQUEST_TIMEOUT = 10   # seconds; without a timeout a stalled server hangs the run
REQUEST_DELAY = 0.5    # seconds to pause between consecutive page requests
# NOTE(review): the raw-string prefix combined with doubled backslashes yields
# literal double backslashes in the path. Kept unchanged from the original;
# presumably Windows collapses the repeated separators -- confirm.
OUTPUT_PATH = r'C:\\Users\\86130\\Desktop\\test\\大数据测试天气预报.xlsx'
SHEET_TITLE = '天气预报'
COLUMN_TITLES = ['日期', '最高气温', '最低气温', '天气', '风向']


def build_month_urls(place, start_year=START_YEAR, year_count=YEAR_COUNT,
                     months=MONTHS_PER_YEAR):
    """Return the monthly history-page URLs for *place*, oldest first.

    Args:
        place: Place name as it appears in the URL (pinyin spelling).
        start_year: First four-digit year to include.
        year_count: Number of consecutive years to include.
        months: Number of months per year, counted from January.

    Returns:
        A list of URL strings of the form ``<base><place>/<YYYYMM>.html``.
    """
    return [
        '{}{}/{}{:02d}.html'.format(BASE_URL, place, year, month)
        for year in range(start_year, start_year + year_count)
        for month in range(1, months + 1)
    ]


def parse_month_rows(html):
    """Extract the daily rows from one monthly page.

    Args:
        html: Page markup as text.

    Returns:
        A list of ``[date, high, low, weather, wind]`` lists of strings.
        An empty list is returned when the expected
        ``div.tian_three > ul.thrui`` container is missing, and list items
        lacking the expected cells are skipped, so one malformed page does
        not abort the whole run.
    """
    soup = bs4.BeautifulSoup(html, 'html.parser')
    container = soup.find('div', class_='tian_three')
    listing = container.find('ul', class_='thrui') if container is not None else None
    if listing is None:
        return []

    rows = []
    for item in listing.find_all('li'):
        date_cell = item.find('div', class_='th200')
        # Look the four value cells up once instead of once per column.
        cells = item.find_all('div', class_='th140')
        if date_cell is None or len(cells) < 4:
            continue
        rows.append([
            # Drop the last four characters of the date cell
            # (presumably a weekday suffix -- confirm against the live page).
            date_cell.text[:-4],
            # Drop the last character of each temperature
            # (presumably the unit sign -- confirm against the live page).
            cells[0].text[:-1],
            cells[1].text[:-1],
            cells[2].text,
            cells[3].text,
        ])
    return rows


def fetch_month_rows(url):
    """Download one monthly page and return its parsed daily rows.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return parse_month_rows(response.text)


def save_rows(rows, path):
    """Write a header row followed by *rows* to a new workbook at *path*."""
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = SHEET_TITLE
    # On a fresh sheet the first append lands on row 1 (cells A1..E1).
    sheet.append(COLUMN_TITLES)
    for row in rows:
        sheet.append(row)
    workbook.save(path)


def main():
    """Scrape every configured month for ``diming`` and save the result."""
    collected = []
    for url in build_month_urls(diming):
        try:
            collected.extend(fetch_month_rows(url))
        except requests.RequestException as err:
            # Skip a failing month rather than losing everything gathered so far.
            print('skipped {}: {}'.format(url, err))
        time.sleep(REQUEST_DELAY)
    save_rows(collected, OUTPUT_PATH)


if __name__ == '__main__':
    main()
简洁易懂。