python 天气爬虫完整,测试使用
#!/usr/bin/env python3
# -*- coding: gbk -*-
import os
import re
import time
import requests
from xpinyin import Pinyin #pycharm--文件--设置---项目---project interpreter 右侧添加搜索安装
from bs4 import BeautifulSoup
def _split_pair(text):
    """Split a table cell of the form "a / b" into its two parts.

    All whitespace is removed first. A missing second part is returned as
    an empty string instead of raising IndexError.
    """
    parts = ''.join(text.split()).split('/')
    first = parts[0]
    second = parts[1] if len(parts) > 1 else ''
    return first, second


def download(url, page, city1, city):
    """Scrape monthly weather-history pages and append each day to the CSV.

    Fetches ``url`` first, then the pages for every following month of 2018
    up to and including month 12, writing one row per table line through
    ``write_fo_file``.

    Args:
        url: Address of the first page to fetch.
        page: Month number that ``url`` corresponds to; later months are
            derived from it.
        city1: City slug used in the URL path and written to the output.
        city: Fallback display name, used only when the page has no
            ``#content h1`` heading to read the name from.
    """
    # Iterate over the months instead of recursing once per page.
    while True:
        print(f"正在爬取:{url}")
        try:
            # A timeout keeps one stalled connection from hanging the run.
            html = requests.get(url, timeout=30).text
        except requests.RequestException as exc:
            print(f"请求失败,跳过:{url} ({exc})")
            html = ''
        soup = BeautifulSoup(html, 'html.parser')

        # The heading text before '历史天气预报' is used as the city name.
        headings = soup.select("#content h1")
        if headings:
            city = headings[0].text.strip().split('历史天气预报')[0]

        for row in soup.select("#content table tr"):
            # Rows whose cells contain <b> are the column headers, not data.
            if row.select("td b"):
                continue
            cells = row.select("td")
            if len(cells) < 4:
                continue
            links = row.select("td a")
            date_source = links[0] if links else cells[0]
            time1 = date_source.text.strip()
            # Second column: two weather descriptions separated by '/'.
            bgn_t, end_t = _split_pair(cells[1].text)
            # Third column: two temperatures separated by '/'.
            # NOTE(review): the first value is stored as min_t and the second
            # as max_t, as in the original code - confirm the site's order.
            min_t, max_t = _split_pair(cells[2].text.replace('℃', ''))
            write_fo_file(city, city1, time1, min_t, max_t, bgn_t, end_t)

        page += 1
        if page >= 13:
            break
        time.sleep(0)  # zero-length pause; raise the value to throttle
        # The host was left as a non-ASCII placeholder ("convert to pinyin")
        # in the original; the month is zero-padded so 9 becomes "201809".
        url = f"http://www.tianqihoubao.com/lishi/{city1}/month/2018{page:02d}.html"
def write_fo_file(city, city1, time, min_t, max_t, bgn_t, end_t):
    """Append one comma-separated record to ``tianqi4.csv``.

    Args:
        city: City display name.
        city1: City slug.
        time: Date text of the record (the name shadows the ``time`` module
            inside this function only).
        min_t: First temperature value.
        max_t: Second temperature value.
        bgn_t: First weather description.
        end_t: Second weather description.
    """
    # The original ended with `f.closed`, which only reads an attribute and
    # never closes the file; the context manager closes it deterministically.
    with open('tianqi4.csv', 'a') as f:
        f.write(f'{city},{city1},{time},{min_t},{max_t},{bgn_t},{end_t}\n')
def main():
    """Scrape the weather history of every listed city, starting at 2018-10.

    Removes a previous output file, then calls ``download`` once per city
    slug with the October 2018 page as the starting point.
    """
    # The records are appended to tianqi4.csv, so that is the file that has
    # to be cleared; the original removed tianqi5.csv and left stale rows.
    if os.path.exists('tianqi4.csv'):
        os.remove('tianqi4.csv')

    # Comma-separated city slugs, kept in the original order. The literal is
    # split over several lines purely for readability; the original was
    # broken across two lines in the middle of "baisha".
    prov = (
        'beijing,tianjin,'
        'shijiazhuang,tangshan,qinhuangdao,handan,xingtai,baoding,'
        'zhangjiakou,chengde,cangzhou,langfang,hengshui,'
        'taiyuan,datong,yangquan,changzhi,jincheng,shuozhou,jinzhong,'
        'yuncheng,xinzhou,linfen,lvliang,'
        'huhehaote,baotou,wuhai,chifeng,tongliao,eerduosi,hulunbeier,'
        'bayannaoer,wulanchabu,xinganmeng,xilinguole,alashanmeng,'
        'shenyang,dalian,anshan,fushun,benxi,dandong,jinzhou,yingkou,'
        'fuxin,liaoyang,panjin,changtu,chaoyang,huludao,'
        'changchun,jilin,siping,liaoyuan,tonghua,baishan,songyuan,'
        'baicheng,yanbian,'
        'haerbin,qiqihaer,jixi,hegang,shuangyashan,daqing,yichun,'
        'jiamusi,qitaihe,mudanjiang,heihe,suihua,daxinganling,'
        'shanghai,'
        'nanjing,wuxi,xuzhou,changzhou,suzhou,nantong,lianyungang,'
        'huaian,yancheng,yangzhou,zhenjiang,taizhou,suqian,'
        'hangzhou,ningbo,wenzhou,jiaxing,huzhou,shaoxing,jinhua,quzhou,'
        'zhoushan,lishui,'
        'hefei,wuhu,bangbu,huainan,maanshan,huaibei,tongling,anqing,'
        'huangshan,chuzhou,fuyang,chaohu,liuan,bozhou,chizhou,xuancheng,'
        'fuzhou,xiamen,putian,sanming,quanzhou,zhangzhou,nanping,'
        'longyan,ningde,'
        'nanchang,jingdezhen,pingxiang,jiujiang,xinyu,yingtan,ganzhou,'
        'jian,shangrao,'
        'jinan,qingdao,zibo,zaozhuang,dongying,yantai,weifang,jining,'
        'taian,weihai,rizhao,laiwu,linyi,dezhou,liaocheng,binzhou,heze,'
        'zhengzhou,kaifeng,lvyang,pingdingshan,anyang,hebi,xinxiang,'
        'jiaozuo,puyang,xuchang,tahe,sanmenxia,nanyang,shangqiu,xinyang,'
        'zhoukou,zhumadian,'
        'wuhan,huangshi,shiyan,yichang,xiangyang,ezhou,jingmen,xiaogan,'
        'jingzhou,huanggang,xianning,suizhou,enshi,xiantao,qianjiang,'
        'tianmen,shennongjia,'
        'changsha,zhuzhou,xiangtan,hengyang,shaoyang,yueyang,changde,'
        'zhangjiajie,yiyang,chenzhou,yongzhou,huaihua,loudi,xiangxi,'
        'guangzhou,shaoguan,shenzhen,zhuhai,shantou,foshan,jiangmen,'
        'zhanjiang,maoming,zhaoqing,huizhou,meizhou,shanwei,heyuan,'
        'yangjiang,qingyuan,dongguang,zhongshan,chaozhou,jieyang,yunfu,'
        'nanning,liuzhou,guilin,wuzhou,beihai,fangchenggang,qinzhou,'
        'guigang,yulin,baise,hezhou,hechi,laibin,chongzuo,'
        'haikou,sanya,wuzhishan,qionghai,danzhou,wenchang,wanning,'
        'dongfang,dingan,tunchang,chengmai,lingao,baisha,changjiang,'
        'lingshui,'
        'chongqing,'
        'chengdu,zigong,panzhihua,luzhou,deyang,mianyang,guangyuan,'
        'suining,neijiang,leshan,nanchong,meishan,yibin,guangan,dazhou,'
        'yaan,bazhong,ziyang,aba,ganzi,liangshan,'
        'guiyang,liupanshui,zunyi,anshun,tongren,qianxinan,bijie,'
        'qiandongnan,qiannan,'
        'kunming,qujing,yuxi,baoshan,zhaotong,lijiang,puer,lincang,'
        'chuxiong,honghe,wenshan,xishuangbanna,dali,dehong,nujiang,'
        'diqing,'
        'lasa,changdu,shannan,rikaze,naqu,ali,linzhi,'
        'xian,tongchuan,baoji,xianyang,weinan,yanan,hanzhong,ankang,'
        'shanglv,'
        'lanzhou,jiayuguan,jinchang,baiyin,tianshui,wuwei,zhangye,'
        'pingliang,jiuquan,qingyang,dingxi,longnan,linxia,gannan,'
        'xining,haidong,haibei,huangnan,hainan,guolv,yushu,haixi,'
        'yinchuan,shizuishan,wuzhong,guyuan,zhongwei,'
        'wulumuqi,kelamayi,tulufan,hami,changji,bazhou,akesu,kezhou,'
        'kashi,hetian,yili,tacheng,aletai,shihezi,wujiaqu,'
        'taibei,taizhong,gaoxiong,xianggang,aomen'
    )

    for city in prov.split(','):
        # The host was left as a non-ASCII placeholder ("convert to pinyin")
        # in the original, which cannot be resolved.
        url = 'http://www.tianqihoubao.com/lishi/' + city + '/month/201810.html'
        print(url)
        # The slug is passed for both city arguments.
        download(url, 10, city, city)
    print("爬取完毕。")
if __name__ == '__main__':
    # Start scraping only when the file is executed as a script, not when it
    # is imported as a module.
    main()
本文禁止转载或摘编
--
--
--
分享到:
投诉或建议
评论