import urllib.request as r
import re
import time
import os
# Browser-like User-Agent header pair; installed on each opener through addheaders.
headers=("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36")
# Root URL of the weather site; province and city links are appended to it.
url1="https://tianqi.8684.cn/"
# Province page URLs, filled in by province().
url=[]
# City page URLs, filled in by everyprovince().
urlcity=[]
def province():
    """Collect every province page URL and create one output folder per province.

    Downloads the site root (``url1``), looks inside each
    ``<div class="p-sort">`` section for province links, creates a directory
    named after each province under the fixed output root and appends the
    province page URL to the module-level ``url`` list.

    Link targets and link texts are paired positionally; if the two regex
    result lists differ in length the surplus entries are ignored instead of
    raising ``IndexError``.

    Raises:
        OSError: the page could not be downloaded (``urllib.error.URLError``
            is a subclass) or a directory could not be created.
    """
    global url
    # Fixed output root used by the whole crawler.
    base_dir = "D:\\Desktop\\python爬虫\\天气预报\\"

    opener = r.build_opener()
    opener.addheaders = [headers]
    # Close the HTTP response as soon as the body has been read.
    with opener.open(url1) as response:
        data = response.read().decode("utf-8", "ignore")

    # Sample link the two patterns are written for:  /anhui.htm">安徽
    section_pat = re.compile('<div class="p-sort">(.*?)</div>')
    slug_pat = re.compile('<a href="/(.*?).htm')
    name_pat = re.compile('.htm">(.*?)</a>')

    for section in section_pat.findall(data):
        slugs = slug_pat.findall(section)
        names = name_pat.findall(section)
        for slug, name in zip(slugs, names):
            # exist_ok avoids the check-then-create race of exists()+makedirs().
            os.makedirs(base_dir + name, exist_ok=True)
            # Append in place instead of rebuilding the list on every hit.
            url.append(url1 + slug + ".htm")
def everyprovince(url):
    """Walk the province pages, create a folder per city and fetch each forecast.

    For every province page the function reads the first
    ``<ul class="w-province">`` list, pairs each link target with its link
    text, creates ``<output root>\\<province>\\<city>``, records the city URL
    in the module-level ``urlcity`` list and calls ``everycity`` for every
    city URL that does not contain ``"htm"``.

    A province page that cannot be downloaded, or whose markup does not
    contain the expected list/title, is reported and skipped so that one bad
    page does not abort the whole crawl.

    Args:
        url: list of province page URLs (as produced by ``province``).
    """
    global urlcity
    # Fixed output root used by the whole crawler.
    base_dir = "D:\\Desktop\\python爬虫\\天气预报\\"

    # Sample city link inside the list:  <a href="/zhejiang_hangzhou">杭州</a>
    list_pat = re.compile('<ul class="w-province">(.*?)</ul>')
    href_pat = re.compile('<a href="/(.*?)">')
    title_pat = re.compile('.htm">(.*?)天气</a>')
    label_pat = re.compile('>(.*?)</a>')

    print(len(url))
    # One opener is enough for the whole crawl.
    opener = r.build_opener()
    opener.addheaders = [headers]

    for province_url in url:
        try:
            with opener.open(province_url) as response:
                data = response.read().decode("utf-8", "ignore")
        except OSError as err:
            # urllib's URLError/HTTPError derive from OSError.
            print(err)
            continue
        # Small pause between province pages.
        time.sleep(0.3)

        city_lists = list_pat.findall(data)
        # Matched against the whole page; presumably the province name.
        province_names = title_pat.findall(data)
        if not city_lists or not province_names:
            print("skip " + province_url + ": city list or province name not found")
            continue

        hrefs = href_pat.findall(city_lists[0])
        labels = label_pat.findall(city_lists[0])
        # hrefs and labels are paired by position, exactly as the index-based
        # lookup did; zip() just stops at the shorter list.
        for href, label in zip(hrefs, labels):
            # A label that still contains markup is not a plain city name.
            if "href" in label:
                continue
            stringdir = base_dir + province_names[0] + "\\" + label
            os.makedirs(stringdir, exist_ok=True)
            stringurl = url1 + href
            urlcity.append(stringurl)
            # Only links without "htm" are fetched as city forecast pages.
            if "htm" not in stringurl:
                everycity(stringurl, stringdir, label)
def everycity(urlcity,stringdir,cityname):
    """Download one city forecast page and save a 15-line summary as text.

    The page is scraped with four regular expressions. Rows 0-6 are written
    as ``<span> <p> <em> <em> <em>`` (three ``<em>`` values per day); rows
    7-14 are written as ``<span> <em> <i>``. The result is stored in
    ``<stringdir>/<cityname>.txt`` encoded as UTF-8.

    All 15 rows are assembled before the file is opened, so a page with too
    few matches no longer leaves a truncated file behind, and the file handle
    is always closed.

    Args:
        urlcity: URL of the city forecast page.
        stringdir: existing directory that receives the output file.
        cityname: city name; used for the file name and the progress message.

    Any exception (network, parsing, file system) is printed and swallowed,
    so a single failing city does not stop the caller's crawl.
    """
    try:
        headers=("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36")
        opener = r.build_opener()
        opener.addheaders = [headers]
        # Close the HTTP response as soon as the body has been read.
        with opener.open(urlcity) as response:
            data = response.read().decode("utf-8", "ignore")
        if data:
            print(cityname+"获取天气成功")

        spans = re.findall('<span>(.*?)</span>', data)
        ems = re.findall("<em>(.*?)</em>", data)
        italics = re.findall("</em><p><i>(.*?)</i></p></div></li><li><span>", data)
        paragraphs = re.findall("</em><p>(.*?)</p><em>", data)

        rows = []
        for i in range(15):
            if i < 7:
                rows.append(spans[i]+' '+paragraphs[i]+' '+ems[i*3]+' '+ems[i*3+1]+' '+ems[i*3+2]+'\n')
            else:
                # NOTE(review): 15-i-8 equals 7-i, which is 0 for i=7 and
                # negative afterwards, so rows 8-14 read the <i> matches from
                # the end of the list backwards. Kept as-is; confirm whether
                # i-7 was intended.
                rows.append(spans[i]+' '+ems[14+i]+' '+italics[15-i-8]+'\n')

        # os.path.join yields the same backslash-separated path on Windows
        # and keeps the function usable on other platforms.
        filepath = os.path.join(stringdir, cityname+".txt")
        # Explicit UTF-8 so Chinese text never depends on the locale code page.
        with open(filepath, "w", encoding="utf-8") as fp:
            fp.writelines(rows)
    except Exception as err:
        print (err)
# Run the crawl as soon as the module is executed: collect the province
# URLs into the global `url` list, then walk every province page.
province()
everyprovince(url)
#coding: utf-8
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.image import MIMEImage
from email.header import Header
import urllib.request as r
import re
import time
def post(smtpserver,username,password,receiver,subject,text,html):
    """Send a plain-text e-mail to one or more recipients over SMTP.

    Args:
        smtpserver: host name of the SMTP server to connect to.
        username: login name for the SMTP server.
        password: login password for the SMTP server.
        receiver: list of recipient addresses; a single address string is
            also accepted.
        subject: subject line shown in the mail.
        text: plain-text body, sent as UTF-8.
        html: accepted for interface compatibility; the HTML part is not
            attached to the message (the original attach call was disabled).

    Raises:
        smtplib.SMTPException: connecting, authenticating or sending failed.
        OSError: the server could not be reached.
    """
    # Envelope sender used for the SMTP transaction.
    sender = 'woailibohao@126.com'

    # A bare string would otherwise be joined/sent character by character.
    if isinstance(receiver, str):
        receiver = [receiver]

    # Subject/From/To below are what the mail client displays.
    msg = MIMEMultipart('mixed')
    msg['Subject'] = subject
    msg['From'] = 'woailibohao@126.com <woailibohao@126.com>'
    # RFC 5322 address lists are comma separated, not semicolon separated.
    msg['To'] = ", ".join(receiver)
    msg.attach(MIMEText(text, 'plain', 'utf-8'))

    # Connect to the server that was passed in (it used to be ignored in
    # favour of a hard-coded host). The context manager sends QUIT and closes
    # the socket even when login or sending raises.
    # NOTE(review): the login is sent without STARTTLS/SSL - confirm whether
    # the server supports an encrypted session.
    with smtplib.SMTP(smtpserver) as smtp:
        smtp.login(username, password)
        smtp.sendmail(sender, receiver, msg.as_string())
def postweather():
    """Scrape the 15-day Harbin forecast and e-mail it to the fixed recipients.

    Downloads the Harbin page, extracts the forecast with four regular
    expressions, prints each row, builds a plain-text and an HTML rendering
    of the same rows and hands both to ``post``. The first ``<span>`` match
    is appended to the subject.

    Every row is now rendered once and reused, so the HTML rows for days
    8-15 carry the same leading ``<span>`` column as the printed and
    plain-text rows (it used to be missing from the HTML only).

    Raises:
        OSError: the page could not be downloaded.
        IndexError: the page contained fewer matches than the 15 rows need.
    """
    smtpserver = 'smtp.126.com'
    username = 'woailibohao@126.com'
    # NOTE(review): credential stored in source code - move it to an
    # environment variable or a secrets store and rotate it.
    password='libohao0'
    receiver=['2052658718@qq.com','980101115@qq.com','994428149@qq.com']
    subject = 'haerbin weather report'

    url="https://tianqi.8684.cn/heilongjiang_haerbin"
    headers=("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36")
    opener = r.build_opener()
    opener.addheaders = [headers]
    # Close the HTTP response as soon as the body has been read.
    with opener.open(url) as response:
        data = response.read().decode("utf-8", "ignore")
    if data:
        print("获取天气成功")

    spans = re.findall('<span>(.*?)</span>', data)
    ems = re.findall("<em>(.*?)</em>", data)
    italics = re.findall("</em><p><i>(.*?)</i></p></div></li><li><span>", data)
    paragraphs = re.findall("</em><p>(.*?)</p><em>", data)

    rows = []
    for i in range(15):
        if i < 7:
            row = spans[i]+' '+paragraphs[i]+' '+ems[i*3]+' '+ems[i*3+1]+' '+ems[i*3+2]
        else:
            # NOTE(review): 15-i-8 equals 7-i, which goes negative for i>7 and
            # therefore reads the <i> matches from the end backwards. Kept
            # as-is; confirm whether i-7 was intended.
            row = spans[i]+' '+ems[14+i]+' '+italics[15-i-8]
        print(row)
        rows.append(row)

    # Join once instead of growing the strings inside the loop.
    text = "".join(row+"\n" for row in rows)
    html = "<html><head></head><body>"+"".join("<p>"+row+"</p>" for row in rows)+"</body></html> "
    subject = subject+spans[0]
    post(smtpserver,username,password,receiver,subject,text,html)
# Fetch the forecast and mail it as soon as the module is executed.
postweather()