Python spider: 爬取全国天气并发送邮件

import urllib.request as r
import re
import time
import os

# Header pair installed on every opener so requests carry a desktop-browser
# User-Agent string.
headers = (
    "User-Agent",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36",
)
# Root URL of the weather site; relative page paths are appended to it.
url1 = "https://tianqi.8684.cn/"
# Province page URLs, filled in by province().
url = list()
# City page URLs, filled in by everyprovince().
urlcity = list()
def province():
    """Collect every province page URL and create one folder per province.

    Downloads the site's front page (``url1``), extracts the province links
    found inside each ``<div class="p-sort">`` section, makes sure a directory
    named after the province exists under the hard-coded output root, and
    appends the province page URL to the module-level ``url`` list.

    Raises:
        urllib.error.URLError: If the front page cannot be downloaded.
        OSError: If a province directory cannot be created.
    """
    opener = r.build_opener()
    opener.addheaders = [headers]
    # Close the HTTP response deterministically instead of leaking it.
    with opener.open(url1) as response:
        data = response.read().decode("utf-8", "ignore")

    section_pattern = re.compile('<div class="p-sort">(.*?)</div>')
    # Province links look like: <a href="/anhui.htm">NAME</a>
    slug_pattern = re.compile('<a href="/(.*?).htm')
    name_pattern = re.compile('.htm">(.*?)</a>')

    for section in section_pattern.findall(data):
        slugs = slug_pattern.findall(section)
        names = name_pattern.findall(section)
        # zip() pairs each slug with its name and stops at the shorter list,
        # so a mismatch between the two patterns can no longer raise IndexError.
        for slug, name in zip(slugs, names):
            stringdir = "D:\\Desktop\\python爬虫\\天气预报\\" + name
            # exist_ok avoids the check-then-create race of exists()+makedirs().
            os.makedirs(stringdir, exist_ok=True)
            # Mutating the module-level list in place keeps it the same object
            # and avoids rebuilding the list on every iteration.
            url.append(url1 + slug + ".htm")
    
def everyprovince(url):
    """Visit each province page, create city folders and fetch city forecasts.

    Args:
        url: List of province page URLs (the list filled in by ``province()``).

    For every province page the city links inside the first
    ``<ul class="w-province">`` block are read. Each city gets a directory
    below the province directory, its URL is appended to the module-level
    ``urlcity`` list, and, when the URL does not contain ``htm``, the
    forecast is downloaded with ``everycity``. Pages that lack the expected
    markup are reported and skipped instead of aborting the whole crawl.

    Raises:
        urllib.error.URLError: If a province page cannot be downloaded.
        OSError: If a city directory cannot be created.
    """
    print(len(url))
    block_pattern = re.compile('<ul class="w-province">(.*?)</ul>')
    # City links look like: <a href="/zhejiang_hangzhou">NAME</a>
    href_pattern = re.compile('<a href="/(.*?)">')
    province_pattern = re.compile('.htm">(.*?)天气</a>')
    label_pattern = re.compile('>(.*?)</a>')

    for province_url in url:
        opener = r.build_opener()
        opener.addheaders = [headers]
        # Close the HTTP response deterministically instead of leaking it.
        with opener.open(province_url) as response:
            data = response.read().decode("utf-8", "ignore")
        # Pause between province requests (presumably to go easy on the site).
        time.sleep(0.3)

        blocks = block_pattern.findall(data)
        province_names = province_pattern.findall(data)
        if not blocks or not province_names:
            # Previously an IndexError here stopped the entire crawl.
            print("skip " + province_url + ": expected province markup not found")
            continue

        hrefs = href_pattern.findall(blocks[0])
        labels = label_pattern.findall(blocks[0])
        # Pair link target and link text by position; zip() stops at the
        # shorter list so a length mismatch cannot raise IndexError.
        for href, label in zip(hrefs, labels):
            # A label that still contains markup is not a plain city name.
            if label.find("href") >= 0:
                continue
            stringdir = "D:\\Desktop\\python爬虫\\天气预报\\" + province_names[0] + "\\" + label
            # exist_ok avoids the check-then-create race of exists()+makedirs().
            os.makedirs(stringdir, exist_ok=True)
            stringurl = url1 + href
            urlcity.append(stringurl)
            # Only URLs without "htm" are handed to everycity().
            if stringurl.find("htm") < 0:
                everycity(stringurl, stringdir, label)
            

def everycity(urlcity,stringdir,cityname):
    """Download one city's forecast page and save it as ``<cityname>.txt``.

    Args:
        urlcity: URL of the city forecast page.
        stringdir: Directory the text file is written into (not created here).
        cityname: City name; used for the file name and the progress message.

    The file receives 15 lines. Lines 0-6 combine one ``<span>`` value, one
    ``</em><p>...</p><em>`` value and three ``<em>`` values; lines 7-14
    combine one ``<span>`` value, one ``<em>`` value and one ``<i>`` value.
    All lines are assembled before the file is opened, so a page that does
    not contain enough data leaves no empty or partial file behind.

    Any exception is printed and swallowed so that a single failing city does
    not stop the caller's crawl.
    """
    try:
        headers=("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36")
        opener = r.build_opener()
        opener.addheaders = [headers]
        # Close the HTTP response deterministically instead of leaking it.
        with opener.open(urlcity) as response:
            data = response.read().decode("utf-8", "ignore")
        if data:
            print(cityname+"获取天气成功")

        data1 = re.findall('<span>(.*?)</span>', data)
        data2 = re.findall("<em>(.*?)</em>", data)
        data3 = re.findall("</em><p><i>(.*?)</i></p></div></li><li><span>", data)
        data4 = re.findall("</em><p>(.*?)</p><em>", data)

        lines = []
        for i in range(15):
            if i < 7:
                lines.append(data1[i]+'  '+data4[i]+'  '+data2[i*3]+'  '+data2[i*3+1]+'  '+data2[i*3+2]+'\n')
            else:
                # NOTE(review): 15-i-8 equals 7-i, which is 0 for i == 7 and
                # negative afterwards, so later rows read data3 from the end
                # backwards. Kept as in the original; confirm this ordering
                # is intended.
                lines.append(data1[i]+'  '+data2[14+i]+'  '+data3[15-i-8]+'\n')

        filepath = stringdir+"\\"+cityname+".txt"
        # Explicit UTF-8 keeps non-ASCII forecast text writable regardless of
        # the platform's default encoding; "with" closes the file on errors.
        with open(filepath, "w", encoding="utf-8") as fp:
            fp.writelines(lines)
    except Exception as err:
        print (err)
# Script entry for the crawler: runs as soon as the file is executed.
# First fill the module-level ``url`` list with province pages, then walk
# every province page (which in turn downloads the city forecasts).
province()
everyprovince(url)

#coding: utf-8    
  
import smtplib    
from email.mime.multipart import MIMEMultipart    
from email.mime.text import MIMEText    
from email.mime.image import MIMEImage 
from email.header import Header   
import urllib.request as r
import re
import time

def post(smtpserver,username,password,receiver,subject,text,html):
    """Send a plain-text e-mail through an SMTP server.

    Args:
        smtpserver: Host of the SMTP server to connect to. It is now actually
            used for the connection (it was previously ignored in favour of a
            hard-coded host).
        username: Login name for the SMTP server.
        password: Login password for the SMTP server.
        receiver: List of recipient addresses.
        subject: Subject line of the message.
        text: Plain-text body, sent as UTF-8.
        html: HTML version of the body. Kept for interface compatibility.
            NOTE(review): it is not attached to the message, because the
            attach call was already disabled in the original code; confirm
            before re-enabling it.

    Raises:
        smtplib.SMTPException: If login or delivery is rejected.
        OSError: If the connection to the server fails.
    """
    # Envelope sender used for the SMTP transaction.
    sender = 'woailibohao@126.com'

    # The Subject/From/To headers below are what the mail client displays.
    msg = MIMEMultipart('mixed')
    msg['Subject'] = subject
    msg['From'] = 'woailibohao@126.com <woailibohao@126.com>'
    # Address lists in headers are comma-separated (RFC 5322), not ';'.
    msg['To'] = ", ".join(receiver)
    msg.attach(MIMEText(text, 'plain', 'utf-8'))

    # NOTE(review): no starttls() is issued before login(); consider enabling
    # TLS if the server supports it.
    smtp = smtplib.SMTP()
    try:
        smtp.connect(smtpserver)
        # smtp.set_debuglevel(1) would print the whole SMTP dialogue.
        smtp.login(username, password)
        smtp.sendmail(sender, receiver, msg.as_string())
        smtp.quit()
    finally:
        # close() is safe to call repeatedly and guarantees the socket is
        # released even when connect/login/sendmail raises.
        smtp.close()
def postweather():
    """Fetch the ``heilongjiang_haerbin`` forecast page, print it and mail it.

    Builds 15 forecast rows from the page, prints each row, assembles a
    plain-text and an HTML version of the report, appends the first
    ``<span>`` value to the subject and hands everything to ``post``.

    Raises:
        urllib.error.URLError: If the forecast page cannot be downloaded.
        IndexError: If the page does not contain the expected amount of data.
    """
    smtpserver = 'smtp.126.com'
    username = 'woailibohao@126.com'
    # SECURITY NOTE(review): the account password is hard-coded in source;
    # it should be moved to configuration or an environment variable.
    password='libohao0'
    receiver=['2052658718@qq.com','980101115@qq.com','994428149@qq.com']
    subject = 'haerbin weather report'

    url="https://tianqi.8684.cn/heilongjiang_haerbin"
    headers=("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36")
    opener = r.build_opener()
    opener.addheaders = [headers]
    # Close the HTTP response deterministically instead of leaking it.
    with opener.open(url) as response:
        data = response.read().decode("utf-8", "ignore")
    if data:
        print("获取天气成功")

    data1 = re.findall('<span>(.*?)</span>', data)
    data2 = re.findall("<em>(.*?)</em>", data)
    data3 = re.findall("</em><p><i>(.*?)</i></p></div></li><li><span>", data)
    data4 = re.findall("</em><p>(.*?)</p><em>", data)

    # Build every row once and reuse it for console, text and HTML output.
    rows = []
    for i in range(15):
        if i < 7:
            row = data1[i]+'  '+data4[i]+'  '+data2[i*3]+'  '+data2[i*3+1]+'  '+data2[i*3+2]
        else:
            # NOTE(review): 15-i-8 equals 7-i, which is 0 for i == 7 and
            # negative afterwards, so later rows read data3 from the end
            # backwards. Kept as in the original; confirm this is intended.
            row = data1[i]+'  '+data2[14+i]+'  '+data3[15-i-8]
        print(row)
        rows.append(row)

    text = "".join(row + "\n" for row in rows)
    # The HTML rows now carry the same columns as the text rows; previously
    # rows 7-14 silently dropped the leading <span> value.
    html = "<html><head></head><body>" + "".join("<p>" + row + "</p>" for row in rows) + "</body></html> "
    subject = subject + data1[0]
    post(smtpserver,username,password,receiver,subject,text,html)
# Script entry for the mailer: fetch the forecast and send the e-mail as soon
# as the file is executed.
postweather()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值