爬取CSDN热榜链接并自动发送至邮箱

爬取CSDN热榜链接并自动发送至邮箱

import requests
import re
import os
from browsermobproxy import Server
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from pyquery import PyQuery
from smtplib import SMTP_SSL
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.header import Header


class Csdn(object):
    """Scrape the CSDN hot-rank page and render it as a plain-text digest.

    Instantiating the class runs the whole scrape straight away, because
    ``__init__`` calls ``main``. Afterwards the digest text is available as
    ``hot`` and the parsed data as ``page_links`` / ``page_titles``.
    """

    # Translation table that deletes the characters '<', '>' and '"'.
    _MARKUP_CHARS = str.maketrans('', '', '<>"')

    def __init__(self):
        # Article URLs and titles collected from the rank page, in page order.
        self.page_links = []
        self.page_titles = []
        self.base_url = 'https://blog.csdn.net/rank/list'
        # Browser-like request headers; no method of this class reads them.
        self.headers = {
            'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36',
        }

        self.main()

    def main(self):
        """Scrape the page, build the digest and return it.

        Returns:
            The text stored in ``self.hot``. It is an empty string when the
            number of links and titles found on the page differ.
        """
        self.get_blank_links()
        self.get_hot()
        print('数据已爬取完毕,正在发送邮件...')
        return self.hot

    def get_blank_links(self, html=None):
        """Fill ``page_links`` and ``page_titles`` from the rank page.

        Args:
            html: Page source to parse. When ``None`` (the default) the page
                at ``self.base_url`` is loaded in headless Chrome first.
        """
        if html is None:
            html = self._fetch_rank_html()
        self.page_links = self._extract_links(html)
        self.page_titles = self._extract_titles(html)

    def _fetch_rank_html(self):
        """Load ``self.base_url`` in headless Chrome behind a browsermob proxy.

        The browser and the proxy server are always shut down, even when
        loading the page fails.

        Returns:
            The page source reported by the browser.
        """
        options = webdriver.ChromeOptions()
        # Chrome's documented spelling of the switch is `--headless`.
        options.add_argument('--headless')
        server = Server(os.getcwd() + '\\video\\browsermob-proxy-2.1.4\\bin\\browsermob-proxy.bat')
        server.start()
        try:
            proxy = server.create_proxy()
            options.add_argument('--proxy-server={0}'.format(proxy.proxy))
            options.add_argument('ignore-certificate-errors')
            # `options=` replaces the deprecated `chrome_options=` keyword.
            browser = webdriver.Chrome(options=options)
            try:
                # A HAR capture is started on the proxy; nothing in this
                # class reads it back.
                proxy.new_har("ht_list2", options={'captureContent': True})
                browser.get(self.base_url)
                return browser.page_source
            finally:
                # quit() ends the whole driver session, not just the window.
                browser.quit()
        finally:
            server.stop()

    @staticmethod
    def _dedupe(items):
        """Return *items* without repeats, each stripped of edge '<', '>', '"'.

        NOTE(review): membership is tested on the raw value while the
        stripped value is what gets stored, so a value that stripping
        changes is never recognised as a repeat. Kept as found.
        """
        kept = []
        for item in items:
            if item not in kept:
                kept.append(item.strip('<>"'))
        return kept

    @classmethod
    def _extract_links(cls, html):
        """Return the article URLs found in *html*, minus the last two."""
        found = []
        for span in re.findall(r'https://blog\.csdn\.net/.+?.*/article/details/\d*', html):
            # The pattern is greedy within a line, so one match may cover
            # several attributes; inspect each space-separated fragment.
            for fragment in span.split(' '):
                if 'article' not in fragment:
                    continue
                url = re.search('https://.+?.*', fragment.strip('="'))
                # A fragment can mention "article" without holding a URL;
                # skip it instead of failing on a missing match.
                if url is not None:
                    found.append(url.group(0))
        # NOTE(review): the last two links are discarded; presumably they are
        # not ranking entries -- confirm against the live page.
        return cls._dedupe(found)[:-2]

    @classmethod
    def _extract_titles(cls, html):
        """Return the title texts found in *html*, in page order."""
        raw = []
        for anchor in re.findall(r'class=.title.>.+?.*</a>', html):
            inner = re.findall(r'>.+?.*<', anchor)
            # NOTE(review): the text is taken from the repr of the match
            # list, so each title keeps the repr's surrounding quote
            # characters. Kept because the digest layout depends on it.
            raw.append(str(inner).strip('[]<>'))
        return [title.translate(cls._MARKUP_CHARS) for title in cls._dedupe(raw)]

    def get_hot(self):
        """Render ``page_titles`` and ``page_links`` into ``self.hot``.

        Each entry becomes one ``Top<rank><title>: <link>`` line under a
        header line. When the two lists differ in length an error is printed
        and ``self.hot`` is set to an empty string.
        """
        lines = []
        if len(self.page_links) == len(self.page_titles):
            lines.append('CSDN热榜:\n')
            pairs = zip(self.page_titles, self.page_links)
            for rank, (title, link) in enumerate(pairs, start=1):
                lines.append('Top' + str(rank) + title + ': ' + link + '\n')
        else:
            print('error:links error')
        self.hot = ''.join(lines)


class Email(object):
    """Mail the CSDN hot-list digest through the 163.com SMTP service.

    Instantiating the class builds a ``Csdn`` object (which scrapes the hot
    list) and then sends the mail immediately.
    """

    def __init__(self):
        self.csdn = Csdn()
        self.hot = self.csdn.hot
        self.email()

    def email(self):
        """Send ``self.hot`` as a UTF-8 plain-text mail over SMTP-over-SSL.

        The SMTP password is read from the ``CSDN_MAIL_PASSWORD`` environment
        variable when it is set, and falls back to the built-in value
        otherwise. Errors from connecting, logging in or sending propagate
        to the caller; the connection is closed either way.
        """
        server = 'smtp.163.com'

        sender = 'svip123456789svip@163.com'
        # NOTE(review): this credential is committed in plain text. Prefer
        # supplying it through the environment and rotating the value below.
        pwd = os.environ.get('CSDN_MAIL_PASSWORD', 'ETAVQOBPVVTYPSXZ')

        receiver = ['svip123456789svip@163.com', 'svip123456789svip@163.com']

        msg = self._build_message(sender, receiver)

        # The context manager sends QUIT and closes the socket even when
        # login or delivery raises.
        with SMTP_SSL(server) as smtp:
            smtp.login(sender, pwd)
            smtp.sendmail(sender, receiver, msg.as_string())

    def _build_message(self, sender, receiver):
        """Build the multipart message that carries ``self.hot``.

        Args:
            sender: Address placed in the ``From`` header.
            receiver: List of addresses placed in the ``To`` header.

        Returns:
            A ``MIMEMultipart`` message with one UTF-8 plain-text part.
        """
        msg = MIMEMultipart()
        msg["Subject"] = Header('CSDN热榜', 'utf-8')
        msg["From"] = sender
        # Address lists in mail headers are comma-separated.
        msg['To'] = ", ".join(receiver)
        msg.attach(MIMEText(str(self.hot), 'plain', 'utf-8'))
        return msg






if __name__ == "__main__":
    # Constructing Email runs the whole job: its __init__ builds a Csdn
    # object (which scrapes on construction) and then calls email().
    Email()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值