# Baidu News: search each stock keyword and e-mail the day's headlines.
import requests
import re
import sys
import openpyxl as px
import smtplib
from email.mime.text import MIMEText
import datetime as dt
# Date stamp (YYYY-MM-DD) embedded in the name of today's digest file.
today = dt.datetime.now().strftime("%Y-%m-%d")
print(today)

# Load the keyword list from column A of the workbook's active sheet.
wb = px.load_workbook("/home/o*****/下载/股票关键字.xlsx")
ws = wb.active
r = ws.max_row
print(r)
keywords = []
for i in range(1, r + 1):
    m = ws.cell(i, 1).value
    # Empty cells come back as None and numeric cells as numbers; either
    # would break the string concatenation that builds the search URL.
    if m is None:
        continue
    m = str(m).strip()
    if m:
        keywords.append(m)
print(keywords)
# Search routine
def searching():
    """Fetch Baidu News results for the global ``keyword`` and append them
    to today's digest file.

    Reads the module-level names ``keyword`` (the search term) and ``today``
    (the date string used in the output file name). Does nothing when the
    keyword is missing or blank.

    Each result is written as a numbered line ``N、headline(source:time)``
    followed by a line with its link. Results for which the source, time or
    link could not be extracted are skipped, but keep their number.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
        OSError: if the digest file cannot be opened or written.
    """
    word = "" if keyword is None else str(keyword).strip()
    if not word:
        return

    url = "https://www.baidu.com/s?ie=utf-8&cl=2&medium=0&rtt=1&bsst=1&rsv_dl=news_t_sk&tn=news"
    headers = {'User-Agent':'Mozilla/5.0 AppleWebkit/537.36 Chrome/69.0.3497.100 Safari/537.36'}
    # Pass the keyword through ``params`` so characters such as '&' or '#'
    # are escaped instead of corrupting the query string. The timeout keeps
    # a stalled connection from blocking the script forever.
    res = requests.get(url, params={"word": word}, headers=headers, timeout=30)
    res.encoding = res.apparent_encoding
    html = res.text

    p_title = '<html>.*?<head>.*?<title>(.*?)</title>'
    p_href = '<h3.*?<a href="(.*?)"'
    p_news = '<h3.*?<a href.*?>(.*?)</a>'
    p_time = 'c-font-normal">(.*?)</span>'
    p_source = 'c-gap-right">(.*?)</span>'

    titles = re.findall(p_title, html, re.S)
    headlines = re.findall(p_news, html, re.S)
    links = re.findall(p_href, html, re.S)
    times = re.findall(p_time, html, re.S)
    sources = re.findall(p_source, html, re.S)

    # Fall back to the keyword when the page carries no <title>.
    heading = titles[0] if titles else word
    lines = ["\n" + heading + "\n"]

    # The four lists are matched up purely by position; only entries present
    # in all of them can be rendered.
    complete = min(len(links), len(times), len(sources))
    for i, raw in enumerate(headlines):
        if i >= complete:
            break
        # Drop any markup nested inside the headline anchor.
        headline = re.sub("<.*?>", "", raw.strip())
        lines.append("%d、" % (i + 1) + headline + "(" + sources[i] + ":" + times[i] + ")" + "\n")
        lines.append(links[i] + "\n")

    # Append to the daily digest. The encoding is left at the platform
    # default so it stays consistent with the code that reads the file back.
    with open("/home/o*****/文档/" + today + "今日股市行情.txt", "a") as f:
        f.write("".join(lines))
def sendMail():
    """E-mail today's digest file as a plain-text message via QQ Mail.

    Reads the module-level ``today`` date string to locate the digest file,
    wraps its content in a ``MIMEText`` message and sends it over SMTP/SSL
    (``smtp.qq.com``, port 465).

    Raises:
        OSError: if the digest file cannot be read.
        smtplib.SMTPException: if login or delivery fails.
    """
    # NOTE(review): credentials are hard-coded in the source; consider
    # loading them from the environment or a config file instead.
    user = "3*********@qq.com"
    pwd = "************"
    to = "********@qq.com"

    # NOTE(review): the masked user directory in this path differs by one
    # character from the path searching() writes to; confirm both point at
    # the same file.
    with open("/home/o******/文档/" + today + "今日股市行情.txt", "r") as f:
        Text1 = f.read()

    # Body, subject, sender and recipient.
    msg = MIMEText(Text1)
    msg["Subject"] = "今日股市新闻"
    msg["From"] = user
    msg["To"] = to

    # The context manager issues QUIT and closes the connection even when
    # login or sending raises.
    with smtplib.SMTP_SSL("smtp.qq.com", 465) as s:
        s.login(user, pwd)
        s.send_message(msg)
# Run one search per keyword, then mail the accumulated digest once.
# ``keyword`` must stay a module-level name: searching() reads it as a global.
for keyword in keywords:
    try:
        searching()
    except requests.RequestException as exc:
        # One failed request should not stop the remaining keywords or the
        # final e-mail; report it and carry on.
        print("search failed for %r: %s" % (keyword, exc), file=sys.stderr)
sendMail()