1. 下载实习信息
我是在“求职汇”公众号上找到实习信息的。该公众号每天发布大量实习信息,适合用 Python 批量处理。
import requests
# Article page on mp.weixin.qq.com that is scraped for internship postings.
url = "https://mp.weixin.qq.com/s/sw3zrQuqR9667PYnxw98oQ"
from selenium import webdriver #requires a browser driver executable
import re
# XPath of the element whose text is extracted below.
xpath_all = '//*[@id="js_content"]'
# Run Chrome headless (no visible browser window).
option=webdriver.ChromeOptions()
option.add_argument('--headless')
# NOTE(review): `executable_path=` and `find_element_by_xpath` are Selenium 3
# style calls; recent Selenium 4 releases no longer accept them -- confirm the
# installed Selenium version before running.
driver=webdriver.Chrome(options=option,executable_path="chromedriver.exe")
driver.get(url=url)
# `req` is a live WebElement: `req.text` is read again further down, so the
# driver has to stay open until then (it is not closed anywhere in this section).
req=driver.find_element_by_xpath(xpath_all)
# Notebook-style sanity check: length of the extracted text.
len(req.text)
2. 简单筛选出我想要的实习信息——可以远程的
从每条实习信息的文本中提取:投递邮箱、职位名称(title)、职位详细描述,以及招聘方要求的邮件主题(同时用作简历附件的文件名)。
# Split the page text on "#"; every chunk after the first is handled as one
# posting by the code below (presumably each posting starts with "#" -- verify
# against the article layout).
all_jobs = req.text.split("#")
# define a function to get all the job titles and the number
def selection(all_intern):
    """Pick the postings that appear to allow remote work.

    A posting is kept when it contains "远程" (remote) and neither "不" nor
    "无" occurs in the (up to) 10 characters right before the first
    occurrence, which filters phrases such as "不支持远程".

    Args:
        all_intern: sequence of posting strings. Index 0 is always skipped
            (in this file it is the text before the first "#" separator).

    Returns:
        dict mapping the posting's index in ``all_intern`` to its first line
        (used as the job title).
    """
    keyword = "远程"
    negations = ("不", "无")
    job_titles = {}
    for i, intern in enumerate(all_intern):
        if i == 0 or keyword not in intern:
            continue
        inde = intern.index(keyword)
        # Clamp the window start at 0: a negative start index would wrap
        # around to the end of the string and yield an empty window, so a
        # posting that begins with "不远程" would slip through the filter.
        window = intern[max(0, inde - 10):inde]
        if any(neg in window for neg in negations):
            continue
        job_titles[i] = intern.split("\n")[0]
    return job_titles
selected = selection(all_jobs)
selected

# Patterns are compiled once instead of on every loop iteration.
# The local part of the address may contain "." and "+", so an address such
# as "first.last@example.com" is captured whole instead of as "last@...".
_EMAIL_RE = re.compile(r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)+')
# Characters that can separate the fields of a requested subject line,
# e.g. "姓名-学校-专业".
_SEPARATOR_RE = re.compile(r'[_+\-]')
# Punctuation that ends the text before the first field / after the last field.
_HEAD_DELIMS_RE = re.compile(r'[()《》——;,。“”<>!【】:\n]')
_TAIL_DELIMS_RE = re.compile(r'[()《》——;,。“”<>!【】\n]')

# Each entry: [separation, job_title, job_email, email_title, job_descrip].
# separation / job_email / email_title are None when they cannot be found.
job_list = []
for i in selected:
    job = all_jobs[i]
    # First line is the title, the rest is the description. partition() gives
    # an empty description instead of raising when there is no newline.
    job_title, _, job_descrip = job.partition("\n")

    # find the sending email address
    match = _EMAIL_RE.search(job_descrip)
    if match is None:
        # Keep the posting; the address can still be typed in when sending.
        job_email = None
        decrip_without_email = job_descrip
    else:
        job_email = match.group(0)
        decrip_without_email = job_descrip[:match.start()] + job_descrip[match.end():]

    # Reset per posting so a value from the previous posting never leaks in.
    separation = None
    email_title = None

    # extract the title of the email from job_descrip:
    # prefer text inside “...”; the closing quote is searched after the
    # opening one so a stray earlier ” cannot produce an empty title.
    open_idx = decrip_without_email.find("\u201c")
    close_idx = -1
    if open_idx != -1:
        close_idx = decrip_without_email.find("\u201d", open_idx + 1)

    if open_idx != -1 and close_idx != -1:
        email_title = decrip_without_email[open_idx + 1:close_idx]
        title_match = _SEPARATOR_RE.search(email_title)
        if title_match is not None:
            separation = title_match.group(0)
    else:
        # No quoted title: take the text around the first separator character,
        # trimmed at the nearest punctuation on either side.
        title_match = _SEPARATOR_RE.search(decrip_without_email)
        if title_match is not None:
            separation = title_match.group(0)
            t1 = decrip_without_email.split(separation)
            t1[0] = _HEAD_DELIMS_RE.split(t1[0])[-1]
            t1[-1] = _TAIL_DELIMS_RE.split(t1[-1])[0]
            email_title = separation.join(t1)
            # An over-long result means the heuristic grabbed ordinary prose.
            if len(email_title) > 100:
                email_title = "error"

    # parse them into job_list
    job_list.append([separation, job_title, job_email, email_title, job_descrip])
len(job_list)
3. 发送邮件
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.header import Header
from email.mime.application import MIMEApplication
from email.mime.base import MIMEBase
from email import encoders
import smtplib as smtp
import os
# Fill in your own details between the quotes.
# get_real_title() looks up each field of a requested subject line in this
# dict and substitutes the value when the field matches a key.
keywords_dict = {
    "姓名": "",
    "年级": "",
    "专业": "",
    "联系方式": "",
    "学校": "",
    "本科学校": "",
    "毕业时间": "",
    "电话": ""
}
def get_real_title(separation, email_title):
    """Suggest an e-mail subject and let the user confirm or replace it.

    The requested subject template (e.g. "姓名-学校-专业") is split on
    ``separation`` and every field found in ``keywords_dict`` is replaced by
    the user's own value. The suggestion is printed, then one line is read
    from stdin.

    Args:
        separation: separator character of the template, or None when the
            parser found none.
        email_title: subject template, or None / "error" when the parser
            could not extract one.

    Returns:
        The text the user typed; if the user just presses Enter, the printed
        suggestion (which may be an empty string when nothing was parsed).
    """
    if not email_title or email_title == "error":
        # Nothing usable was parsed; the user has to type the subject.
        suggestion = ""
    elif not separation:
        # Single-field template: substitute it as a whole.
        suggestion = keywords_dict.get(email_title, email_title)
    else:
        suggestion = separation.join(
            keywords_dict.get(field, field) for field in email_title.split(separation)
        )
    print(suggestion)
    typed = input()
    # An empty reply accepts the suggestion instead of yielding a blank subject.
    return typed if typed.strip() else suggestion
def write_content(job_email, job_descrip):
    """Build the body of an application e-mail for one posting.

    A middle paragraph is drafted from keywords found in the job description
    (case-insensitive "excel", "wind", "python"). The draft is shown as the
    input prompt; whatever the user types replaces it, and an empty reply
    keeps the draft.

    Args:
        job_email: recipient address. Not used for the body; kept so existing
            callers keep working.
        job_descrip: full text of the job description.

    Returns:
        The complete body: fixed greeting + middle paragraph + fixed ending.
    """
    # Fixed opening of every e-mail.
    starter = "您好!\n\n我是来自。\n"

    # (keyword pattern, paragraph added when the keyword occurs), in output order.
    snippets = (
        (r'excel', "\n本人通过了计算机二级OFFICE的考试,能够熟练运用Excel的数据透视表和各类函数功能。\n"),
        (r'wind', "\n\n"),
        (r'python', "\n本人能够熟练使用python的pandas和numpy进行数据清洗和数据分析。\n"),
    )
    zhongjian = "".join(
        paragraph
        for pattern, paragraph in snippets
        if re.search(pattern, job_descrip, flags=re.IGNORECASE)
    )

    # Fixed closing: internship availability and thanks.
    ending = "\n关于实习时间,\n\n简历已经添加在附件中,希望您能考虑我的申请!"

    # Show the draft and let the user supply a replacement.
    cover = input(zhongjian)
    if not cover.strip():
        # Pressing Enter keeps the draft instead of silently dropping it.
        cover = zhongjian
    return starter + cover + ending
def send_email(title, content, receiver, resume_path="简历.pdf"):
    """Send one application e-mail with the résumé attached.

    Uses the module-level ``smtp`` connection and ``sender`` address.

    Args:
        title: subject line; also used as the attachment name ("<title>.pdf").
        content: plain-text body.
        receiver: recipient address.
        resume_path: path of the PDF to attach (defaults to "简历.pdf").

    Returns:
        The MIMEMultipart message that was sent.

    Raises:
        OSError: if the résumé file cannot be read.
        smtplib.SMTPException: if the server refuses the message.
    """
    from email.utils import formataddr

    text = MIMEText(content, 'plain', 'utf-8')
    # Read the résumé; the file is closed as soon as its bytes are loaded.
    with open(resume_path, 'rb') as pdf_file:
        pdf = MIMEApplication(pdf_file.read(), _subtype='pdf')
    pdf.add_header('content-disposition', 'attachment', filename="%s.pdf" % title)

    message = MIMEMultipart(_subparts=(text, pdf))
    message['Subject'] = Header(title, 'utf-8')
    # A From header must carry an actual address, not only a display name;
    # formataddr() encodes the non-ASCII display name and appends the address.
    message['From'] = formataddr(("自定义", sender))
    message['To'] = receiver
    smtp.sendmail(sender, receiver, message.as_string())
    return message
# Log in to QQ Mail over SMTP.
# password: the 16-character SMTP authorization code
# sender:   your e-mail address
# username: the part of your address before the "@"
import smtplib  # the file imports this module only under the alias `smtp`

password = ""
sender = "123456@qq.com"
username = "123456"
# From here on `smtp` is the live connection (it replaces the module alias of
# the same name); send_email() relies on this global.
# NOTE(review): the login goes over a connection that is never upgraded to
# TLS -- consider smtplib.SMTP_SSL / starttls(); confirm the provider's ports.
smtp = smtplib.SMTP()
smtp.connect('smtp.qq.com')
smtp.login(username, password)
# Start sending: walk through the parsed postings interactively.
failures = []  # postings whose e-mail could not be prepared or sent
success = []   # recipient addresses that were sent successfully
try:
    for job in job_list:
        # job layout: [separation, job_title, job_email, email_title, job_descrip]
        sepa, job_title, receiver, email_title, job_descrip = job
        print(job_title)
        print(job_descrip)
        whether_to_send = input("是否发送邮件:")
        if whether_to_send.strip().upper() == "N":
            continue
        print(receiver)
        # Reply "Y" to keep the parsed address; anything else is taken as the
        # corrected address.
        check_email = input("邮箱是否正确?").strip()
        if check_email.upper() != "Y":
            receiver = check_email
        try:
            title = get_real_title(sepa, email_title)
            content = write_content(receiver, job_descrip)
            msg = send_email(title, content, receiver)
        except Exception as exc:
            # Report the reason and carry on with the next posting.
            print("发送失败:", repr(exc))
            failures.append(job)
        else:
            success.append(receiver)
finally:
    # Close the connection even if the session is interrupted.
    smtp.quit()
failures