可以写出匹配邮箱的正则表达式为:
r'[0-9a-zA-Z._]+@[0-9a-zA-Z._]+\.[0-9a-zA-Z._]+'
代码如下:
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import requests
import re
def get_email(url, *, output_path='emil.txt', timeout=10):
    """Scrape e-mail addresses from a web page and save them to a file.

    The page at ``url`` is downloaded, every substring matching the e-mail
    pattern is collected in page order (duplicates are kept), and the
    results are written to ``output_path``, one address per line. An
    existing file at that path is overwritten.

    Args:
        url: Address of the page to scan.
        output_path: File that receives the addresses; defaults to
            ``'emil.txt'`` in the current working directory.
        timeout: Seconds to wait for the server. Passed to
            ``requests.get`` so a stalled connection cannot hang the
            script indefinitely.

    Returns:
        The list of addresses that was written to ``output_path``.

    Raises:
        requests.exceptions.RequestException: If the download fails or
            times out.
    """
    content = requests.get(url, timeout=timeout).text
    # To debug the pattern against the raw page, dump ``content`` to a
    # scratch file (e.g. tmp.html) at this point.

    pattern = r'[0-9a-zA-Z._]+@[0-9a-zA-Z._]+\.[0-9a-zA-Z._]+'
    # Every character class above contains '.', so a match can swallow
    # neighbouring punctuation such as the full stop that ends a sentence
    # ("a@b.com."). An address never starts or ends with a dot, so trim them.
    # NOTE(review): the classes omit '-' and '+', so addresses containing
    # those characters are truncated or missed; widen the pattern if needed.
    emails = [match.strip('.') for match in re.findall(pattern, content)]

    with open(output_path, 'w', encoding='utf-8') as f:
        f.writelines(email + '\n' for email in emails)
    return emails
if __name__ == '__main__':
    # Script entry point: scan a sample Douban note for e-mail addresses.
    get_email('https://www.douban.com/note/553260472/?cid=44910100')