社区邮件要怎么样才能最快速地浏览?来用这个脚本吧,自动下载到本地,去掉广告,留下内容。
import os
import time
import subprocess
def execute(cmd):
    """Run a shell command and return everything it wrote to stdout.

    Args:
        cmd: Command line string. It is interpreted by the shell
            (``shell=True``), so callers must quote any untrusted text
            they splice into it.

    Returns:
        The raw captured stdout (``str`` on Python 2, ``bytes`` on
        Python 3). stderr is captured and discarded, and the exit status
        is not checked.
    """
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         shell=True)
    # communicate() drains stdout and stderr together, closes stdin and then
    # waits for the child. Reading only p.stdout can deadlock as soon as the
    # child fills the stderr pipe (curl prints its progress meter there), and
    # it never reaps the finished process.
    result, _ = p.communicate()
    return result
def markData(input):
    """Return *input* with every "neutron" wrapped in a cyan ``<font>`` tag.

    The match is case-sensitive; text without the keyword comes back
    unchanged.
    """
    keyword = "neutron"
    highlighted = "<font color='#00EEEE'>neutron</font>"
    # Splitting on the keyword and re-joining with its highlighted form
    # replaces every occurrence.
    return highlighted.join(input.split(keyword))
def formatMail(input):
    """Insert an HTML line break after every ninth word of a line.

    A line without any space is returned untouched. Otherwise the line is
    stripped, split on single spaces, and rebuilt with ``<br>`` after each
    complete group of nine words except the last group. The rebuilt line
    always starts with two spaces.
    """
    if " " not in input:
        return input
    words = input.strip().split(" ")
    # One row per group of nine words, each row carrying its own leading space.
    rows = [" " + " ".join(words[start:start + 9])
            for start in range(0, len(words), 9)]
    return " " + "<br>".join(rows)
def downloadOneMail(project, year, month, mailID):
    """Fetch one archived message and save its article body under ``mail/``.

    The page is fetched with curl from the pipermail archive on
    lists.openstack.org. Only the lines strictly between the line holding
    ``<!--beginarticle-->`` and the line holding ``<!--endarticle-->`` are
    kept; each is passed through formatMail() and written into a minimal
    HTML page at ``mail/<mailID>``.

    Nothing is written when that file already exists or when the fetched
    page contains no ``<!--beginarticle-->`` marker.

    Args:
        project: Mailing-list name used in the archive URL.
        year: Year component of the archive URL.
        month: Month component of the archive URL.
        mailID: File name of the message page on the server; also used as
            the local file name. The ``mail/`` directory must already exist.
    """
    # Imported locally so the block works on Python 2 (pipes.quote) and
    # Python 3 (shlex.quote) without touching the module's import lines.
    try:
        from shlex import quote as shellQuote
    except ImportError:
        from pipes import quote as shellQuote

    fileName = "mail/" + mailID
    if os.path.isfile(fileName):
        return
    url = "http://lists.openstack.org/pipermail/%s/%s-%s/%s" % (project, year, month, mailID)
    # downloadMail() scrapes mailID from a remote page and execute() runs the
    # command through a shell, so the URL is quoted to keep shell
    # metacharacters inert. Ordinary archive URLs are left unchanged.
    result = execute("curl " + shellQuote(url))
    if "<!--beginarticle-->" not in result:
        return
    lines = iter(result.split("\n"))
    with open(fileName, 'w') as wf:
        wf.write("<html><head><title>%s</title></head>" % fileName)
        wf.write("\n<style>body{font-size:12px;text-decoration:none;}a{text-decoration:none;}</style><body>")
        # Consume everything up to and including the begin-marker line ...
        for line in lines:
            if "<!--beginarticle-->" in line:
                break
        # ... then copy lines until the end-marker line, which is not copied.
        for line in lines:
            if "<!--endarticle-->" in line:
                break
            wf.write(formatMail(line) + "\n")
        wf.write("</body></html>")
    # A single parenthesised argument prints identically on Python 2 and 3.
    print(fileName + " download")
def _writeThreadPage(project, year, month):
    """Fetch the month's thread index, mirror its messages and write ``<year>-<month>.html``."""
    url = "http://lists.openstack.org/pipermail/%s/%s-%s/thread.html" % (project, year, month)
    result = execute("curl " + url)
    lines = iter(result.split("\n"))
    with open("%s-%s.html" % (year, month), 'w') as wf:
        wf.write("<html><head><title>Mail list</title></head>")
        wf.write("<style>body{font-size:12px;text-decoration:none;}a{text-decoration:none;}</style><body><a target=\"left\" href=\"" + year + "-" + month + ".html#finally\">Go to end</a>")
        # NOTE(review): the source had lost its indentation. The nesting used
        # here (copy only the "Messages:" line from the page header, then copy
        # every line of the listing) is a reconstruction -- confirm it.
        for line in lines:
            if "Messages:" in line:
                wf.write(line + "\n")
                break
        for line in lines:
            if "Last message date:" in line:
                break
            start = line.find("<A HREF=\"")
            if start != -1:
                end = line.find("\">", start)
                # Without a closing '">' on this line the slice below would
                # produce a bogus ID, so the download is skipped.
                if end != -1:
                    mailID = line[start + 9:end]
                    downloadOneMail(project, year, month, mailID)
                # Point the link at the local copy and open it in the right frame.
                line = line.replace("<A HREF=\"", "<A target=\"right\" HREF=\"mail/")
            wf.write(markData(line) + "\n")
        wf.write("<a name=\"finally\"></a></body></html>")


def _writeIndexPage(year, month):
    """Write ``index.html``: a two-frame page showing the month listing and ``list.html``."""
    with open("index.html", 'w') as wf:
        wf.write("<html><head><title>mail</title></head>\n")
        wf.write("<frameset cols=\"60%,*\"> <frame name=\"left\" src=\"" + year + "-" + month + ".html\">")
        wf.write("<frame name=\"right\" src=\"list.html\"></frameset></html>")


def _writeListPage():
    """Write ``list.html``: links to the other ``.html`` files in the current directory."""
    with open("list.html", 'w') as wf:
        wf.write("<html><head><title>list</title></head><body>\n")
        # os.listdir() returns names in arbitrary order; sorting keeps the
        # generated page stable between runs.
        for htmlfile in sorted(os.listdir(".")):
            if not os.path.isfile(htmlfile):
                continue
            if ".html" in htmlfile and htmlfile not in ("index.html", "list.html"):
                wf.write("<a target=\"left\" href=\"" + htmlfile + "\">" + htmlfile + "</a><br>")
        wf.write("</body></html>")


def downloadMail(project, year, month):
    """Mirror one month of a mailing-list archive into the current directory.

    Creates ``mail/`` when it is missing, then writes:

    * ``mail/<id>`` for each message linked from the thread index
      (through downloadOneMail),
    * ``<year>-<month>.html`` -- the thread listing with its links
      retargeted to the local copies and "neutron" highlighted,
    * ``index.html`` -- a frameset combining the listing and ``list.html``,
    * ``list.html`` -- links to every other ``.html`` file found in the
      current directory.

    Args:
        project: Mailing-list name used in the archive URL.
        year: Year as a string (it is concatenated into file names).
        month: Month name as a string, as it appears in the archive URL.
    """
    # Replaces the shell "mkdir mail": no error output on re-runs and no
    # dependency on a mkdir executable.
    if not os.path.isdir("mail"):
        os.mkdir("mail")
    _writeThreadPage(project, year, month)
    _writeIndexPage(year, month)
    _writeListPage()
# Script entry point: mirror the openstack-dev archive for August 2014.
downloadMail(project="openstack-dev", year="2014", month="August")