python2.7实现从JIRA爬CAMEL项目中所有BUG并保存到CSV文件

依旧是老师的作业,开源精神指引着我……

以下是Python2.7代码,2018.5.22运行通过


# coding=utf-8
author__ = 'Read Air'
import cookielib
import urllib2
import re
import csv


def saveHtml(file_name, file_content):
    """Write ``file_content`` to ``<file_name>.html`` in binary mode.

    Every ``/`` in ``file_name`` is swapped for ``_`` before the ``.html``
    suffix is appended, because ``/`` is one of the characters Windows
    forbids in file names.
    """
    # Sanitize first, then build the final on-disk name.
    safe_name = "_".join(file_name.split('/'))
    target_path = safe_name + ".html"
    # The file is opened in binary mode, so the content has to be bytes
    # rather than text; it is written through unchanged.
    with open(target_path, "wb") as html_file:
        html_file.write(file_content)


# ---------------------------------------------------------------------------
# Script body: fetch the CAMEL issue list from the Apache JIRA, then fetch
# every issue whose type is "Bug", save each page as HTML and append the
# issue key plus the first description paragraph to "BUG List.csv".
# ---------------------------------------------------------------------------

# Cookie-aware opener shared by every request below.
cookie = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
# Replace the default header list instead of appending to it: urllib2 only
# applies the first entry it meets for a given header name, so a User-Agent
# appended after the built-in "Python-urllib/x.y" entry is never sent.
opener.addheaders = [
    ('User-Agent',
     'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; '
     'rv:11.0) like Gecko Core/1.63.5221.400 QQBrowser/10.0.1125.400'),
]


def _fetch(url):
    """Return the raw response body of ``url`` using the shared opener."""
    response = opener.open(urllib2.Request(url))
    try:
        return response.read()
    finally:
        # Release the connection even if read() raises.
        response.close()


# Patterns are raw strings (so ``\s`` is not treated as a string escape) and
# are compiled once, outside the loop. re.S lets ``.`` span line breaks.
# re_law1: the container that holds the issue list on the split-view page.
re_law1 = re.compile(
    r'<div class="aui-group split-view">(.*?)</div></div></div></div>', re.S)
# re_law2: one issue link -> (issue key, href, type icon alt text, icon title).
re_law2 = re.compile(
    r'<a class="splitview-issue-link" data-issue-key="(.*?)" href="(.*?)">'
    r'<img height="16" width="16" alt="(.*?)" title="(.*?)"', re.S)
# re_law3: first <p> paragraph inside the issue's user content block.
re_law3 = re.compile(
    r'<div class="user-content-block">[\s\S]+? <p>(.*?)</p>', re.S)

content = _fetch(
    'https://issues.apache.org/jira/browse/CAMEL-12525?jql=project%20%3D%20CAMEL%20AND%20resolution%20%3D%20Unresolved%20ORDER%20BY%20priority%20DESC%2C%20updated%20DESC')
print("Success in Web1!")
saveHtml("h", content)

key_data = re_law1.findall(content)
if key_data:
    issue_links = re_law2.findall(key_data[0])
else:
    # Previously this case crashed with IndexError on key_data[0].
    print("Issue list container not found; the page layout may have changed.")
    issue_links = []

# Open the CSV once for the whole run. Binary append mode is what the
# Python 2 csv module expects; text mode produces blank rows on Windows.
with open("BUG List.csv", "ab") as out:
    csv_writer = csv.writer(out, dialect="excel")
    # Only emit the header for a new/empty file so repeated runs do not
    # scatter duplicate header rows through the data.
    out.seek(0, 2)
    if out.tell() == 0:
        csv_writer.writerow(["Number", "Description"])

    index = 0
    for issue_key, issue_path, issue_type, _icon_title in issue_links:
        # The type icon's alt text identifies the issue type; keep bugs only.
        if issue_type != "Bug":
            continue
        index += 1
        print("find " + str(index) + " Bug(s)! named " + issue_key)
        content1 = _fetch('https://issues.apache.org' + issue_path)
        saveHtml(issue_key, content1)
        # Search once and reuse the result instead of running findall twice.
        paragraphs = re_law3.findall(content1)
        description = paragraphs[0] if paragraphs else "Not Found!"
        csv_writer.writerow([issue_key, description])


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值