from bs4 import BeautifulSoup
import datetime
import requests
import bs4
import re
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates #处理日期
import hashlib
def md5_key(str):
    """Return the hexadecimal MD5 digest of a text string.

    The text is encoded as UTF-8 before hashing.

    NOTE: the parameter name shadows the builtin ``str``; it is kept so that
    existing callers are not affected.
    """
    return hashlib.md5(str.encode(encoding='utf-8')).hexdigest()
def my_regular(patt_id_tag, content):
    """Return the first ``re.findall`` result for a pattern, or ``"-1"``.

    Args:
        patt_id_tag: Regular expression to search for.
        content: Text to search in.

    Returns:
        The first element of ``re.findall(patt_id_tag, content)`` (a string,
        or a tuple when the pattern has several groups), or the string
        ``"-1"`` when nothing matches.
    """
    matches = re.findall(patt_id_tag, content)
    return matches[0] if matches else "-1"
def get_id_tag(content, id_name):
    """Return the first HTML tag in ``content`` whose ``id`` is ``id_name``.

    Args:
        content: HTML source text.
        id_name: Value of the ``id`` attribute to look for; surrounding
            whitespace is ignored.

    Returns:
        The full text of the first matching tag (``<...>``), or ``"-1"``
        when no tag matches.
    """
    # Escape the id so characters such as "." or "+" are matched literally
    # instead of being interpreted as regex syntax.
    escaped_id = re.escape(id_name.strip())
    tag_pattern = r"""<[^>]*id=['"]?""" + escaped_id + r"""['" ][^>]*>"""
    return my_regular(tag_pattern, content)
def get_id_value(content, id_name):
    """Return the ``value`` attribute of the tag whose ``id`` is ``id_name``.

    Only single-quoted ``value='...'`` attributes are recognised.

    Args:
        content: HTML source text.
        id_name: Value of the ``id`` attribute identifying the tag.

    Returns:
        The attribute value as a string, or ``"-1"`` when either the tag or
        its ``value`` attribute cannot be found.
    """
    tag = get_id_tag(content, id_name)
    # Stop at the first closing quote: a greedy ``.*`` would swallow any
    # further single-quoted attributes that follow ``value`` in the tag.
    return my_regular(r"value='([^']*)'", tag)
def getCountOfBugFromHtmlSource(content):
    """Extract the record count shown in the page text.

    Looks for ``<strong>N</strong> 条记录`` in ``content`` and returns the
    captured text, or ``"-1"`` when the marker is absent.
    """
    record_count_pattern = "<strong>(.*)</strong> 条记录"
    return my_regular(record_count_pattern, content)
def get_tr_tag(content):
    """Return the text between the first ``<tr `` and a later ``</tr>``.

    The match cannot span line breaks because ``.`` does not match a
    newline. Returns ``"-1"`` when no table row is found.
    """
    return my_regular("<tr (.*)</tr>", content)
def id_to_num(content):
    """Return the first run of digits found in ``content``.

    Returns:
        The digits as a string, or ``"-1"`` when ``content`` contains no
        digit.
    """
    # Raw string: ``\d`` in a normal literal is an invalid escape sequence
    # and triggers a warning on current Python versions.
    return my_regular(r"(\d+)", content)
def time_get_day(year, month, day):
    """Return a ``datetime`` for 23:59:59 on the given calendar day.

    The three parts are joined into a ``Y-M-D 23:59:59`` string and parsed
    with ``strptime``, so an invalid date raises ``ValueError``.
    """
    stamp = "{}-{}-{} 23:59:59".format(year, month, day)
    return datetime.datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S")
def get_list_from_htmlsource(ulist, html):
    """Parse the table rows of ``html`` and append them to ``ulist``.

    The first ``<tr>`` is skipped. Every following row with more than ten
    ``<td>`` cells becomes one list: the first three cells are reduced to
    the first integer found in their text (``-1`` when there is none) and
    the remaining cells are kept as their text.

    Args:
        ulist: List that receives one list per parsed row (modified in place).
        html: HTML source containing the table.
    """
    soup = BeautifulSoup(html, 'html.parser')
    rows = iter(soup.find_all('tr'))
    # Skip the first row; the default avoids StopIteration when the page
    # contains no <tr> at all.
    next(rows, None)
    for tr in rows:
        cells = tr.select('td')
        # NOTE(review): rows with exactly 11 cells pass this check but have
        # no index 11, which list_data_normalization reads - confirm the
        # table layout.
        if len(cells) <= 10:
            continue
        ulist.append([
            int(id_to_num(cell.get_text())) if position < 3 else cell.get_text()
            for position, cell in enumerate(cells)
        ])
# Data normalization
def list_data_normalization(ulist, year_split_id=6102, year_before="2018",
                            year_after="2019"):
    """Attach years to the row timestamps and flag resolved rows.

    Each row is modified in place:

    * index 11 (resolution time, ``MM-DD HH:MM``) becomes a ``datetime``.
      The year starts at the current year and is decreased by one whenever
      a row's time would be later than the previous resolved row's time, so
      the rows are presumably expected newest-resolved first. The
      placeholder ``"00-00 00:00"`` is replaced by ``2099-01-01``.
    * index 5 is set to ``1`` for resolved rows and ``0`` otherwise.
    * index 7 (creation time, ``MM-DD HH:MM``) is prefixed with a year
      chosen from the bug id at index 0 and stays a string.

    Args:
        ulist: Rows as produced by ``get_list_from_htmlsource``.
        year_split_id: Rows whose id is below this value get
            ``year_before``; all others get ``year_after``.
        year_before: Year prefix for ids below ``year_split_id``.
        year_after: Year prefix for the remaining ids.

    Returns:
        A new list with the same row objects, ordered by the original
        ``MM-DD HH:MM`` creation string.

    Raises:
        ValueError: If a timestamp does not match ``MM-DD HH:MM``.
    """
    unresolved_marker = "00-00 00:00"
    stamp_format = "%Y-%m-%d %H:%M"

    now = datetime.datetime.now()
    year = int(now.year)
    previous_resolved = now
    for row in ulist:
        raw_resolved = row[11]
        if raw_resolved != unresolved_marker:
            resolved_at = datetime.datetime.strptime(
                "{}-{}".format(year, raw_resolved), stamp_format)
            if previous_resolved < resolved_at:
                # Later than the previous row: the timestamp must belong to
                # the year before.
                year -= 1
                resolved_at = datetime.datetime.strptime(
                    "{}-{}".format(year, raw_resolved), stamp_format)
            previous_resolved = resolved_at
            row[5] = 1  # resolved bug
        else:
            # Far-future placeholder so an unresolved bug never counts as
            # resolved before any real date.
            resolved_at = datetime.datetime.strptime("2099-01-01", "%Y-%m-%d")
            row[5] = 0
        row[11] = resolved_at

    rows_by_creation = sorted(ulist, key=lambda row: row[7])
    for row in rows_by_creation:
        prefix = year_before if row[0] < year_split_id else year_after
        created = "{}-{}".format(prefix, row[7])
        # Parsed only to validate the format; the value stays a string.
        datetime.datetime.strptime(created, stamp_format)
        row[7] = created
    return rows_by_creation
# index = 7: creation time
# index = 11: resolution time
def count_before_time(ulist, time, index):
    """Count rows whose value at ``index`` sorts before ``time``.

    Both sides are compared as strings (``str(row[index]) < str(time)``).
    For ``index == 7`` every row is considered; for any other index only
    rows with ``row[5] > 0`` are counted.

    Args:
        ulist: Rows to inspect.
        time: Cut-off value; converted with ``str`` before comparing.
        index: Column to compare.

    Returns:
        The number of matching rows.
    """
    cutoff = str(time)
    if index == 7:
        return sum(1 for row in ulist if str(row[index]) < cutoff)
    return sum(1 for row in ulist if row[5] > 0 and str(row[index]) < cutoff)
# Days off that fall outside the normal weekend rule, as ``YYYY-MM-DD``.
_HOLIDAYS = frozenset({
    "2018-01-01", "2018-02-15", "2018-02-16", "2018-02-17", "2018-02-18",
    "2018-02-19", "2018-02-20", "2018-02-21", "2018-04-05", "2018-04-06",
    "2018-04-07", "2018-04-29", "2018-04-30", "2018-05-01", "2018-06-16",
    "2018-06-17", "2018-06-18", "2018-09-22", "2018-09-23", "2018-09-24",
    "2018-10-01", "2018-10-02", "2018-10-03", "2018-10-04", "2018-10-05",
    "2018-10-06", "2018-10-07", "2018-12-30", "2018-12-31", "2019-01-01",
    "2019-02-01", "2019-02-04", "2019-02-05", "2019-02-06", "2019-02-07",
    "2019-02-08", "2019-02-11", "2019-02-12", "2019-04-05", "2019-05-01",
    "2019-06-07", "2019-09-13", "2019-10-01", "2019-10-02", "2019-10-03",
    "2019-10-04", "2019-10-07",
})

# Weekend days that are worked, as ``YYYY-MM-DD``.
# NOTE(review): the original list contained the malformed entry "2018-04-2",
# which could never match; it is presumably the 2018-02-24 make-up workday -
# confirm against the holiday calendar in use.
_MAKEUP_WORKDAYS = frozenset({
    "2018-02-11", "2018-02-24", "2018-04-08", "2018-04-28", "2018-09-29",
    "2018-09-30", "2018-12-29", "2019-09-29", "2019-10-12",
})


def day_is_weekday(time):
    """Classify a date as working day, weekend or holiday.

    Args:
        time: A ``datetime.date`` or ``datetime.datetime``.

    Returns:
        ``0`` for a working day (including listed make-up workdays),
        ``1`` for an ordinary Saturday or Sunday,
        ``2`` for a listed holiday.
    """
    day = time.strftime('%Y-%m-%d')
    if day in _HOLIDAYS:
        return 2
    if day in _MAKEUP_WORKDAYS:
        return 0
    # weekday(): Monday is 0, so 5 and 6 are Saturday and Sunday.
    if time.weekday() in (5, 6):
        return 1
    return 0
def get_pwd(password, str1):
    """Build the password hash submitted with the login form.

    The result is ``md5(md5(password) + rand)``, where ``rand`` is the
    ``value`` of the element with id ``verifyRand`` found in ``str1``.

    Args:
        password: Plain-text password.
        str1: HTML source of the login page.

    Returns:
        The hexadecimal digest string.
    """
    verify_rand = get_id_value(str1, "verifyRand")
    password_digest = md5_key(password)
    return md5_key(password_digest + verify_rand)
# Main login routine
def login(baseurl_host, account, password, headers_base):
    """Log in through the user-login page and return the session.

    The login page is fetched first because its text is passed to
    ``get_pwd`` to build the password hash. The server's response to the
    login POST is printed.

    Args:
        baseurl_host: Scheme, host and port of the server, without a
            trailing slash.
        account: Account name.
        password: Plain-text password.
        headers_base: HTTP headers sent with both requests.

    Returns:
        The ``requests`` session used for the login, so later requests can
        reuse its cookies.
    """
    login_url = baseurl_host + "/zentaopms/www/user-login.html"
    # A session keeps the login cookies for the requests that follow.
    session = requests.session()
    login_page = session.get(login_url, headers=headers_base)
    # Form fields for the login POST.
    # NOTE(review): the referer is hard-coded to one host and does not
    # follow baseurl_host - confirm whether the server checks it.
    login_data = {
        'account': account,
        'password': get_pwd(password, login_page.text),
        'referer': 'http%3A%2F%2F192.168.100.98%3A8089%2Fzentaopms%2Fwww%2Fmy%2F',
    }
    response = session.post(login_url, headers=headers_base, data=login_data)
    print("--------登录详情-----------------")
    print(response.text)
    print("--------登录结束-----------------")
    return session
def getBugs(url_host, session, headers_base):
    """Submit the bug search form and return the result page's HTML.

    First posts the search form to ``search-buildQuery.html`` (its response
    is not used), prints a separator line, then fetches the
    ``bug-browse-4-0-bysearch-0-resolvedDate_desc-2000-2000.html`` page.

    Args:
        url_host: Scheme, host and port of the server, without a trailing
            slash.
        session: Logged-in ``requests`` session.
        headers_base: HTTP headers sent with both requests.

    Returns:
        The text of the result page.
    """
    query_form = 'fieldtitle=&fieldkeywords=&fieldsteps=&fieldassignedTo=&fieldresolvedBy=&fieldstatus=&fieldconfirmed=ZERO&fieldproduct=4&fieldplan=&fieldmodule=0&fieldproject=&fieldseverity=0&fieldpri=0&fieldtype=&fieldos=&fieldbrowser=&fieldresolution=&fieldactivatedCount=&fieldtoTask=&fieldtoStory=&fieldopenedBy=&fieldclosedBy=&fieldlastEditedBy=&fieldmailto=&fieldopenedBuild=&fieldresolvedBuild=&fieldopenedDate=&fieldassignedDate=&fieldresolvedDate=&fieldclosedDate=&fieldlastEditedDate=&fielddeadline=&fieldid=&fieldbugfrom=&fieldbugproject=&fieldcustomercompany=&fieldgcprojectno=&fieldgcprojectmanager=&fieldtimecount=&fieldbugresource=&andOr1=AND&field1=openedBy&operator1=%3D&value1=yangjian&andOr2=and&field2=id&operator2=%3D&value2=&andOr3=and&field3=keywords&operator3=include&value3=&groupAndOr=and&andOr4=AND&field4=steps&operator4=include&value4=&andOr5=and&field5=assignedTo&operator5=%3D&value5=&andOr6=and&field6=resolvedBy&operator6=%3D&value6=&module=bug&actionURL=%2Fzentaopms%2Fwww%2Fbug-browse-4-0-bySearch-myQueryID.html&groupItems=3&queryID=&formType=lite'
    # Submit the search form before requesting the result list.
    session.post(url_host + "/zentaopms/www/search-buildQuery.html",
                 headers=headers_base, data=query_form)
    print("**********************")
    # Result list ordered by resolution date, newest first.
    # Other URL forms seen for the same list:
    #   /zentaopms/www/bug-browse-4-0-bySearch-myQueryID.html
    #   /zentaopms/www/bug-browse-4-0-bySearch-myQueryID--2000-2000-1.html
    result_url = url_host + "/zentaopms/www/bug-browse-4-0-bysearch-0-resolvedDate_desc-2000-2000.html"
    result_page = session.get(result_url, headers=headers_base)
    return result_page.text
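# Collect the data series used for plotting.
# days   = number of most recent days to cover
# xlist  = output: date series
# ylist  = output: cumulative number of bugs created
# ylist2 = output: cumulative number of bugs resolved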
def get_list_data(days, xlist, ylist, ylist2, bugs=None):
    """Fill the plot series with cumulative bug counts per working day.

    Walks the ``days - 1`` days that end yesterday, each taken at 23:59:59.
    For every day that ``day_is_weekday`` classifies as ``0`` it appends
    the day to ``xlist``, the number of bugs created before it to ``ylist``
    and the number of bugs resolved before it to ``ylist2``.

    Args:
        days: Size of the look-back window in days.
        xlist: Output list of ``datetime`` values (modified in place).
        ylist: Output list of created-bug counts (modified in place).
        ylist2: Output list of resolved-bug counts (modified in place).
        bugs: Normalized bug rows to count. When omitted, the module-level
            ``ulist`` is used, as the original implementation always did.
    """
    if bugs is None:
        bugs = ulist  # backward-compatible fallback to the module global

    now = datetime.datetime.now()
    day_end = time_get_day(now.year, now.month, now.day)
    day_end = day_end + datetime.timedelta(days=-days)
    steps_done = 0
    while steps_done < days - 1:
        day_end = day_end + datetime.timedelta(days=1)
        steps_done += 1
        if day_is_weekday(day_end) == 0:
            xlist.append(day_end)
            ylist.append(count_before_time(bugs, day_end, 7))
            ylist2.append(count_before_time(bugs, day_end, 11))
# Plotting
# xlist  = date series
# ylist  = cumulative number of bugs
# ylist2 = cumulative number of resolved bugs
def draw_picture(xlist, ylist,ylist2):
    """Plot both bug-count series over time and show the figure.

    Args:
        xlist: Dates for the x axis.
        ylist: Cumulative bug counts, drawn in red.
        ylist2: Cumulative resolved-bug counts, drawn in green.
    """
    np.set_printoptions(suppress=True)
    mpl.rcParams['font.sans-serif'] = ['SimHei']  # default font: SimHei
    mpl.rcParams['axes.unicode_minus'] = False  # render minus signs correctly

    plt.figure(figsize=(13, 5))
    plt.plot(xlist, ylist, color='red', linewidth=1, marker='x',
             label='累计bug数,单位(个)')
    plt.plot(xlist, ylist2, color='green', linewidth=1, marker='x',
             label='累计解决bug数,单位(个)')
    # Show the x-axis major ticks as month-day.
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
    plt.grid(True)
    plt.legend(loc='lower right')  # legend in the lower-right corner
    plt.xlabel('日期', color='r')
    plt.ylabel('个数', color='r')
    plt.axis('tight')
    plt.title('GIS内核与应用组迭代统计图')

    # Write each count next to its data point, one series after the other.
    for counts, text_color in ((ylist, 'green'), (ylist2, 'blue')):
        for x_pos, count in zip(xlist, counts):
            plt.text(x_pos, count, count, ha='center', va='bottom',
                     color=text_color, fontsize=12)
    plt.show()
# The program starts here.
# Connection settings.
# NOTE(review): the credentials are hard-coded in source - consider reading
# them from the environment or a config file.
url_host = "http://192.168.100.98:8089"
account = "wuyongbo"
password = "test.1234"
headers_base = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
    'Connection': 'keep-alive',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Referer': 'http://192.168.100.98:8089/zentaopms/www/user-login.html',
}

# Log in (replace the account settings above with your own user name and
# password), then download the bug list page.
session = login(url_host, account, password, headers_base)
content = getBugs(url_host, session, headers_base)

# Row layout: index 0 = bug id, 7 = creation time, 11 = resolution time.
ulist = []
get_list_from_htmlsource(ulist, content)
ulist = list_data_normalization(ulist)

# Build the series for the requested number of calendar days.
xlist = []
ylist = []
ylist2 = []
get_list_data(30, xlist, ylist, ylist2)

print("time---总共bug数量----已经修改的bug数量")
for day, total, solved in zip(xlist, ylist, ylist2):
    str1 = "----".join([datetime.datetime.strftime(day, '%Y-%m-%d'), str(total), str(solved)])
    print(str1)
draw_picture(xlist, ylist, ylist2)
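# Known open issues:
# 1: Are the holiday and make-up-workday lists in day_is_weekday complete?
# 2: getBugs currently forces a page size of 2000 records. Once the total number of bugs exceeds 2000 this needs real pagination support (to be revisited when that happens).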
#
#
#
#
#
#