# Python script: log in to ZenTao (禅道), count the bugs, and draw a statistics chart.
#
# Source article: https://blog.csdn.net/qd1308504206/article/details/85705183
from bs4 import BeautifulSoup

import datetime
import requests
import bs4
import re
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates    #处理日期
import hashlib


def md5_key(str):
    """Return the hexadecimal MD5 digest of ``str`` encoded as UTF-8."""
    # NOTE: the parameter name shadows the builtin ``str``; it is left as-is
    # so that callers passing it by keyword keep working.
    return hashlib.md5(str.encode(encoding='utf-8')).hexdigest()

def my_regular(patt_id_tag, content):
    """Return the first ``re.findall`` result of ``patt_id_tag`` in ``content``.

    The string "-1" is returned when the pattern does not match at all.
    """
    matches = re.findall(patt_id_tag, content)
    return matches[0] if matches else "-1"

def get_id_tag(content, id_name):
    """Return the first tag in ``content`` whose ``id`` attribute is ``id_name``.

    ``id_name`` is stripped of surrounding whitespace and matched literally
    (regex metacharacters in it are escaped).  The id may be preceded by an
    optional quote and must be followed by a quote or a space.

    Returns the full tag text, or "-1" when no such tag is found.
    """
    id_name = id_name.strip()
    # re.escape keeps characters such as "." or "[" in an id from being
    # interpreted as regular-expression syntax.
    patt_id_tag = """<[^>]*id=['"]?""" + re.escape(id_name) + """['" ][^>]*>"""
    return my_regular(patt_id_tag, content)
 
def get_id_value(content, id_name):
    """Return the single-quoted ``value`` attribute of the tag with id ``id_name``.

    The tag is located with ``get_id_tag`` (which strips ``id_name`` itself).
    Returns "-1" when the tag is missing or has no single-quoted ``value``.
    """
    tag = get_id_tag(content, id_name)
    # [^']* stops at the first closing quote; the previous greedy (.*) ran to
    # the LAST quote in the tag and swallowed any attributes after ``value``.
    return my_regular("""value='([^']*)'""", tag)
 
def getCountOfBugFromHtmlSource(content):
    """Return the text of the ``<strong>`` element directly before " 条记录".

    Returns "-1" when the marker is not present in ``content``.
    """
    # [^<]* keeps the capture inside a single <strong> element; the previous
    # greedy (.*) could start at an earlier <strong> on the same line.
    return my_regular("<strong>([^<]*)</strong> 条记录", content)

def get_tr_tag(content):
    """Return the text between the first ``<tr `` and a closing ``</tr>``.

    The capture is greedy and, because no regex flags are used, does not
    cross line breaks.  Returns "-1" when nothing matches.
    """
    return my_regular("""<tr (.*)</tr>""", content)

def id_to_num(content):
    """Return the first run of digits in ``content`` as a string, or "-1" if none."""
    # Raw string: "\d" in a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    return my_regular(r"(\d+)", content)

def time_get_day(year, month, day):
    """Return a ``datetime`` for 23:59:59 on the given calendar day."""
    end_of_day = "{}-{}-{} 23:59:59".format(year, month, day)
    return datetime.datetime.strptime(end_of_day, "%Y-%m-%d %H:%M:%S")


def get_list_from_htmlsource(ulist, html):
    """Parse the table rows of ``html`` and append one list per row to ``ulist``.

    The first ``<tr>`` is skipped (presumably the header row - confirm), and
    only rows with more than 10 ``<td>`` cells are kept.  The first three
    cells are reduced to their first run of digits and stored as ``int``
    (-1 when a cell holds no digits); the remaining cells are stored as text.

    ``ulist`` is modified in place; nothing is returned.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # Slicing skips the first row without raising StopIteration when the page
    # contains no <tr> at all (the previous bare next() call did).
    for tr in soup.find_all('tr')[1:]:
        cells = tr.select('td')
        if len(cells) <= 10:
            continue
        row = []
        for position, td in enumerate(cells):
            text = td.get_text()
            row.append(int(id_to_num(text)) if position < 3 else text)
        ulist.append(row)

# Data normalization
def list_data_normalization(ulist):
    """Attach years to the year-less timestamps of every bug row, in place.

    Each row is a list where index 0 is the bug id, index 7 the creation
    time and index 11 the resolution time, both given as "MM-DD HH:MM".

    Resolution time (index 11): "00-00 00:00" marks an unresolved bug; it is
    replaced by 2099-01-01 and index 5 is set to 0.  Any other value is
    parsed into a ``datetime`` and index 5 is set to 1.  The year starts at
    the current year and is decremented each time a row's timestamp would be
    later than that of the previously resolved row, so the rows are expected
    in descending resolution order.

    Creation time (index 7): prefixed with "2018-" for ids below 6102 and
    "2019-" otherwise (presumably 6102 is the first id created in 2019 -
    confirm) and kept as a string.

    Returns a new list of the same row objects sorted by creation time.

    Raises:
        ValueError: if a timestamp does not match the expected format.
    """
    now = datetime.datetime.now()
    year = now.year
    previous = now
    for row in ulist:
        raw = row[11]
        if raw == "00-00 00:00":
            # Far-future sentinel so an unresolved bug never counts as solved.
            row[11] = datetime.datetime.strptime("2099-01-01", "%Y-%m-%d")
            row[5] = 0
            continue
        resolved = datetime.datetime.strptime(str(year) + "-" + raw, "%Y-%m-%d %H:%M")
        if previous < resolved:
            # Later than the previous row: this row belongs to the year before.
            year -= 1
            resolved = datetime.datetime.strptime(str(year) + "-" + raw, "%Y-%m-%d %H:%M")
        previous = resolved
        row[5] = 1  # resolved bug
        row[11] = resolved

    for row in ulist:
        prefix = "2018-" if row[0] < 6102 else "2019-"
        created = prefix + row[7]
        # Parsed only to validate the format; the value stays a string.
        datetime.datetime.strptime(created, "%Y-%m-%d %H:%M")
        row[7] = created

    # Sort AFTER the year has been added: sorting the bare "MM-DD HH:MM"
    # strings put January of the later year before December of the earlier one.
    return sorted(ulist, key=lambda row: row[7])

# index = 7  -> creation time
# index = 11 -> resolution time
def count_before_time(ulist, time, index):
    """Count the rows whose timestamp at ``index`` sorts before ``time``.

    Both sides are compared as strings (``str(row[index]) < str(time)``).
    For ``index == 7`` every row is considered; for any other index only
    rows whose element 5 is greater than 0 are counted.
    """
    cutoff = str(time)
    if index == 7:
        return sum(1 for row in ulist if str(row[index]) < cutoff)
    return sum(1 for row in ulist if row[5] > 0 and str(row[index]) < cutoff)


# Public holidays ("YYYY-MM-DD"); day_is_weekday() returns 2 for these dates.
_HOLIDAYS = frozenset({
    "2018-01-01", "2018-02-15", "2018-02-16", "2018-02-17", "2018-02-18", "2018-02-19", "2018-02-20",
    # The comma after "2018-05-01" was missing, which silently merged it with
    # "2018-06-16" into one bogus entry and dropped both holidays.
    "2018-02-21", "2018-04-05", "2018-04-06", "2018-04-07", "2018-04-29", "2018-04-30", "2018-05-01",
    "2018-06-16", "2018-06-17", "2018-06-18", "2018-09-22", "2018-09-23", "2018-09-24", "2018-10-01",
    "2018-10-02", "2018-10-03", "2018-10-04", "2018-10-05", "2018-10-06", "2018-10-07", "2018-12-30",
    "2018-12-31", "2019-01-01", "2019-02-01", "2019-02-04", "2019-02-05", "2019-02-06", "2019-02-07",
    "2019-02-08", "2019-02-11", "2019-02-12", "2019-04-05", "2019-05-01", "2019-06-07", "2019-09-13",
    "2019-10-01", "2019-10-02", "2019-10-03", "2019-10-04", "2019-10-07",
})

# Dates treated as working days even when they fall on a weekend.
# NOTE(review): the second entry used to be the malformed "2018-04-2", which
# can never equal a zero-padded date.  It is replaced by 2018-02-24, the
# Saturday make-up workday of the official 2018 Spring Festival schedule -
# confirm this is the date that was meant.
_MAKEUP_WORKDAYS = frozenset({
    "2018-02-11", "2018-02-24", "2018-04-08", "2018-04-28", "2018-09-29",
    "2018-09-30", "2018-12-29", "2019-09-29", "2019-10-12",
})


def day_is_weekday(time):
    """Classify the calendar day of ``time``.

    Args:
        time: a ``datetime`` (anything with ``strftime`` and ``weekday``).

    Returns:
        2 if the date is a listed public holiday,
        1 if it is an ordinary Saturday or Sunday,
        0 if it is a working day (including listed make-up workdays).
    """
    day = time.strftime('%Y-%m-%d')
    if day in _HOLIDAYS:
        return 2
    if day in _MAKEUP_WORKDAYS:
        return 0
    if time.weekday() in (5, 6):
        return 1
    return 0
def get_pwd(password, str1):
    """Return the hashed password value submitted with the login form.

    ``str1`` is the HTML of the login page.  The value of its ``verifyRand``
    element is appended to the MD5 hex digest of ``password`` and the
    combined string is hashed with MD5 once more.
    """
    verify_rand = get_id_value(str1, "verifyRand")
    hashed_password = md5_key(password)
    return md5_key(hashed_password + verify_rand)

# Main login routine
def login(baseurl_host, account, password, headers_base):
    """Log in to ZenTao and return the authenticated ``requests`` session.

    Args:
        baseurl_host: scheme, host and port of the server, without a
            trailing slash.
        account: login name.
        password: plain-text password; it is hashed by ``get_pwd`` before
            being sent.
        headers_base: HTTP headers sent with every request.

    Returns:
        The session object, which carries the cookies set during login.

    The body of the login response is printed to stdout.
    """
    baseurl = baseurl_host + "/zentaopms/www/user-login.html"

    # A session keeps the login cookies for the requests that follow.
    session = requests.session()
    # Fetch the login page first: get_pwd reads the verifyRand value from it.
    login_page = session.get(baseurl, headers=headers_base)
    # Form fields expected by the login POST.
    login_data = {
        'account': account,
        'password': get_pwd(password, login_page.text),
        # NOTE(review): the referer is hard-coded to one host instead of
        # being derived from baseurl_host - confirm whether that matters.
        'referer': 'http%3A%2F%2F192.168.100.98%3A8089%2Fzentaopms%2Fwww%2Fmy%2F',
    }

    response = session.post(baseurl, headers=headers_base, data=login_data)
    print("--------登录详情-----------------")
    print(response.text)
    print("--------登录结束-----------------")
    # The statements that used to follow this return (fetching the home page
    # and writing it to a file that was never closed) were unreachable and
    # have been removed.
    return session
 

def getBugs(url_host, session, headers_base):
    """Run the stored bug search and return the HTML of the result page.

    A search form is first POSTed to ``search-buildQuery.html``; its response
    is not used.  The bug list is then fetched from the ``bysearch`` browse
    URL, whose path asks for ``resolvedDate_desc`` ordering and carries
    ``2000-2000`` (presumably total / page size - confirm).

    Args:
        url_host: scheme, host and port of the server, without a trailing slash.
        session: a logged-in ``requests`` session.
        headers_base: HTTP headers sent with both requests.

    Returns:
        The body of the bug list page as text.
    """
    search_form = 'fieldtitle=&fieldkeywords=&fieldsteps=&fieldassignedTo=&fieldresolvedBy=&fieldstatus=&fieldconfirmed=ZERO&fieldproduct=4&fieldplan=&fieldmodule=0&fieldproject=&fieldseverity=0&fieldpri=0&fieldtype=&fieldos=&fieldbrowser=&fieldresolution=&fieldactivatedCount=&fieldtoTask=&fieldtoStory=&fieldopenedBy=&fieldclosedBy=&fieldlastEditedBy=&fieldmailto=&fieldopenedBuild=&fieldresolvedBuild=&fieldopenedDate=&fieldassignedDate=&fieldresolvedDate=&fieldclosedDate=&fieldlastEditedDate=&fielddeadline=&fieldid=&fieldbugfrom=&fieldbugproject=&fieldcustomercompany=&fieldgcprojectno=&fieldgcprojectmanager=&fieldtimecount=&fieldbugresource=&andOr1=AND&field1=openedBy&operator1=%3D&value1=yangjian&andOr2=and&field2=id&operator2=%3D&value2=&andOr3=and&field3=keywords&operator3=include&value3=&groupAndOr=and&andOr4=AND&field4=steps&operator4=include&value4=&andOr5=and&field5=assignedTo&operator5=%3D&value5=&andOr6=and&field6=resolvedBy&operator6=%3D&value6=&module=bug&actionURL=%2Fzentaopms%2Fwww%2Fbug-browse-4-0-bySearch-myQueryID.html&groupItems=3&queryID=&formType=lite'
    query_url = url_host + "/zentaopms/www/search-buildQuery.html"
    session.post(query_url, headers=headers_base, data=search_form)
    print("**********************")

    list_url = url_host + "/zentaopms/www/bug-browse-4-0-bysearch-0-resolvedDate_desc-2000-2000.html"
    bug_page = session.get(list_url, headers=headers_base)
    return bug_page.text

# Build the series used for plotting.
def get_list_data(days, xlist, ylist, ylist2, bug_list=None):
    """Fill the plot series with per-day cumulative bug counts.

    Starting from 23:59:59 of today minus ``days`` days, the function steps
    forward one day at a time for ``days - 1`` steps, so the last day
    visited is yesterday (NOTE(review): today itself is never included -
    confirm this is intended).  Only days for which ``day_is_weekday``
    returns 0 (working days) produce a data point.

    Args:
        days: integer size of the look-back window in calendar days.
        xlist: output list; receives the end-of-day ``datetime`` of each point.
        ylist: output list; receives the number of bugs created before that time.
        ylist2: output list; receives the number of bugs resolved before that time.
        bug_list: normalized bug rows to count.  When omitted, the
            module-level ``ulist`` is used, as the function always did.

    The three output lists are appended to in place; nothing is returned.
    """
    if bug_list is None:
        # Backward-compatible fallback to the module-level global.
        bug_list = ulist

    today = datetime.datetime.now()
    day_end = time_get_day(today.year, today.month, today.day)
    day_end = day_end - datetime.timedelta(days=days)
    for _ in range(days - 1):
        day_end = day_end + datetime.timedelta(days=1)
        if day_is_weekday(day_end) != 0:
            continue
        xlist.append(day_end)
        ylist.append(count_before_time(bug_list, day_end, 7))
        ylist2.append(count_before_time(bug_list, day_end, 11))

# Plotting
def draw_picture(xlist, ylist, ylist2):
    """Draw the bug statistics chart and show it.

    Args:
        xlist: x values (the dates of the data points).
        ylist: cumulative number of bugs per date.
        ylist2: cumulative number of resolved bugs per date.

    Both series are drawn as lines with "x" markers and every point is
    labelled with its value.  The call blocks in ``plt.show()``.
    """
    np.set_printoptions(suppress=True)
    mpl.rcParams['font.sans-serif'] = ['SimHei']  # default font: SimHei (a Chinese typeface)
    mpl.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

    plt.figure(figsize=(13, 5))
    # Red line: all bugs; green line: resolved bugs.
    plt.plot(xlist, ylist, color='red', linewidth=1, marker='x', label='累计bug数,单位(个)')
    plt.plot(xlist, ylist2, color='green', linewidth=1, marker='x', label='累计解决bug数,单位(个)')

    # Show the x-axis major ticks as month-day.
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
    plt.grid(True)
    plt.legend(loc='lower right')
    plt.xlabel('日期', color='r')
    plt.ylabel('个数', color='r')
    plt.axis('tight')
    plt.title('GIS内核与应用组迭代统计图')

    # Value labels: green text for the total series, blue for the resolved one.
    for series, label_color in ((ylist, 'green'), (ylist2, 'blue')):
        for when, count in zip(xlist, series):
            plt.text(when, count, count, ha='center', va='bottom', color=label_color, fontsize=12)

    plt.show()

    
# Program entry point.
# NOTE(review): the server address and credentials are hard-coded below.
url_host = "http://192.168.100.98:8089"
account = "wuyongbo"
password = "test.1234"
headers_base = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
    'Connection': 'keep-alive',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Referer': 'http://192.168.100.98:8089/zentaopms/www/user-login.html',
}

# Log in (replace the account settings above with your own user name and
# password), then download the bug list page.
session = login(url_host, account, password, headers_base)
content = getBugs(url_host, session, headers_base)

# Row layout: 0 = bug id, 7 = creation time, 11 = resolution time.
# ``ulist`` must stay a module-level name: get_list_data reads it as a global.
ulist = []
get_list_from_htmlsource(ulist, content)
ulist = list_data_normalization(ulist)

xlist, ylist, ylist2 = [], [], []

# Number of calendar days to look back.
get_list_data(30, xlist, ylist, ylist2)

print("time---总共bug数量----已经修改的bug数量")
for day, total, solved in zip(xlist, ylist, ylist2):
    print(day.strftime('%Y-%m-%d') + "----" + str(total) + "----" + str(solved))

draw_picture(xlist, ylist, ylist2)

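# Known open issues:
# 1. Are the holiday and make-up-workday lists in day_is_weekday complete?
# 2. getBugs currently forces a page size of 2000 records. Once the total number of bugs exceeds 2000, pagination will have to be handled there.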
#
#
#
#
#
#