需求:导出 GitLab 某个项目的所有 issues。
分析:GitLab 本身不具备导出全部问题列表的能力,提供的 API 也不够友好,于是自己用 Python 爬取所有问题列表,具体源码如下:
import requests
import json
import time
#import itchat
import random
import _thread
import os
from urllib.parse import urlencode
from bs4 import BeautifulSoup
import ssl
#修改此处为自己登陆的Cookie
localCookie = "sidebar_collapsed=false; _gitlab_session=e0cff180434843a933739a0427763816; event_filter=push"
#修改此处为自己保存输出文件的目录
outputdir = "D:/yuxl/"
ssl._create_default_https_context = ssl._create_unverified_context
def parseContentList(content):
    """Extract issue rows from one GitLab issue-list HTML page.

    Parameters
    ----------
    content : str
        Raw HTML of an issue-list page.

    Returns
    -------
    list[list[str]]
        One inner list per issue found: ``[title, id, ...]`` where each id is
        the last path segment of an ``<a href>`` longer than 20 characters
        inside the ``issue-title-text`` element.  An empty list means the
        page has no issues — the caller uses that as the pagination stop
        condition.
    """
    soup = BeautifulSoup(content, 'html.parser')
    rows = []
    for tr in soup.find_all(class_="issue-title-text"):
        parts = tr.text.split("\n")
        # The title normally sits after a leading newline; fall back to the
        # whole text so a malformed row can't abort the export (IndexError).
        title = parts[1] if len(parts) > 1 else parts[0]
        row = [title]
        for node in tr.find_all("a"):
            # .get() avoids KeyError on anchors that carry no href attribute.
            href = node.get("href")
            if href and len(href) > 20:
                # The issue id is the final path segment of the link.
                row.append(str(href).split('/')[-1])
        rows.append(row)
    return rows
def downLoad(filename, paraMac):
    """Crawl every page of a project's issue list and write it to a text file.

    Parameters
    ----------
    filename : str
        Prefix for the output file name; the current date and ``.txt`` are
        appended, and the file is written under the global ``outputdir``.
    paraMac : dict
        URL fragments in declaration order ("http", "host", "username",
        "projectname", "issues", "other"); they are joined with "/" to form
        the page URL, and the literal ``{chijing}`` placeholder in "other"
        is replaced with the page number.

    Side effects: performs HTTP GETs with the global ``localCookie`` and
    writes one "id:title" line per issue.  Stops at the first page that
    yields no issues (or at page 999).
    """
    headers = {
        "Host": paraMac['host'],
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Cookie": localCookie,
        "If-None-Match": "W/\"e92673383cfe9f2384cf6e65329603e2\"",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36",
    }
    session = requests.session()
    # Join the fragment values into the URL template; relies on dict
    # insertion order (Python 3.7+) matching the declaration order in start().
    strurl = "/".join(paraMac[key] for key in paraMac)
    txtpath = outputdir + filename + time.strftime("%Y_%m_%d", time.localtime()) + ".txt"
    # `with` guarantees the file is closed even if a request raises mid-loop
    # (the original leaked the handle on any exception).
    with open(txtpath, "w", encoding="utf-8") as f:
        for page in range(1, 1000):
            pageurl = strurl.replace("{chijing}", str(page))
            # NOTE(review): params=paraMac re-appends every fragment as a
            # query parameter; kept as-is to preserve the original requests,
            # which the server evidently tolerates — confirm before removing.
            resp = session.get(pageurl, params=paraMac, headers=headers)
            listret = parseContentList(resp.text)
            if not listret:
                return  # first empty page ends the export
            for row in listret:
                # Original format: "<id>:<title>\t\n" per issue.
                f.write(f"{row[1]}:{row[0]}\t\n")
            f.flush()
def start():
    """Declare the project(s) to export and run the download for each one."""
    # Edit this mapping to download the issues of other projects.
    kernel_project = {
        "http": "http:/",                # protocol prefix (a second "/" is added when joining)
        "host": "192.168.31.238:10088",  # server address and port
        "username": "jing",              # project owner's user name
        "projectname": "kernel_2018_05", # project name
        "issues": "issues",              # issues endpoint
        "other": "?scope=all&utf8=%E2%9C%93&state=all&page={chijing}",  # paging template
    }
    projects = {
        "内核": kernel_project,
    }
    for name, fragments in projects.items():
        downLoad(name, fragments)
# Before running:
#   1. Set localCookie (used by downLoad) to your own session cookie.
#   2. Make sure every path involved (outputdir) already exists.
if __name__ == "__main__":
    # Guarded entry point so importing this module does not trigger the crawl.
    start()