1. Log in to Jenkins through CAS
# -*- coding: utf-8 -*-
# @Time : 2023/5/23
# @Author : Pnny
# @desc : CAS authentication
import time
import requests
import os
import pickle
from bs4 import BeautifulSoup
class CasService(object):
    def __init__(self):
        self.session = requests.session()  # CAS session

    # Jenkins login currently goes through CAS single sign-on
    def login(self, service_url, user_name, pass_word, write_path, fileName):
        # Request the first URL Jenkins redirects through CAS
        response = self.session.get(url=service_url, allow_redirects=False)
        # print(f"{service_url}:{response.status_code}")
        if response.status_code == 200:
            # No redirect: the session is already authenticated
            return True
        # Take the next URL to request from the Location header
        cas_url = response.headers["Location"]
        # print(cas_url)
        # If this request returns 403, check the CAS domain mapping in the
        # server's hosts file; the CAS IP address can be found via a browser
        cas_response = self.session.get(cas_url, allow_redirects=False)
        # print(f"{cas_url}:{cas_response.status_code}")
        if cas_response.status_code == 200:
            # Parse the login page with BeautifulSoup
            login_html = BeautifulSoup(cas_response.text, 'lxml')
            # Extract the "execution" value, required by the CAS login form
            execution_value = login_html.select('#fm1 > input[name=execution]')[0]['value']
            # Assemble the credentials
            auth_data = {
                "_eventId": "submit",
                "execution": execution_value,
                "username": user_name,
                "password": pass_word,
            }
            # Send the authentication request
            auth_response = self.session.post(cas_url, data=auth_data, allow_redirects=False)
            # print(f"{cas_url}:{auth_response.status_code}")
            # On success CAS still answers 302 and redirects back to Jenkins
            if auth_response.status_code == 302:
                # Jenkins URL carrying the service ticket
                url_with_ticket = auth_response.headers["Location"]
                # Follow the final redirect chain
                confirm_response = self.session.get(url=url_with_ticket, allow_redirects=True)
                # print(f"{url_with_ticket}:{confirm_response.status_code}")
                if confirm_response.status_code == 200:
                    info_log("login Jenkins success !!!")
                    # Persist the cookies of the authenticated session
                    self.write_cas_cookies_to_file(write_path, fileName)
                else:
                    error_log("login Jenkins failed !!!")
            else:
                error_log(f"auth failed, url:{cas_url}")

    # Write the cookies to a file
    def write_cas_cookies_to_file(self, write_path, fileName):
        # Remove an existing cookie file first
        if os.path.exists(f"{write_path}{fileName}"):
            os.remove(f"{write_path}{fileName}")
        # Write the cookie file afresh
        with open(f"{write_path}{fileName}", 'wb') as f:
            # print(self.session.cookies)
            pickle.dump(self.session.cookies, f)
def info_log(msg):
    print(f"\033[0;32;40m{now_to_date()}:[INFO]:{msg}\033[0m")


def error_log(msg):
    print(f"\033[0;31;40m{now_to_date()}:[ERROR]:{msg}\033[0m")


def now_to_date(format_string="%Y-%m-%d %H:%M:%S"):
    time_stamp = int(time.time())
    time_array = time.localtime(time_stamp)
    str_date = time.strftime(format_string, time_array)
    return str_date
# # Read the cookie file back
# def load_cas_cookies_from_file(self):
#     if os.path.exists("cas_cookies.dat"):
#         with open("cas_cookies.dat", 'rb') as f:
#             self.session.cookies.update(pickle.load(f))

# if __name__ == '__main__':
#     casService = CasService()
#     url = "https://jenkins.xxx.com/securityRealm/commenceLogin?from=%2F"
#     userName = "xxx"
#     password = "xxx"
#     write_path = "/home/xxx/pytest/JKProjects/"
#     # login() already writes the cookie file, so no separate call is needed
#     casService.login(url, userName, password, write_path, "cas_cookies.dat")
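Pickling session.cookies lets a later process, such as the crawler in section 2, restore the authenticated session and skip the CAS handshake; the early return True in login() covers the case where the session is still valid and Jenkins no longer redirects to CAS.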
2. Crawl the required information from Jenkins and write it to files
# -*- coding: utf-8 -*-
# @Time : 2023/5/11
# @Author : pengyong
# @desc : crawl the Jenkins directory structure
import pickle
import time
import requests
import os
import shutil
from bs4 import BeautifulSoup
from cas_login import CasService
class Crawler(object):
    def __init__(self, url, path, fileName):
        self.cookie = None
        self.response = None
        self.first_url = url
        self.write_path = path
        self.class_name_table = "jenkins-table sortable"
        self.class_name_pane = "sortable pane bigtable"
        self.class_name_td = "jenkins-table__link model-link inside"
        self.project_index = "project.index"
        self.owner_flag = "责任人"  # "owner" column label on the Jenkins page
        self.session = requests.session()
        self.cookie_file_name = fileName
        self.description_file = "descriptions/"
        self.build_result_file = "lastBuildResult/"
        self.build_message = "allLastBuildMessage"
        self.headers = {
            "Accept": "text/html, application/xhtml+xml, application/xml; q=0.9, */*; q=0.8",
            "Accept-Language": "zh_CN",
            "Connection": "keep-alive",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18363",
        }

    # Load the cookies saved by the login step
    def load_cas_cookies_from_file(self):
        if os.path.exists(f"{self.write_path}{self.cookie_file_name}"):
            with open(f"{self.write_path}{self.cookie_file_name}", 'rb') as f:
                self.session.cookies.update(pickle.load(f))
    # Start crawling Jenkins
    def get_views(self):
        # Request the home page and check the response
        response = self.session.get(self.first_url, headers=self.headers)
        if response.status_code != 200:
            error_log("error: login failed!!!")
            return
        html_doc = response.text
        soup = BeautifulSoup(html_doc, "html.parser")
        views = soup.find_all("div", class_="tab")
        # print(views)
        # Clean up historical files before writing
        delete_when_is_exist(self.write_path, self.project_index)
        # Remove the last-build-result directory if it exists
        if os.path.exists(self.write_path + self.build_result_file):
            os.chdir(self.write_path)
            shutil.rmtree(self.build_result_file)
        # Walk through the view tabs
        for view in views:
            link = view.find("a")
            info_log("Version: " + link["href"])
            # print(link["href"], link.get_text())
            if link["href"] == "/view/all/":
                # Skip the "all" view; it holds no categorized data
                continue
            # print(f"project:{link.get_text()}")
            # Append the project name to the project index file
            writeFile(self.write_path, self.project_index, link.get_text())
            # Write the first-level entry to its own file (one file per project)
            delete_when_is_exist(self.write_path, link.get_text())
            writeFile(self.write_path, link.get_text(), f"project:{link.get_text()}")
            # Build the view URL and request it
            url = self.first_url + link["href"]
            response = self.session.get(url)
            html_doc = response.text
            soup = BeautifulSoup(html_doc, "html.parser")
            table_list = soup.find("table", id="projectstatus", class_=self.class_name_pane)
            # print(table_list)
            # Iterate over the first-level list under each project tab
            if table_list is None:
                # The first level is not a directory: fetch and save the job information directly
                tds = soup.findAll("a", class_=self.class_name_td)
                for td in tds:
                    if td is not None:
                        print("Project: " + td.get_text())
                        tds1 = soup.find("tr", id=f"job_{td.get_text()}").findAll("td")
                        job_url = td["href"]
                        # Write the job entry to the project file: parent directory, job name, job URL
                        writeFile(self.write_path, link.get_text(),
                                  f"job:{link.get_text()} {td.get_text()} {url + job_url}")
                        description_td = tds1[3]
                        if len(description_td.contents) != 0:
                            description = description_td.contents[0]
                        else:
                            description = ""
                        # Job descriptions follow no fixed format, so each goes into its own file
                        self.writeDescriptionToFile(td, description)
                        # Write the build result to a file
                        # self.getTheLastBuildResults(url + job_url, td)
            else:
                # The first level is a directory: keep walking down
                cells = table_list.findAll("a")
                for cell in cells:
                    link_url = cell["href"]
                    link_text = cell.get_text()
                    if link_url == "#":
                        # Skip unwanted columns
                        continue
                    print("Directory: " + link_text)
                    # Build the first-level directory URL
                    url = self.first_url + link_url
                    # Open the first-level directory to reach the second level
                    response = self.session.get(url)
                    html_doc = response.text
                    soup = BeautifulSoup(html_doc, "html.parser")
                    # Read the directory's description block
                    view_description = soup.find("div", id="description").find("div")
                    # print(view_description.text)
                    # Write the second-level directory description to its file
                    delete_when_is_exist(self.write_path + self.description_file, link_text)
                    writeFile(self.write_path + self.description_file, link_text, view_description.text)
                    tds = soup.findAll("a", class_=self.class_name_td)
                    if not tds:
                        # findAll returns an empty list (never None) when nothing matches
                        continue
                    # Iterate over the jobs in the second-level directory
                    count = 0
                    for td in tds:
                        count = count + 1
                        if td is not None:
                            print("Project: " + td.get_text())
                            tds1 = soup.find("tr", id=f"job_{td.get_text()}").findAll("td")
                            job_url = td["href"]
                            # Write the job entry to the project file: parent directory, job name, job URL
                            writeFile(self.write_path, link.get_text(),
                                      f"job:{link_text} {td.get_text()} {url + job_url}")
                            description_td = tds1[3]
                            if len(description_td.contents) != 0:
                                description = description_td.contents[0]
                            else:
                                description = ""
                            # Job descriptions follow no fixed format, so each goes into its own file
                            self.writeDescriptionToFile(td, description)
                            # Write the build result to a file
                            # self.getTheLastBuildResults(url + job_url, td)
                    # Write the second-level directory's job count to the project file
                    writeFile(self.write_path, link.get_text(), f"path:{link_text} {count}")
    # Job descriptions follow no fixed format, so each one is written to its own file
    def writeDescriptionToFile(self, td, description):
        # Create the description directory if it does not exist yet;
        # os.makedirs() also creates intermediate directories
        if not os.path.exists(self.write_path + self.description_file):
            os.makedirs(self.write_path + self.description_file)
        delete_when_is_exist(self.write_path + self.description_file, td.get_text())
        writeFile(self.write_path + self.description_file, td.get_text(), description)
    # Write the most recent build result to a file
    def getTheLastBuildResults(self, url, td):
        # Open the job detail page and read the latest build information
        job_detail_response = self.session.get(url)
        job_html_doc = job_detail_response.text
        # print(job_html_doc)
        job_soup = BeautifulSoup(job_html_doc, "html.parser")
        build_history = job_soup.find("a", class_="build-status-link")
        print(build_history)
        if build_history is not None:
            last_build_status = build_history.findAll("use")[0]["href"].split("#")[1]
            if last_build_status is None:
                last_build_result = ""
            elif last_build_status == "build-status-in-progress":
                last_build_result = "last-progress"
            else:
                last_build_result = build_history.findAll("use")[1]["href"].split("#")[1]
            last_build_number = job_soup.find("a", class_="model-link inside build-link display-name").text
            last_build_time = job_soup.find("a", class_="model-link inside build-link").text
            last_build_message = td.get_text() + " " + last_build_number + " " + last_build_result + " " + last_build_time
            # print(last_build_message)
            if not os.path.exists(self.write_path + self.build_result_file):
                # os.makedirs() also creates intermediate directories
                os.makedirs(self.write_path + self.build_result_file)
            writeFile(self.write_path + self.build_result_file, self.build_message, last_build_message)
# Append one line of Jenkins information to a file
def writeFile(file_path, file_name, content):
    # print(f"{file_path}{file_name}")
    with open(f"{file_path}{file_name}", 'a+', encoding='utf-8') as f:
        f.write(f"{content}\n")


# Delete the file first if it already exists
def delete_when_is_exist(file_path, file_name):
    if os.path.exists(f"{file_path}{file_name}"):
        os.remove(f"{file_path}{file_name}")


def info_log(msg):
    print(f"\033[0;32;40m{now_to_date()}:[INFO]:{msg}\033[0m")


def error_log(msg):
    print(f"\033[0;31;40m{now_to_date()}:[ERROR]:{msg}\033[0m")


def now_to_date(format_string="%Y-%m-%d %H:%M:%S"):
    time_stamp = int(time.time())
    time_array = time.localtime(time_stamp)
    str_date = time.strftime(format_string, time_array)
    return str_date
if __name__ == '__main__':
    info_log("***Start crawling, please wait......***")
    run_time_start = time.time()
    firstUrl = "https://jenkins.xxx.com"
    loginUrl = "https://jenkins.xxx.com/securityRealm/commenceLogin?from=%2F"
    userName = "xxx"
    password = "xxx"
    current_dir = os.getcwd()
    parent_dir = os.path.dirname(current_dir)
    write_path = parent_dir + "/JKProjects/"
    cookie_file_name = "cas_cookies.dat"
    # Log in through CAS
    casService = CasService()
    casService.login(loginUrl, userName, password, write_path, cookie_file_name)
    # Initialize the crawler
    craw = Crawler(firstUrl, write_path, cookie_file_name)
    # Load the saved cookies
    craw.load_cas_cookies_from_file()
    # Start crawling
    craw.get_views()
    run_time_end = time.time()
    run_time = run_time_end - run_time_start
    info_log("***End of crawling***")
    info_log(f"running time is: {run_time:.2f} s")
What the written files look like:
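Judging from the writeFile calls above, project.index lists one view name per line, and each per-project file contains lines of the following shape (values illustrative):

    project:PlatformA
    job:PlatformA build-api https://jenkins.xxx.com/view/PlatformA/job/build-api/
    path:backend 12

A job: line carries the parent directory, the job name, and the job URL separated by spaces; a path: line records a second-level directory and its job count.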
3. Trigger the crawl from Java on a schedule; then, following the data format written above, read the needed information from the files via shell commands and store it in the database
package com.sics.testplatform.common;

import com.sics.testplatform.controller.build.analysis.IBuildAnalysis;
import com.sics.testplatform.controller.build.broadcast.IBuildBroadcast;
import com.sics.testplatform.mapper.build.analysis.ScheduledMapper;
import com.sics.testplatform.service.run.RunRecordsServiceImpl;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.annotation.SchedulingConfigurer;
import org.springframework.scheduling.config.ScheduledTaskRegistrar;
import org.springframework.scheduling.support.CronTrigger;
import org.springframework.stereotype.Component;

/**
 * Jenkins sync - scheduled task
 *
 * @author pengyong
 * @create 2023/05/24
 */
@Component
@EnableScheduling
@Slf4j
public class CronTaskConfig implements SchedulingConfigurer {

    @Autowired
    ScheduledMapper scheduledMapper;
    @Autowired
    IBuildAnalysis iBuildAnalysis;
    @Autowired
    IBuildBroadcast iBuildBroadcast;
    @Autowired
    RunRecordsServiceImpl runRecordsService;

    @Override
    public void configureTasks(ScheduledTaskRegistrar scheduledTaskRegistrar) {
        scheduledTaskRegistrar.addTriggerTask(this::process,
                triggerContext -> {
                    String cron = scheduledMapper.selectCronById("2");
                    if (cron == null || cron.isEmpty()) {
                        log.error("No scheduled-task configuration found in the SCHEDULED table");
                        return null;
                    } else {
                        return new CronTrigger(cron).nextExecutionTime(triggerContext);
                    }
                });
    }

    private void process() {
        log.info("******* process1 : cron task is running *******");
        iBuildAnalysis.syncJenkinsAllDirectory();
    }
}
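Reading the cron expression from the SCHEDULED table on every scheduling round means the interval can be changed at runtime without redeploying. A Spring-style expression such as 0 0 2 * * * (an illustrative value, not taken from the source) would run the sync daily at 02:00.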
/**
 * Sync the Jenkins directory structure into the database.
 * The structure is crawled by the Python script into a fixed directory;
 * the sync performs the following steps:
 * 1. Call the Python crawler to fetch the Jenkins directory structure
 * 2. Run shell commands to read the result files and hand their content to the Java program
 * 3. Parse the results and store them in the database
 *
 * @return sync result
 */
@Override
public HttpFormatResult syncJenkinsAll() {
    if (isJenkinsSync) {
        return new HttpFormatResult(HttpReturnMsg.FAIL_CODE, "A sync task is already running or being executed by someone else; please retry later");
    }
    isJenkinsSync = true;
    // Run the sync asynchronously on a single-thread pool
    ExecutorService service = Executors.newSingleThreadExecutor();
    service.execute(() -> {
        log.info("****** Start syncing Jenkins information *****");
        try {
            // Clean up historical data before running
            YTPUtils.run("cd " + BuildConstant.BASE_PATH + "; rm -rf " + BuildConstant.SYNC_RESULT_PATH);
            YTPUtils.run("cd " + BuildConstant.BASE_PATH + "; mkdir -p " + BuildConstant.SYNC_RESULT_PATH + BuildConstant.DESCRIPTION_RESULT_PATH);
            // Run the crawler script
            YTPUtils.execute("cd " + BuildConstant.BASE_PATH + BuildConstant.SCRIPT_FILE + "; "
                    + BuildConstant.PYTHON3_CMD + BuildConstant.SCRIPT_NAME);
            // Read the crawler results back
            String getJenkinsMsg = BuildConstant.CAT_CMD + BuildConstant.BASE_PATH + BuildConstant.SYNC_RESULT_PATH + BuildConstant.PROJECT_INDEX;
            List<String> projectList = YTPUtils.executeCMD(getJenkinsMsg);
            // First-level directories
            firstDirStorage(projectList);
            // Second-level directories; second-level jobs are handled in the job method
            secondDirOrJobStorage(projectList);
            // Job information list
            jobMessageStorage(projectList);
            isJenkinsSync = false;
        } catch (Exception e) {
            isJenkinsSync = false;
            throw new RuntimeException(e.getMessage());
        }
    });
    // Let the submitted task finish, then release the worker thread
    service.shutdown();
    // The sync runs asynchronously, so return right away; SUCCESS_CODE is an assumed constant
    return new HttpFormatResult(HttpReturnMsg.SUCCESS_CODE, "Sync task started");
}
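firstDirStorage, secondDirOrJobStorage and jobMessageStorage are not shown. As a minimal sketch of what the parsing side can look like, each line of a project file is dispatched on its prefix, matching the project:/job:/path: formats the Python script writes; the method name and the mapper calls in the comments are hypothetical, not real project code:

private void dispatchLines(List<String> lines) {
    for (String line : lines) {
        if (line.startsWith("job:")) {
            // job:<parent dir> <job name> <job url>
            String[] parts = line.substring("job:".length()).split(" ", 3);
            // e.g. jobMapper.insert(parts[0], parts[1], parts[2]);
        } else if (line.startsWith("path:")) {
            // path:<second-level dir> <job count>
            String[] parts = line.substring("path:".length()).split(" ", 2);
            // e.g. dirMapper.updateCount(parts[0], Integer.parseInt(parts[1]));
        } else if (line.startsWith("project:")) {
            // project:<first-level project name>
            String name = line.substring("project:".length());
            // e.g. projectMapper.insert(name);
        }
    }
}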
Shell execution helper:
public static String run(String command) {
    Scanner input = null;
    StringBuilder result = new StringBuilder();
    Process process = null;
    List<String> commandArr = new ArrayList<>();
    commandArr.add("/bin/sh");
    commandArr.add("-c");
    commandArr.add(command);
    log.info(">>> cmd: " + command);
    try {
        process = Runtime.getRuntime().exec(commandArr.toArray(new String[0]));
        // Drain stdout before waiting: reading the stream first avoids a
        // deadlock when the process fills its output buffer
        InputStream is = process.getInputStream();
        input = new Scanner(is);
        while (input.hasNextLine()) {
            String line = input.nextLine() + "\n";
            log.info(">>> line:" + line);
            result.append(line);
        }
        process.waitFor(10, TimeUnit.SECONDS);
    } catch (Exception e) {
        log.error("error:" + e.getMessage());
        e.printStackTrace();
    } finally {
        if (input != null) {
            input.close();
        }
        if (process != null) {
            process.destroy();
        }
    }
    return result.toString().trim();
}
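Since the command string is handed to /bin/sh -c, compound commands with cd, pipes, and redirection behave as they would in an interactive shell. A hypothetical call (path illustrative):

String index = YTPUtils.run("cd /home/xxx/JKProjects; cat project.index");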