一、编写了Go脚本爬取jenkins插件网上的插件
脚本备注:
1、脚本默认爬取每个插件最近的20个版本(后期使用发现仍有不少插件版本不全)
修改方式:subdirectory_analysis方法里的第一个入参就是限定下载的版本数,-1则表示不限制
2、插件下载目录修改可自定义
3、Jenkins的插件网有反爬虫机制,会拒绝访问;脚本因此中断后可重新执行,已下载过的插件不会重复下载
二、源码分享
1、源码展示
package main
import (
"compress/gzip"
"fmt"
_ "fmt"
"io"
"io/ioutil"
"net/http"
"os"
"path"
"regexp"
"strings"
"sync"
"time"
)
// first_url holds the crawl configuration together with the URL lists that
// are collected at each directory level of the mirror.
type first_url struct {
	// root_url is the index page the crawl starts from.
	root_url string
	// root_dir is the local directory that main creates before crawling.
	root_dir string
	// url_path1, url_path2 and url_path3 collect the URLs discovered while
	// parsing the root page, the first-level pages and the second-level
	// pages respectively.
	url_path1, url_path2, url_path3 []string
}
// Shared crawl state used by main and the subdirectory_analysis workers.
var (
	// down_paral queues the URLs handled by the first worker stage and
	// Ndown_paral those handled by the second; both are created in main.
	down_paral, Ndown_paral chan string
	// wg tracks the first-stage workers and nwg the second-stage workers.
	wg, nwg sync.WaitGroup
)
// http_get fetches request_url with an HTTP GET and returns the response body
// as a string.
//
// If the response still carries a Content-Encoding header that mentions gzip,
// the body is decompressed before it is returned. On any failure (request
// error, non-200 status, invalid gzip stream, read error) a message is printed
// and the sentinel string "Nil" is returned; callers compare against it.
func http_get(request_url string) string {
	resp, err := http.Get(request_url)
	if err != nil {
		fmt.Printf("%v 网页访问失败;失败信息:%v", request_url, err)
		return "Nil"
	}
	// Registered immediately so the body is released on every return path,
	// including the early error returns below.
	defer resp.Body.Close()

	// An error page must not be handed to the caller as if it were a listing.
	if resp.StatusCode != http.StatusOK {
		fmt.Printf("%v 网页访问失败;HTTP状态:%v", request_url, resp.Status)
		return "Nil"
	}

	// Read through a gzip reader when the body is still compressed; the
	// original created the reader but kept reading the raw body.
	var body_reader io.Reader = resp.Body
	content_encoding := strings.ToLower(resp.Header.Get("Content-Encoding"))
	if strings.Contains(content_encoding, "gzip") {
		gz_reader, gz_err := gzip.NewReader(resp.Body)
		if gz_err != nil {
			fmt.Printf("网页%v解压失败,失败信息:%v", request_url, gz_err)
			return "Nil"
		}
		defer gz_reader.Close()
		body_reader = gz_reader
	}

	body, body_err := ioutil.ReadAll(body_reader)
	if body_err != nil {
		fmt.Printf("读取网页主体失败,失败信息:%v", body_err)
		return "Nil"
	}
	return string(body)
}
// url_path_mu serialises appends to the URL slices passed to analysis_title.
// Several subdirectory_analysis goroutines call analysis_title with the same
// url_path pointer, so an unguarded append is a data race.
var url_path_mu sync.Mutex

// analysis_title parses one directory-listing page and records or downloads
// the entries it finds.
//
// Parameters:
//   - limit_point: when > 0, only the last limit_point matching lines of the
//     page are processed; values <= 0 disable the limit.
//   - root_dir: currently unused; local paths are built under "E:/".
//   - url_path: slice that every discovered URL is appended to. Entries keep
//     the trailing newline added by fmt.Sprintln; consumers trim it.
//   - url_addr: URL of the page being parsed; entry names are concatenated
//     directly onto it.
//   - body_result: the page's HTML.
//
// A line is considered when it contains "title" and does not contain "镜像".
// Lines containing "hpi" are treated as plugin files: the local directory is
// created, the file is downloaded through down_hpi, and the function then
// sleeps for 30 seconds. All other lines are treated as sub-directories and
// are recorded unless the matching local directory already exists.
func analysis_title(limit_point int, root_dir *string, url_path *[]string, url_addr *string, body_result string) {
	re := regexp.MustCompile("title=\"([^\"]*)\"")

	var line_row []string
	for _, line := range strings.Split(body_result, "\n") {
		if strings.Contains(line, "title") && !strings.Contains(line, "镜像") {
			line_row = append(line_row, line)
		}
	}

	// Keep only the trailing limit_point entries. The original sliced off a
	// hard-coded 20, which ignored the parameter and panicked with a negative
	// index whenever limit_point < len(line_row) < 20.
	if limit_point > 0 && len(line_row) > limit_point {
		line_row = line_row[len(line_row)-limit_point:]
	}

	for _, line := range line_row {
		// Strip the attribute wrapper, leaving only the quoted value.
		entry_name := strings.Replace(strings.Replace(re.FindString(line), "title=", "", 1), "\"", "", 2)

		if strings.Contains(line, "hpi") {
			url_path_temp := fmt.Sprintln((*url_addr) + entry_name)

			url_path_mu.Lock()
			*url_path = append(*url_path, url_path_temp)
			url_path_mu.Unlock()

			// Elements 0-2 are the scheme, the empty segment and the host;
			// the remainder mirrors the URL path on the local disk.
			sys_subdir := strings.Split(url_path_temp, "/")
			mkdir_dir("E:/" + strings.Join(sys_subdir[3:len(sys_subdir)-1], "/"))
			down_hpi(strings.Trim(url_path_temp, "\n"), "E:/"+strings.Join(sys_subdir[3:], "/"))
			// Presumably throttles requests to stay under the mirror's
			// anti-crawler limits; confirm before changing the interval.
			time.Sleep(30 * time.Second)
			continue
		}

		url_path_temp := fmt.Sprintln((*url_addr) + entry_name + "/")
		// Map the URL onto the local tree to check whether this directory
		// already exists locally.
		check_file_exists := path.Dir(strings.Replace(url_path_temp, "https://mirrors.tuna.tsinghua.edu.cn", strings.TrimSpace("E:"), 1))
		if _, check_err := os.Stat(check_file_exists); !os.IsNotExist(check_err) {
			fmt.Printf("%v 目录已存在,跳过此目录\n", check_file_exists)
			continue
		}

		url_path_mu.Lock()
		*url_path = append(*url_path, url_path_temp)
		url_path_mu.Unlock()
	}
}
// subdirectory_analysis is a worker loop: it takes page URLs from down_paral
// until the channel is closed, fetches each page with http_get and passes the
// HTML to analysis_title, which appends discovered URLs to url_path2.
//
// limit_point and root_dir are forwarded unchanged to analysis_title.
// waitGo.Done is called once when the worker returns.
func subdirectory_analysis(limit_point int, waitGo *sync.WaitGroup, down_paral chan string, root_dir *string, url_path2 *[]string) {
	defer waitGo.Done()

	for queued_url := range down_paral {
		// Queue entries carry a trailing newline; strip it before the request.
		page_url := strings.Trim(queued_url, "\n")

		page_body := http_get(page_url)
		if page_body == "Nil" {
			fmt.Printf("网页%v 访问失败", queued_url)
			continue
		}

		analysis_title(limit_point, root_dir, url_path2, &page_url, page_body)
	}
}
// mkdir_dir makes sure the directory dir exists, creating any missing parents
// with mode 0777.
//
// os.MkdirAll already does nothing when the directory exists, so no separate
// existence check is needed. A failure is printed instead of being silently
// dropped; the function itself returns nothing.
func mkdir_dir(dir string) {
	if err := os.MkdirAll(dir, 0777); err != nil {
		fmt.Printf("%v 目录创建失败,失败信息:%v\n", dir, err)
	}
}
// down_hpi downloads url into the local file file_path.
//
// Surrounding whitespace in file_path is ignored when the file name is built.
// If the target file already exists the download is skipped, which is what
// makes an interrupted crawl resumable. Every failure is printed and ends the
// call without leaving a file behind:
//   - a request error returns before the response is touched (the original
//     fell through and dereferenced a nil response);
//   - a non-200 status is rejected, so an error page is never saved as a plugin;
//   - a failed or incomplete copy removes the partial file, so a later run
//     does not mistake it for a finished download.
func down_hpi(url string, file_path string) {
	fileName := path.Join(path.Dir(strings.TrimSpace(file_path)), path.Base(strings.TrimSpace(file_path)))
	if _, stat_err := os.Stat(fileName); stat_err == nil {
		fmt.Printf("%v 文件已存在\n", fileName)
		return
	}

	response, resp_err := http.Get(url)
	if resp_err != nil {
		fmt.Printf("%v 网页访问失败,报错信息:%v", url, resp_err)
		return
	}
	defer response.Body.Close()

	if response.StatusCode != http.StatusOK {
		fmt.Printf("%v 下载失败,HTTP状态:%v\n", url, response.Status)
		return
	}

	file, file_err := os.Create(fileName)
	if file_err != nil {
		fmt.Printf("%v 文件创建失败,失败信息%v\n", fileName, file_err)
		return
	}

	_, copy_err := io.Copy(file, response.Body)
	// A failed Close can also mean lost data, so it counts as a failed download.
	if close_err := file.Close(); copy_err == nil {
		copy_err = close_err
	}
	if copy_err != nil {
		fmt.Println("Failed to download file:", copy_err)
		if rm_err := os.Remove(fileName); rm_err != nil {
			fmt.Printf("%v 残留文件删除失败,失败信息%v\n", fileName, rm_err)
		}
		return
	}
	fmt.Printf("%v downloaded successfully.", file_path)
}
// main crawls the plugin mirror in three steps:
//  1. fetch and parse the root index, filling url.url_path1;
//  2. crawl every first-level URL with first_level_workers goroutines,
//     filling url.url_path2;
//  3. crawl every second-level URL with second_level_workers goroutines; the
//     plugin files found there are downloaded from inside analysis_title.
//
// If the root index cannot be fetched the program reports it and stops,
// rather than parsing the "Nil" sentinel as HTML and exiting silently.
func main() {
	// Worker counts are named once so the WaitGroup counters and the number
	// of started goroutines cannot drift apart.
	const (
		first_level_workers  = 10
		second_level_workers = 20
		entry_limit          = 20 // entries kept per page; -1 disables the limit
	)

	down_paral = make(chan string, first_level_workers)
	Ndown_paral = make(chan string, second_level_workers)

	// NOTE(review): analysis_title concatenates entry names directly onto
	// root_url; confirm whether this URL needs a trailing slash.
	url := &first_url{
		root_url:  "https://mirrors.tuna.tsinghua.edu.cn/jenkins/updates",
		url_path1: []string{},
		root_dir:  "E:/jenkins/plugins",
	}
	mkdir_dir(url.root_dir)

	body_result := http_get(url.root_url)
	if body_result == "Nil" {
		fmt.Printf("根页面%v 访问失败,程序退出\n", url.root_url)
		return
	}
	analysis_title(-1, &url.root_dir, &url.url_path1, &url.root_url, body_result)

	// Stage 1: the producer feeds first-level URLs; workers fill url_path2.
	wg.Add(first_level_workers)
	go func() {
		defer close(down_paral)
		for _, val := range url.url_path1 {
			down_paral <- val
		}
	}()
	for i := 0; i < first_level_workers; i++ {
		go subdirectory_analysis(entry_limit, &wg, down_paral, &url.root_dir, &url.url_path2)
	}
	// url_path2 is only read after every stage-1 worker has finished.
	wg.Wait()

	// Stage 2: same pattern one level deeper.
	nwg.Add(second_level_workers)
	go func() {
		defer close(Ndown_paral)
		for _, val := range url.url_path2 {
			Ndown_paral <- val
		}
	}()
	for i := 0; i < second_level_workers; i++ {
		go subdirectory_analysis(entry_limit, &nwg, Ndown_paral, &url.root_dir, &url.url_path3)
	}
	nwg.Wait()
	//down_hpi("https://mirrors.tuna.tsinghua.edu.cn/jenkins/updates/update-center.json", "E:/jenkins/updates/update-center.json")
}
三、Jenkins插件网搭建
1、使用httpd服务搭建网站
2、将插件上传到本地目录
3、在httpd服务目录下创建软链接,并重启服务
四、配置jenkins插件仓库地址
1、进到工作目录
我是官网下载war包直接启动jenkins方式进行搭建的,该方式jenkins默认工作目录是启动用户家目录的.jenkins目录下
2、修改jenkins默认插件下载地址
进到/root/.jenkins/updates目录下修改default.json文件,将默认插件下载地址改成自己的私有插件库地址;sed命令后面打了马赛克的部分需替换成你搭建的私有插件库的地址,示例如下: