python同步gitlab數據至MySQL代碼

19 篇文章 0 订阅

python同步gitlab數據至MySQL代碼

gitlab接口:https://docs.gitlab.com/ee/api/projects.html

1.對接接口

由於某些project的數據量過大,直接採集,會報錯:

File “C:\Users\SARAH.X\Documents\env\Lib\site-packages\urllib3\util\retry.py”,
line 515, in increment raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type] urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host=‘gitlab.sjfood.us’, port=443): Max retries exceeded with url:/api/v4/projects/638/vulnerability_findings?&per_page=100&page=4
(Caused by ReadTimeoutError(“HTTPSConnectionPool(host=‘gitlab.sjfood.us’,port=443): Read timed out. (read timeout=15)”)) During handling of the above exception, another exception occurred: Traceback (most recent call last): File"C:\Users\SARAH.X\PycharmProjects\pythonProject\gitlab_scan\server.py", line 53, in
response = util.project_vulnerability_findings(projectId, page) File"C:\Users\SARAH.X\PycharmProjects\pythonProject\gitlab_scan\util.py",
line 33, in project_vulnerability_findings response = s.request(“GET”, url=url, timeout=15, headers=get_header(), verify=False)

util.py

import time
import requests
from requests.adapters import HTTPAdapter
import urllib3


def get_header():
    access_token = "qcwssZuXdB_kHoaBpyNa"
    headers = {
        "Private-Token": access_token,
        'User-Agent': 'Apifox/1.0.0 (https://apifox.com)',
        'Accept': '*/*',
        'Host': 'gitlab.sjfood.us',
        'Accept-Encoding': 'gzip, deflate, br',
        'Connection': 'keep-alive'
    }
    return headers


# 獲取所有project的id
def all_project_id(page):
    url = f"https://gitlab.sjfood.us/api/v4/projects?&per_page=100&page={page}"
    response = requests.get(url, headers=get_header(), verify=False)
    return response


# 獲取特定project的漏洞调查结果
def project_vulnerability_findings(projectId, page):
    # proxies = {'https': '127.0.0.1:10880'}
    requests.adapters.DEFAULT_RETRIES = 5
    url = f"http://gitlab.sjfood.us/api/v4/projects/{projectId}/vulnerability_findings?&per_page=100&page={page}"
    s = requests.session()
    # max_retries=3 重试3次
    s.mount('http://', HTTPAdapter(max_retries=3))
    s.mount('https://', HTTPAdapter(max_retries=3))
    #response = s.request("GET", url=url, timeout=30, headers=get_header(), verify=False)

    while True:
        try:
            response = s.request("GET", url=url, timeout=30, headers=get_header(), verify=False)
            if response.status_code == 200:
                return response
            else:
                return 'error'
        except:
            time.sleep(5)
            pass

2.接口調用

server.py

import time
import util
import mysql_to_write
import urllib3

if __name__ == '__main__':
    urllib3.disable_warnings()
    # 獲取所有項目ID
    page = 1
    print("page:",page)
    response = util.all_project_id(page)
    data = response.json()
    project_ids = []
    for project in data:
        # print(project)
        # mysql_to_write.add_project_record(project)
        project_ids.append(project['id'])
    header = response.headers
    while header['X-Next-Page'] != '':
        page = header['X-Next-Page']
        # print(page)
        response = util.all_project_id(page)
        data = response.json()
        header = response.headers
        for project in data:
            # mysql_to_write.add_project_record(project)
            project_ids.append(project['id'])
        # print(len(project_ids))
    print("project长度:", len(project_ids))
    print("project数据:", project_ids)
    # print(project_ids)

    to_remove = [638, 546, 305, 253, 175, 152, 83]

    # 逐个移除元素
    for num in to_remove:
        if num in project_ids:
            project_ids.remove(num)
    print("去除後的project长度", len(project_ids))

    correct_project_ids = []
    # project_ids = ['1134']
    for projectId in project_ids:
        page = 1
        project_vulnerability_ids = []
        response = util.project_vulnerability_findings(projectId, page)
        # print("response:", response)
        if response != 'error' and len(response.json()) > 0:
            print(projectId)
            correct_project_ids.append(projectId)
            data = response.json()
            for project in data:
                # print(project)
                # print(project['uuid'])
                # mysql_to_write.add_project_vulnerability_findings(project)
                project_vulnerability_ids.append(project['uuid'])
            header = response.headers
            # print(header)
            while header['X-Next-Page'] != '':
                page = header['X-Next-Page']
                # print('page:',page)
                response = util.project_vulnerability_findings(projectId, page)
                data = response.json()
                header = response.headers
                for project in data:
                    # mysql_to_write.add_project_vulnerability_findings(project)
                    project_vulnerability_ids.append(project['uuid'])
                print("本次uuid长度:", len(project_vulnerability_ids))
            print("projectId:", projectId, ",数组长度:", len(response.json()), "uuid长度:",len(project_vulnerability_ids))
    # print("correct_project_ids", correct_project_ids)
    # time.sleep(3)

3、拆分project

project in [638, 546, 305, 253, 175, 152, 83 ]

import time
import util
import mysql_to_write
import urllib3

if __name__ == '__main__':
    urllib3.disable_warnings()
    correct_project_ids = []
    project_ids = ['83']
    for projectId in project_ids:
        page = 1
        project_vulnerability_ids = []
        response = util.project_vulnerability_findings(projectId, page)
        # print("response:", response)
        if response != 'error' and len(response.json()) > 0:
            print(projectId)
            correct_project_ids.append(projectId)
            data = response.json()
            for project in data:
                # print(project)
                # print(project['uuid'])
                mysql_to_write.add_project_vulnerability_findings(project)
                project_vulnerability_ids.append(project['uuid'])
            header = response.headers
            # print(header)
            while header['X-Next-Page'] != '':
                page = header['X-Next-Page']
                # print('page:',page)
                response = util.project_vulnerability_findings(projectId, page)
                data = response.json()
                header = response.headers
                for project in data:
                    mysql_to_write.add_project_vulnerability_findings(project)
                    project_vulnerability_ids.append(project['uuid'])
                print("本次uuid长度:", len(project_vulnerability_ids))
            print("projectId:", projectId, ",数组长度:", len(response.json()), "uuid长度:",len(project_vulnerability_ids))
    # print("correct_project_ids", correct_project_ids)
    # time.sleep(3)

4.MYSQL建表
model.py

from sqlalchemy import Column, String, JSON, Integer, DateTime, Boolean,Text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

Base = declarative_base()

users = 'root'
password = 'mivbAs7Awc'
host = 'xxxx'
port = '3306'
database = 'gitlab'
charset = 'utf8mb4'

sqlalchemy_database_url = f'mysql+pymysql://{users}:{password}@{host}:{port}/{database}?charset={charset}'
engine = create_engine(sqlalchemy_database_url)

# 建表語句
class ProjectVulnerabilityFindings(Base):
    __tablename__ = "new_project_vulnerability_findings"
    id = Column(String(50))
    report_type = Column(String(100))
    name = Column(String(500))
    severity = Column(String(50))
    confidence = Column(String(50))
    scanner = Column(JSON)
    identifiers = Column(JSON)
    project_fingerprint = Column(String(200))
    uuid = Column(String(50), index=True, primary_key=True)
    create_jira_issue_url = Column(String(500))
    create_vulnerability_feedback_issue_path = Column(String(500))
    create_vulnerability_feedback_merge_request_path = Column(String(500))
    create_vulnerability_feedback_dismissal_path = Column(String(500))
    project = Column(JSON)
    dismissal_feedback = Column(String(500))
    issue_feedback = Column(JSON)
    merge_request_feedback = Column(String(500))
    description = Column(Text)
    links = Column(JSON)
    location = Column(JSON)
    remediations = Column(String(500))
    solution = Column(String(500))
    evidence = Column(String(500))
    request = Column(String(500))
    response = Column(String(500))
    evidence_source = Column(String(500))
    supporting_messages = Column(JSON)
    assets = Column(JSON)
    details = Column(JSON)
    state = Column(String(500))
    scan = Column(JSON)
    blob_path = Column(String(500))

class ProjectInfo(Base):
    __tablename__ = "projects"
    id = Column(Integer, index=True, primary_key=True)
    description = Column(Text)
    name = Column(String(300))
    name_with_namespace = Column(String(300))
    path = Column(String(300))
    path_with_namespace = Column(String(300))
    created_at = Column(String(300))
    default_branch = Column(String(300))
    tag_list = Column(JSON)
    ssh_url_to_repo = Column(String(100))
    http_url_to_repo = Column(String(100))
    web_url = Column(String(100))
    readme_url = Column(String(500))
    avatar_url = Column(String(300))
    forks_count = Column(Integer)
    star_count = Column(Integer)
    last_activity_at = Column(String(300))
    namespace = Column(JSON)
    container_registry_image_prefix = Column(String(300))
    _links = Column(JSON)
    packages_enabled = Column(Boolean)
    empty_repo = Column(Boolean)
    archived = Column(Boolean)
    visibility = Column(String(300))
    owner = Column(JSON)
    resolve_outdated_diff_discussions = Column(Boolean)
    container_registry_enabled = Column(Boolean)
    container_expiration_policy = Column(JSON)
    issues_enabled = Column(Boolean)
    merge_requests_enabled = Column(Boolean)
    wiki_enabled = Column(Boolean)
    jobs_enabled = Column(Boolean)
    snippets_enabled = Column(Boolean)
    service_desk_enabled = Column(Boolean)
    service_desk_address = Column(String(300))
    can_create_merge_request_in = Column(Boolean)
    issues_access_level = Column(String(300))
    repository_access_level = Column(String(300))
    merge_requests_access_level = Column(String(300))
    forking_access_level = Column(String(300))
    wiki_access_level = Column(String(300))
    builds_access_level = Column(String(300))
    snippets_access_level = Column(String(300))
    pages_access_level = Column(String(300))
    operations_access_level = Column(String(300))
    analytics_access_level = Column(String(300))
    emails_disabled = Column(String(300))
    shared_runners_enabled = Column(Boolean)
    lfs_enabled = Column(Boolean)
    creator_id = Column(Integer)
    import_status = Column(String(300))
    open_issues_count = Column(Integer)
    ci_default_git_depth = Column(Integer)
    ci_forward_deployment_enabled = Column(Boolean)
    public_jobs = Column(Boolean)
    build_timeout = Column(Integer)
    auto_cancel_pending_pipelines = Column(String(300))
    build_coverage_regex = Column(String(300))
    ci_config_path = Column(String(300))
    shared_with_groups = Column(JSON)
    only_allow_merge_if_pipeline_succeeds = Column(Boolean)
    allow_merge_on_skipped_pipeline = Column(String(300))
    restrict_user_defined_variables = Column(Boolean)
    request_access_enabled = Column(Boolean)
    only_allow_merge_if_all_discussions_are_resolved = Column(Boolean)
    remove_source_branch_after_merge = Column(Boolean)
    printing_merge_request_link_enabled = Column(Boolean)
    merge_method = Column(String(300))
    suggestion_commit_message = Column(String(300))
    auto_devops_enabled = Column(Boolean)
    auto_devops_deploy_strategy = Column(String(300))
    autoclose_referenced_issues = Column(Boolean)
    approvals_before_merge = Column(Integer)
    mirror = Column(Boolean)
    external_authorization_classification_label = Column(String(300))
    marked_for_deletion_at = Column(String(300))
    marked_for_deletion_on = Column(String(300))
    requirements_enabled = Column(Boolean)
    security_and_compliance_enabled = Column(Boolean)
    compliance_frameworks = Column(JSON)
    issues_template = Column(Text)
    merge_requests_template = Column(String(500))
    permissions = Column(JSON)

Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
session = Session()

5.寫入MySQL

mysql_to_write.py

import model

session = model.session


def add_project_vulnerability_findings(info):
    session.query(model.ProjectVulnerabilityFindings).filter(model.ProjectVulnerabilityFindings.uuid == info['uuid']).delete()
    session.commit()

    obj = model.ProjectVulnerabilityFindings(
        id=info['id'],
        report_type=info['report_type'],
        name=info['name'],
        severity=info['severity'],
        confidence=info['confidence'],
        scanner=info['scanner'],
        identifiers=info['identifiers'],
        project_fingerprint=info['project_fingerprint'],
        uuid=info['uuid'],
        create_jira_issue_url=info['create_jira_issue_url'],
        create_vulnerability_feedback_issue_path=info['create_vulnerability_feedback_issue_path'],
        create_vulnerability_feedback_merge_request_path=info['create_vulnerability_feedback_merge_request_path'],
        create_vulnerability_feedback_dismissal_path=info['create_vulnerability_feedback_dismissal_path'],
        project=info['project'],
        dismissal_feedback=info['dismissal_feedback'],
        issue_feedback=info['issue_feedback'],
        merge_request_feedback=info['merge_request_feedback'],
        description=info['description'],
        links=info['links'],
        location=info['location'],
        remediations=info['remediations'],
        solution=info['solution'],
        evidence=info['evidence'],
        request=info['request'],
        response=info['response'],
        evidence_source=info['evidence_source'],
        supporting_messages=info['supporting_messages'],
        assets=info['assets'],
        details=info['details'],
        state=info['state'],
        scan=info['scan'],
        blob_path=info['blob_path']
    )
    session.add(obj)
    session.commit()
    return obj



def add_project_record(info):
    session.query(model.ProjectInfo).filter(model.ProjectInfo.id == info['id']).delete()
    session.commit()
    if 'owner' not in info:
        info['owner'] = None
    if 'default_branch' not in info:
        info['default_branch'] = None
    if 'ci_config_path' not in info:
        info['ci_config_path'] = None
    if 'container_expiration_policy' not in info:
        info['container_expiration_policy'] = None

    obj = model.ProjectInfo(
        id=info['id'],
        description=info['description'],
        name=info['name'],
        name_with_namespace=info['name_with_namespace'],
        path=info['path'],
        path_with_namespace=info['path_with_namespace'],
        created_at=info['created_at'],
        default_branch=info['default_branch'],
        tag_list=info['tag_list'],
        ssh_url_to_repo=info['ssh_url_to_repo'],
        http_url_to_repo=info['http_url_to_repo'],
        web_url=info['web_url'],
        readme_url=info['readme_url'],
        avatar_url=info['avatar_url'],
        forks_count=info['forks_count'],
        star_count=info['star_count'],
        last_activity_at=info['last_activity_at'],
        namespace=info['namespace'],
        container_registry_image_prefix=info['container_registry_image_prefix'],
        _links=info['_links'],
        packages_enabled=info['packages_enabled'],
        empty_repo=info['empty_repo'],
        archived=info['archived'],
        visibility=info['visibility'],
        owner=info['owner'],
        resolve_outdated_diff_discussions=info['resolve_outdated_diff_discussions'],
        container_registry_enabled=info['container_registry_enabled'],
        container_expiration_policy=info['container_expiration_policy'],
        issues_enabled=info['issues_enabled'],
        merge_requests_enabled=info['merge_requests_enabled'],
        wiki_enabled=info['wiki_enabled'],
        jobs_enabled=info['jobs_enabled'],
        snippets_enabled=info['snippets_enabled'],
        service_desk_enabled=info['service_desk_enabled'],
        service_desk_address=info['service_desk_address'],
        can_create_merge_request_in=info['can_create_merge_request_in'],
        issues_access_level=info['issues_access_level'],
        repository_access_level=info['repository_access_level'],
        merge_requests_access_level=info['merge_requests_access_level'],
        forking_access_level=info['forking_access_level'],
        wiki_access_level=info['wiki_access_level'],
        builds_access_level=info['builds_access_level'],
        snippets_access_level=info['snippets_access_level'],
        pages_access_level=info['pages_access_level'],
        operations_access_level=info['operations_access_level'],
        analytics_access_level=info['analytics_access_level'],
        emails_disabled=info['emails_disabled'],
        shared_runners_enabled=info['shared_runners_enabled'],
        lfs_enabled=info['lfs_enabled'],
        creator_id=info['creator_id'],
        import_status=info['import_status'],
        open_issues_count=info['open_issues_count'],
        ci_default_git_depth=info['ci_default_git_depth'],
        ci_forward_deployment_enabled=info['ci_forward_deployment_enabled'],
        public_jobs=info['public_jobs'],
        build_timeout=info['build_timeout'],
        auto_cancel_pending_pipelines=info['auto_cancel_pending_pipelines'],
        build_coverage_regex=info['build_coverage_regex'],
        ci_config_path=info['ci_config_path'],
        shared_with_groups=info['shared_with_groups'],
        only_allow_merge_if_pipeline_succeeds=info['only_allow_merge_if_pipeline_succeeds'],
        allow_merge_on_skipped_pipeline=info['allow_merge_on_skipped_pipeline'],
        restrict_user_defined_variables=info['restrict_user_defined_variables'],
        request_access_enabled=info['request_access_enabled'],
        only_allow_merge_if_all_discussions_are_resolved=info['only_allow_merge_if_all_discussions_are_resolved'],
        remove_source_branch_after_merge=info['remove_source_branch_after_merge'],
        printing_merge_request_link_enabled=info['printing_merge_request_link_enabled'],
        merge_method=info['merge_method'],
        suggestion_commit_message=info['suggestion_commit_message'],
        auto_devops_enabled=info['auto_devops_enabled'],
        auto_devops_deploy_strategy=info['auto_devops_deploy_strategy'],
        autoclose_referenced_issues=info['autoclose_referenced_issues'],
        approvals_before_merge=info['approvals_before_merge'],
        mirror=info['mirror'],
        external_authorization_classification_label=info['external_authorization_classification_label'],
        marked_for_deletion_at=info['marked_for_deletion_at'],
        marked_for_deletion_on=info['marked_for_deletion_on'],
        requirements_enabled=info['requirements_enabled'],
        security_and_compliance_enabled=info['security_and_compliance_enabled'],
        compliance_frameworks=info['compliance_frameworks'],
        issues_template=info['issues_template'],
        merge_requests_template=info['merge_requests_template'],
        permissions=info['permissions']
    )
    session.add(obj)
    session.commit()
    return obj
  • 3
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

m0_37759590

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值