python同步gitlab數據至MySQL代碼
gitlab接口:https://docs.gitlab.com/ee/api/projects.html
1.對接接口
由於某些project的數據量過大,直接採集,會報錯:
File “C:\Users\SARAH.X\Documents\env\Lib\site-packages\urllib3\util\retry.py”,
line 515, in increment raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type] urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host=‘gitlab.sjfood.us’, port=443): Max retries exceeded with url:/api/v4/projects/638/vulnerability_findings?&per_page=100&page=4
(Caused by ReadTimeoutError(“HTTPSConnectionPool(host=‘gitlab.sjfood.us’,port=443): Read timed out. (read timeout=15)”)) During handling of the above exception, another exception occurred: Traceback (most recent call last): File"C:\Users\SARAH.X\PycharmProjects\pythonProject\gitlab_scan\server.py", line 53, in
response = util.project_vulnerability_findings(projectId, page) File"C:\Users\SARAH.X\PycharmProjects\pythonProject\gitlab_scan\util.py",
line 33, in project_vulnerability_findings response = s.request(“GET”, url=url, timeout=15, headers=get_header(), verify=False)
util.py
import time
import requests
from requests.adapters import HTTPAdapter
import urllib3
def get_header():
access_token = "qcwssZuXdB_kHoaBpyNa"
headers = {
"Private-Token": access_token,
'User-Agent': 'Apifox/1.0.0 (https://apifox.com)',
'Accept': '*/*',
'Host': 'gitlab.sjfood.us',
'Accept-Encoding': 'gzip, deflate, br',
'Connection': 'keep-alive'
}
return headers
# 獲取所有project的id
def all_project_id(page):
url = f"https://gitlab.sjfood.us/api/v4/projects?&per_page=100&page={page}"
response = requests.get(url, headers=get_header(), verify=False)
return response
# 獲取特定project的漏洞调查结果
def project_vulnerability_findings(projectId, page):
# proxies = {'https': '127.0.0.1:10880'}
requests.adapters.DEFAULT_RETRIES = 5
url = f"http://gitlab.sjfood.us/api/v4/projects/{projectId}/vulnerability_findings?&per_page=100&page={page}"
s = requests.session()
# max_retries=3 重试3次
s.mount('http://', HTTPAdapter(max_retries=3))
s.mount('https://', HTTPAdapter(max_retries=3))
#response = s.request("GET", url=url, timeout=30, headers=get_header(), verify=False)
while True:
try:
response = s.request("GET", url=url, timeout=30, headers=get_header(), verify=False)
if response.status_code == 200:
return response
else:
return 'error'
except:
time.sleep(5)
pass
2.接口調用
server.py
import time
import util
import mysql_to_write
import urllib3
if __name__ == '__main__':
urllib3.disable_warnings()
# 獲取所有項目ID
page = 1
print("page:",page)
response = util.all_project_id(page)
data = response.json()
project_ids = []
for project in data:
# print(project)
# mysql_to_write.add_project_record(project)
project_ids.append(project['id'])
header = response.headers
while header['X-Next-Page'] != '':
page = header['X-Next-Page']
# print(page)
response = util.all_project_id(page)
data = response.json()
header = response.headers
for project in data:
# mysql_to_write.add_project_record(project)
project_ids.append(project['id'])
# print(len(project_ids))
print("project长度:", len(project_ids))
print("project数据:", project_ids)
# print(project_ids)
to_remove = [638, 546, 305, 253, 175, 152, 83]
# 逐个移除元素
for num in to_remove:
if num in project_ids:
project_ids.remove(num)
print("去除後的project长度", len(project_ids))
correct_project_ids = []
# project_ids = ['1134']
for projectId in project_ids:
page = 1
project_vulnerability_ids = []
response = util.project_vulnerability_findings(projectId, page)
# print("response:", response)
if response != 'error' and len(response.json()) > 0:
print(projectId)
correct_project_ids.append(projectId)
data = response.json()
for project in data:
# print(project)
# print(project['uuid'])
# mysql_to_write.add_project_vulnerability_findings(project)
project_vulnerability_ids.append(project['uuid'])
header = response.headers
# print(header)
while header['X-Next-Page'] != '':
page = header['X-Next-Page']
# print('page:',page)
response = util.project_vulnerability_findings(projectId, page)
data = response.json()
header = response.headers
for project in data:
# mysql_to_write.add_project_vulnerability_findings(project)
project_vulnerability_ids.append(project['uuid'])
print("本次uuid长度:", len(project_vulnerability_ids))
print("projectId:", projectId, ",数组长度:", len(response.json()), "uuid长度:",len(project_vulnerability_ids))
# print("correct_project_ids", correct_project_ids)
# time.sleep(3)
3、拆分project
project in [638, 546, 305, 253, 175, 152, 83 ]
import time
import util
import mysql_to_write
import urllib3
if __name__ == '__main__':
urllib3.disable_warnings()
correct_project_ids = []
project_ids = ['83']
for projectId in project_ids:
page = 1
project_vulnerability_ids = []
response = util.project_vulnerability_findings(projectId, page)
# print("response:", response)
if response != 'error' and len(response.json()) > 0:
print(projectId)
correct_project_ids.append(projectId)
data = response.json()
for project in data:
# print(project)
# print(project['uuid'])
mysql_to_write.add_project_vulnerability_findings(project)
project_vulnerability_ids.append(project['uuid'])
header = response.headers
# print(header)
while header['X-Next-Page'] != '':
page = header['X-Next-Page']
# print('page:',page)
response = util.project_vulnerability_findings(projectId, page)
data = response.json()
header = response.headers
for project in data:
mysql_to_write.add_project_vulnerability_findings(project)
project_vulnerability_ids.append(project['uuid'])
print("本次uuid长度:", len(project_vulnerability_ids))
print("projectId:", projectId, ",数组长度:", len(response.json()), "uuid长度:",len(project_vulnerability_ids))
# print("correct_project_ids", correct_project_ids)
# time.sleep(3)
4.MYSQL建表
model.py
from sqlalchemy import Column, String, JSON, Integer, DateTime, Boolean,Text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
Base = declarative_base()
users = 'root'
password = 'mivbAs7Awc'
host = 'xxxx'
port = '3306'
database = 'gitlab'
charset = 'utf8mb4'
sqlalchemy_database_url = f'mysql+pymysql://{users}:{password}@{host}:{port}/{database}?charset={charset}'
engine = create_engine(sqlalchemy_database_url)
# 建表語句
class ProjectVulnerabilityFindings(Base):
__tablename__ = "new_project_vulnerability_findings"
id = Column(String(50))
report_type = Column(String(100))
name = Column(String(500))
severity = Column(String(50))
confidence = Column(String(50))
scanner = Column(JSON)
identifiers = Column(JSON)
project_fingerprint = Column(String(200))
uuid = Column(String(50), index=True, primary_key=True)
create_jira_issue_url = Column(String(500))
create_vulnerability_feedback_issue_path = Column(String(500))
create_vulnerability_feedback_merge_request_path = Column(String(500))
create_vulnerability_feedback_dismissal_path = Column(String(500))
project = Column(JSON)
dismissal_feedback = Column(String(500))
issue_feedback = Column(JSON)
merge_request_feedback = Column(String(500))
description = Column(Text)
links = Column(JSON)
location = Column(JSON)
remediations = Column(String(500))
solution = Column(String(500))
evidence = Column(String(500))
request = Column(String(500))
response = Column(String(500))
evidence_source = Column(String(500))
supporting_messages = Column(JSON)
assets = Column(JSON)
details = Column(JSON)
state = Column(String(500))
scan = Column(JSON)
blob_path = Column(String(500))
class ProjectInfo(Base):
__tablename__ = "projects"
id = Column(Integer, index=True, primary_key=True)
description = Column(Text)
name = Column(String(300))
name_with_namespace = Column(String(300))
path = Column(String(300))
path_with_namespace = Column(String(300))
created_at = Column(String(300))
default_branch = Column(String(300))
tag_list = Column(JSON)
ssh_url_to_repo = Column(String(100))
http_url_to_repo = Column(String(100))
web_url = Column(String(100))
readme_url = Column(String(500))
avatar_url = Column(String(300))
forks_count = Column(Integer)
star_count = Column(Integer)
last_activity_at = Column(String(300))
namespace = Column(JSON)
container_registry_image_prefix = Column(String(300))
_links = Column(JSON)
packages_enabled = Column(Boolean)
empty_repo = Column(Boolean)
archived = Column(Boolean)
visibility = Column(String(300))
owner = Column(JSON)
resolve_outdated_diff_discussions = Column(Boolean)
container_registry_enabled = Column(Boolean)
container_expiration_policy = Column(JSON)
issues_enabled = Column(Boolean)
merge_requests_enabled = Column(Boolean)
wiki_enabled = Column(Boolean)
jobs_enabled = Column(Boolean)
snippets_enabled = Column(Boolean)
service_desk_enabled = Column(Boolean)
service_desk_address = Column(String(300))
can_create_merge_request_in = Column(Boolean)
issues_access_level = Column(String(300))
repository_access_level = Column(String(300))
merge_requests_access_level = Column(String(300))
forking_access_level = Column(String(300))
wiki_access_level = Column(String(300))
builds_access_level = Column(String(300))
snippets_access_level = Column(String(300))
pages_access_level = Column(String(300))
operations_access_level = Column(String(300))
analytics_access_level = Column(String(300))
emails_disabled = Column(String(300))
shared_runners_enabled = Column(Boolean)
lfs_enabled = Column(Boolean)
creator_id = Column(Integer)
import_status = Column(String(300))
open_issues_count = Column(Integer)
ci_default_git_depth = Column(Integer)
ci_forward_deployment_enabled = Column(Boolean)
public_jobs = Column(Boolean)
build_timeout = Column(Integer)
auto_cancel_pending_pipelines = Column(String(300))
build_coverage_regex = Column(String(300))
ci_config_path = Column(String(300))
shared_with_groups = Column(JSON)
only_allow_merge_if_pipeline_succeeds = Column(Boolean)
allow_merge_on_skipped_pipeline = Column(String(300))
restrict_user_defined_variables = Column(Boolean)
request_access_enabled = Column(Boolean)
only_allow_merge_if_all_discussions_are_resolved = Column(Boolean)
remove_source_branch_after_merge = Column(Boolean)
printing_merge_request_link_enabled = Column(Boolean)
merge_method = Column(String(300))
suggestion_commit_message = Column(String(300))
auto_devops_enabled = Column(Boolean)
auto_devops_deploy_strategy = Column(String(300))
autoclose_referenced_issues = Column(Boolean)
approvals_before_merge = Column(Integer)
mirror = Column(Boolean)
external_authorization_classification_label = Column(String(300))
marked_for_deletion_at = Column(String(300))
marked_for_deletion_on = Column(String(300))
requirements_enabled = Column(Boolean)
security_and_compliance_enabled = Column(Boolean)
compliance_frameworks = Column(JSON)
issues_template = Column(Text)
merge_requests_template = Column(String(500))
permissions = Column(JSON)
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
session = Session()
5.寫入MySQL
mysql_to_write.py
import model
session = model.session
def add_project_vulnerability_findings(info):
session.query(model.ProjectVulnerabilityFindings).filter(model.ProjectVulnerabilityFindings.uuid == info['uuid']).delete()
session.commit()
obj = model.ProjectVulnerabilityFindings(
id=info['id'],
report_type=info['report_type'],
name=info['name'],
severity=info['severity'],
confidence=info['confidence'],
scanner=info['scanner'],
identifiers=info['identifiers'],
project_fingerprint=info['project_fingerprint'],
uuid=info['uuid'],
create_jira_issue_url=info['create_jira_issue_url'],
create_vulnerability_feedback_issue_path=info['create_vulnerability_feedback_issue_path'],
create_vulnerability_feedback_merge_request_path=info['create_vulnerability_feedback_merge_request_path'],
create_vulnerability_feedback_dismissal_path=info['create_vulnerability_feedback_dismissal_path'],
project=info['project'],
dismissal_feedback=info['dismissal_feedback'],
issue_feedback=info['issue_feedback'],
merge_request_feedback=info['merge_request_feedback'],
description=info['description'],
links=info['links'],
location=info['location'],
remediations=info['remediations'],
solution=info['solution'],
evidence=info['evidence'],
request=info['request'],
response=info['response'],
evidence_source=info['evidence_source'],
supporting_messages=info['supporting_messages'],
assets=info['assets'],
details=info['details'],
state=info['state'],
scan=info['scan'],
blob_path=info['blob_path']
)
session.add(obj)
session.commit()
return obj
def add_project_record(info):
session.query(model.ProjectInfo).filter(model.ProjectInfo.id == info['id']).delete()
session.commit()
if 'owner' not in info:
info['owner'] = None
if 'default_branch' not in info:
info['default_branch'] = None
if 'ci_config_path' not in info:
info['ci_config_path'] = None
if 'container_expiration_policy' not in info:
info['container_expiration_policy'] = None
obj = model.ProjectInfo(
id=info['id'],
description=info['description'],
name=info['name'],
name_with_namespace=info['name_with_namespace'],
path=info['path'],
path_with_namespace=info['path_with_namespace'],
created_at=info['created_at'],
default_branch=info['default_branch'],
tag_list=info['tag_list'],
ssh_url_to_repo=info['ssh_url_to_repo'],
http_url_to_repo=info['http_url_to_repo'],
web_url=info['web_url'],
readme_url=info['readme_url'],
avatar_url=info['avatar_url'],
forks_count=info['forks_count'],
star_count=info['star_count'],
last_activity_at=info['last_activity_at'],
namespace=info['namespace'],
container_registry_image_prefix=info['container_registry_image_prefix'],
_links=info['_links'],
packages_enabled=info['packages_enabled'],
empty_repo=info['empty_repo'],
archived=info['archived'],
visibility=info['visibility'],
owner=info['owner'],
resolve_outdated_diff_discussions=info['resolve_outdated_diff_discussions'],
container_registry_enabled=info['container_registry_enabled'],
container_expiration_policy=info['container_expiration_policy'],
issues_enabled=info['issues_enabled'],
merge_requests_enabled=info['merge_requests_enabled'],
wiki_enabled=info['wiki_enabled'],
jobs_enabled=info['jobs_enabled'],
snippets_enabled=info['snippets_enabled'],
service_desk_enabled=info['service_desk_enabled'],
service_desk_address=info['service_desk_address'],
can_create_merge_request_in=info['can_create_merge_request_in'],
issues_access_level=info['issues_access_level'],
repository_access_level=info['repository_access_level'],
merge_requests_access_level=info['merge_requests_access_level'],
forking_access_level=info['forking_access_level'],
wiki_access_level=info['wiki_access_level'],
builds_access_level=info['builds_access_level'],
snippets_access_level=info['snippets_access_level'],
pages_access_level=info['pages_access_level'],
operations_access_level=info['operations_access_level'],
analytics_access_level=info['analytics_access_level'],
emails_disabled=info['emails_disabled'],
shared_runners_enabled=info['shared_runners_enabled'],
lfs_enabled=info['lfs_enabled'],
creator_id=info['creator_id'],
import_status=info['import_status'],
open_issues_count=info['open_issues_count'],
ci_default_git_depth=info['ci_default_git_depth'],
ci_forward_deployment_enabled=info['ci_forward_deployment_enabled'],
public_jobs=info['public_jobs'],
build_timeout=info['build_timeout'],
auto_cancel_pending_pipelines=info['auto_cancel_pending_pipelines'],
build_coverage_regex=info['build_coverage_regex'],
ci_config_path=info['ci_config_path'],
shared_with_groups=info['shared_with_groups'],
only_allow_merge_if_pipeline_succeeds=info['only_allow_merge_if_pipeline_succeeds'],
allow_merge_on_skipped_pipeline=info['allow_merge_on_skipped_pipeline'],
restrict_user_defined_variables=info['restrict_user_defined_variables'],
request_access_enabled=info['request_access_enabled'],
only_allow_merge_if_all_discussions_are_resolved=info['only_allow_merge_if_all_discussions_are_resolved'],
remove_source_branch_after_merge=info['remove_source_branch_after_merge'],
printing_merge_request_link_enabled=info['printing_merge_request_link_enabled'],
merge_method=info['merge_method'],
suggestion_commit_message=info['suggestion_commit_message'],
auto_devops_enabled=info['auto_devops_enabled'],
auto_devops_deploy_strategy=info['auto_devops_deploy_strategy'],
autoclose_referenced_issues=info['autoclose_referenced_issues'],
approvals_before_merge=info['approvals_before_merge'],
mirror=info['mirror'],
external_authorization_classification_label=info['external_authorization_classification_label'],
marked_for_deletion_at=info['marked_for_deletion_at'],
marked_for_deletion_on=info['marked_for_deletion_on'],
requirements_enabled=info['requirements_enabled'],
security_and_compliance_enabled=info['security_and_compliance_enabled'],
compliance_frameworks=info['compliance_frameworks'],
issues_template=info['issues_template'],
merge_requests_template=info['merge_requests_template'],
permissions=info['permissions']
)
session.add(obj)
session.commit()
return obj