check_file_exist.py
检查第一个文件夹中的文件是否也在第二个文件夹里 (check whether every file in the first folder also exists in the second folder)
import os

# Raw strings so the backslashes in these Windows paths are taken literally
# (non-raw strings risk invalid/accidental escape sequences, a SyntaxWarning
# on modern Python).
folder1 = r'D:\Desktop\pusar\pulsar_wx\lib'  # folder whose files we check
folder2 = r'D:\Desktop\pusar\pulsar_nj\lib'  # folder they should also be in


def find_missing_files(src_folder, dst_folder):
    """Return the directory entries of src_folder that are absent from dst_folder.

    Both arguments are paths to existing directories; the comparison is by
    entry name only, not by content.
    """
    # Build the set once: O(1) membership tests instead of O(m) list scans.
    present = set(os.listdir(dst_folder))
    return [name for name in os.listdir(src_folder) if name not in present]


if __name__ == '__main__':
    for missing_name in find_missing_files(folder1, folder2):
        print(missing_name)
compare.py
比较两个文件的相似度 (compare the similarity of two files)
import string
import os
import Levenshtein
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from difflib import SequenceMatcher
import difflib
from fuzzywuzzy import fuzz
# 要比较两个文件的相似度,可以使用文本相似度算法,如余弦相似度或编辑距离。
#
# 余弦相似度:
#
# 首先,将两个文件的内容分别转换为词向量表示。
# 然后,计算两个词向量的余弦相似度。
# 余弦相似度的值范围在[-1, 1]之间,值越接近1表示两个文件越相似,值越接近-1表示两个文件越不相似。
# 编辑距离:
#
# 首先,将两个文件的内容作为字符串进行比较。
# 然后,使用编辑距离算法(如Levenshtein距离)计算两个字符串之间的编辑距离。
# 编辑距离表示将一个字符串转换为另一个字符串所需的最少编辑操作次数,如插入、删除和替换操作。
# 编辑距离的值越小表示两个文件越相似,值越大表示两个文件越不相似。
def compare_files(f1, f2):
    """Cosine similarity of two files' bag-of-words vectors, in [0, 1].

    Punctuation and whitespace are stripped from both texts before
    vectorising; undecodable bytes are ignored when reading.

    NOTE(review): stripping *all* whitespace collapses each file into one
    unbroken token, so CountVectorizer sees at most a single "word" per
    document — confirm this is the intended granularity.
    """
    with open(f1, errors="ignore") as file_a, open(f2, errors="ignore") as file_b:
        raw_a = file_a.read()
        raw_b = file_b.read()
    # One translate() pass deletes every punctuation and whitespace character.
    strip_table = str.maketrans('', '', string.punctuation + string.whitespace)
    documents = [raw_a.translate(strip_table), raw_b.translate(strip_table)]
    vectors = CountVectorizer().fit_transform(documents).toarray()
    return cosine_similarity([vectors[0]], [vectors[1]])[0][0]
def file_similarity_checker(f1, f2):
    """Similarity ratio of two files' text via difflib.SequenceMatcher.

    Returns a float in [0, 1]; 1.0 means the files read identically.
    Undecodable bytes are ignored rather than raising.
    """
    with open(f1, errors="ignore") as left, open(f2, errors="ignore") as right:
        ratio = SequenceMatcher(None, left.read(), right.read()).ratio()
    return ratio
def generate_file_dict(folder_path):
    """Map each regular file's name in folder_path to its text content.

    Subdirectories are skipped (no recursion).  Decoding errors are
    ignored, for consistency with every other file reader in this module
    — the original strict open() crashed on non-UTF-8 files.
    """
    file_dict = {}
    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)
        if os.path.isfile(file_path):
            with open(file_path, 'r', errors="ignore") as file:
                file_dict[filename] = file.read()
    return file_dict
def visit_folder(folder_path):
    """Recursively compare every file under folder_path with the file of the
    same name in the reference folder ``dict_path`` (module global), and
    print the pairs that look dissimilar under all four metrics, together
    with both files' contents.

    Side effects only (printing); returns None.
    """
    for file_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file_name)
        if os.path.isfile(file_path):
            example_path = os.path.join(dict_path, file_name)
            # Cheap TF-IDF screen first; only run the costlier metrics
            # when it is low.
            similarity3 = calculate_cosine_similarity(file_path, example_path)
            if similarity3 >= 0.4:
                continue
            # This file is expected to differ per city; don't report it.
            if file_name == 'process-env.properties':
                continue
            similarity1 = file_similarity_checker(file_path, example_path)
            similarity = compare_files(file_path, example_path)
            similarity2 = levenshtein_compare(file_path, example_path)
            # BUG FIX: the original ran this check even when the
            # similarity3 branch was skipped, referencing
            # similarity/similarity1/similarity2 that were either unset
            # (NameError on the first file) or stale from a previous
            # iteration (wrong reports).
            if similarity < 0.8 and similarity1 < 0.8 and similarity2 < 0.8:
                print(
                    f"The similarity between {file_path} and {example_path} is: {similarity3} {similarity} {similarity1} {similarity2}")
                try:
                    with open(file_path, 'r') as file:
                        print(file.read())  # show the visited file
                        print()
                    with open(example_path, 'r') as file:
                        print(file.read())  # show the reference file
                except FileNotFoundError:
                    print("文件不存在")
                except IOError:
                    print("无法打开文件")
                print()
        elif os.path.isdir(file_path):
            # Recurse into subdirectories.
            visit_folder(file_path)
def difflib_compare(f1, f2):
    """Similarity ratio (0..1) of two files' text using difflib.

    Behaves like file_similarity_checker(); undecodable bytes are ignored.
    """
    with open(f1, errors="ignore") as left:
        left_text = left.read()
    with open(f2, errors="ignore") as right:
        right_text = right.read()
    matcher = difflib.SequenceMatcher(None, left_text, right_text)
    return matcher.ratio()
def fuzz_compare(f1, f2):
    """Similarity score (0..100) of two files' text via fuzzywuzzy's ratio.

    Undecodable bytes are ignored when reading.
    """
    with open(f1, errors="ignore") as left, open(f2, errors="ignore") as right:
        score = fuzz.ratio(left.read(), right.read())
    return score
def levenshtein_compare(file1, file2):
    """Normalized Levenshtein similarity of two files' text, in [0, 1].

    1.0 means identical contents.  Decoding errors are ignored, matching
    the other readers in this module (the original strict open() crashed
    on non-UTF-8 files).
    """
    with open(file1, 'r', errors="ignore") as f1, open(file2, 'r', errors="ignore") as f2:
        text1 = f1.read()
        text2 = f2.read()
    longest = max(len(text1), len(text2))
    if longest == 0:
        # BUG FIX: two empty files divided by max(len)==0 raised
        # ZeroDivisionError; empty == empty is a perfect match.
        return 1.0
    distance = Levenshtein.distance(text1, text2)
    return 1 - (distance / longest)
def preprocess_text(text):
    """Normalise text for comparison by deleting every space character.

    Only ASCII spaces are removed; tabs/newlines are kept.  (Lower-casing
    and punctuation removal are intentionally left disabled.)
    """
    return "".join(text.split(" "))
def calculate_cosine_similarity(f1, f2):
    """TF-IDF cosine similarity between the contents of two files.

    Both texts are passed through preprocess_text() (space removal)
    before vectorising; undecodable bytes are ignored when reading.
    Returns a float in [0, 1].
    """
    with open(f1, errors="ignore") as file_a, open(f2, errors="ignore") as file_b:
        doc_a = preprocess_text(file_a.read())
        doc_b = preprocess_text(file_b.read())
    # Fit the vectorizer on both documents at once so they share a vocabulary.
    tfidf_matrix = TfidfVectorizer().fit_transform([doc_a, doc_b])
    return cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]
# Reference folder: visit_folder() compares every file it encounters against
# the file of the same name under this path.
dict_path = 'D:\\Desktop\\server\\all\\RERAT\\APP9804_SZ'


def main():
    # Walk the whole RERAT tree; visit_folder prints the files that diverge
    # from their reference copies in dict_path.
    folder_path = 'D:\\Desktop\\server\\all\\RERAT'
    visit_folder(folder_path)


if __name__ == '__main__':
    main()
#sim=file_similarity_checker('D:\\Desktop\\server\\all\\FXDED\\APP9726_SZ\\application.yml','D:\\Desktop\\server\\all\\FXDED\\APP9727_HA\\application.properties')
#sim=difflib_compare('D:\\Desktop\\server\\all\\FXDED\\APP9726_SZ\\application.yml','D:\\Desktop\\server\\all\\FXDED\\APP9727_HA\\application.properties')
#sim=fuzz_compare('D:\\Desktop\\server\\all\\FXDED\\APP9726_SZ\\application.yml','D:\\Desktop\\server\\all\\FXDED\\APP9727_HA\\application.properties')
#sim=levenshtein_compare('D:\\Desktop\\server\\all\\FXDED\\APP9726_SZ\\application.yml','D:\\Desktop\\server\\all\\FXDED\\APP9727_HA\\application.properties')
#sim=compare_files('D:\\Desktop\\server\\all\\FXDED\\APP9726_SZ\\application.yml','D:\\Desktop\\server\\all\\FXDED\\APP9727_HA\\application.properties')
#print(sim)
split_file.py
按空行将文件内容划分为多个文件 (split a file's contents into multiple files on blank lines)
import os
# Region codes used to locate each city's output subfolder; list order defines
# which section of the input file goes to which city.
folder_names = ["CZ", "HA", "LYG", "NJ", "NT", "SQ", "SZ", "TZ", "WX", "XZ", "YC", "YZ", "ZJ"]
def split_file_by_empty_lines(file_path, output_folder, names=None):
    """Split file_path on blank lines and write each section to
    <output_folder>/<matching subfolder>/process-env.properties.

    Section i is written into the first directory entry of output_folder
    whose name contains names[i].  ``names`` defaults to the module-level
    ``folder_names`` region codes, so existing callers are unaffected.

    Raises IndexError if the file contains more sections than ``names``.
    NOTE(review): consecutive blank lines still produce (and write) empty
    sections, as in the original — confirm that is intended.
    """
    if names is None:
        names = folder_names
    with open(file_path, 'r') as input_file:
        lines = input_file.readlines()

    def _write_section(section_index, section_lines):
        # Locate the subfolder whose name contains this section's region code.
        prefix = ''
        for entry in os.listdir(output_folder):
            if names[section_index] in entry:
                prefix = entry
                break
        # BUG FIX: the original concatenated with a literal "\\", which only
        # forms a valid path on Windows; os.path.join is portable.  It also
        # reused a stale prefix from the previous section when no entry
        # matched — now an unmatched section falls back to output_folder.
        output_file_path = os.path.join(output_folder, prefix, 'process-env.properties')
        with open(output_file_path, 'w') as output_file:
            output_file.writelines(section_lines)

    current_file_number = 0
    current_file_lines = []
    for line in lines:
        if line.strip() == '':
            # A blank line ends the current section: flush it.
            _write_section(current_file_number, current_file_lines)
            current_file_number += 1
            current_file_lines = []
        else:
            current_file_lines.append(line)
    # Flush the trailing section (the file need not end with a blank line).
    _write_section(current_file_number, current_file_lines)
    print('ok')
# Example usage: split the RERAT environment dump into the per-city server folders.
split_file_by_empty_lines('D:\\Desktop\\env\\rerat_env.txt', 'D:\\Desktop\\server\\all\\server_RERAT')
get_coverity_streams.py
获取coverity projects和streams (fetch Coverity projects and their streams)
import requests
class Coverity(object):
    """Minimal Coverity Connect REST client for listing projects and streams."""

    def __init__(self, host, port, username, password):
        """Store connection settings and build the base https URL."""
        import logging  # local import keeps this class self-contained
        self.host = host
        self.port = port
        self.username = username
        self.password = password
        self.url = 'https://{}:{}'.format(self.host, self.port)
        # BUG FIX: self.logger was used in get_coverity_projects() but never
        # defined, so the HTTP-failure path raised AttributeError instead of
        # logging the error.
        self.logger = logging.getLogger(self.__class__.__name__)

    def get_coverity_projects(self):
        """Return {project_name: [stream_name, ...]} from /api/v2/projects.

        Returns None when the request fails; the failure is logged.
        """
        url = '{}/api/v2/projects?includeChildren=true&includeStreams=true'.format(self.url)
        headers = dict(Accept='application/json')
        response = requests.get(url, auth=(self.username, self.password),
                                params=dict(), headers=headers)
        if not response.ok:
            err_msg = 'get project failed, msg: {}, status_code: {}'.format(
                response.text, response.status_code)
            self.logger.error(err_msg)
            return None
        proj_stream = dict()
        for item in response.json()['projects']:
            proj_stream[item['name']] = [stream['name'] for stream in item['streams']]
        return proj_stream
# Fetch and print every project with its streams.  Credentials are blank
# here; fill in username/password before running.
coverity = Coverity(host="coverity-earth.hobot.cc", port=8443,
                    username="", password="")
proj_stream = coverity.get_coverity_projects()
# BUG FIX: get_coverity_projects() returns None on HTTP failure; guard before
# iterating to avoid "TypeError: 'NoneType' object is not iterable".
if proj_stream:
    for proj, streams in proj_stream.items():
        print(proj)
        for stream in streams:
            print(stream)
        print()
singleton.py
单例模式 (singleton pattern)
import random
def singleton(cls):
    """Class decorator: make cls produce a single shared instance.

    The first call constructs the instance (forwarding any positional and
    keyword arguments); every subsequent call returns that same instance,
    ignoring its arguments.
    """
    _instance = {}

    def inner(*args, **kwargs):
        # BUG FIX: the original inner() accepted no arguments, so any
        # decorated class whose __init__ takes parameters could not be
        # instantiated at all (TypeError on the first call).
        if cls not in _instance:
            _instance[cls] = cls(*args, **kwargs)
        return _instance[cls]
    return inner
@singleton
class Config(object):
    """Demo configuration object; the singleton decorator guarantees that
    every construction returns the same instance."""

    def __init__(self):
        # A random id makes it observable that only one instance ever exists.
        generated_id = random.randint(1, 1000)
        self.id = generated_id
        print(generated_id)

    def __str__(self):
        return "{}".format(self.id)