# -*- coding:utf-8 -*-
import os
import numpy as np
import re
import pandas as pd
def traverse(filepath):
    """Recursively delete empty directories and zero-byte files under *filepath*.

    Each entry of *filepath* is handled as follows:

    * a directory that is already empty is removed with ``os.rmdir``;
    * a non-empty directory is descended into;
    * any other entry is removed when its size is 0 bytes.

    The emptiness check runs before the recursive call, so a directory that
    only becomes empty because this pass deleted its contents is left in
    place.

    Args:
        filepath: Directory to clean; may be absolute or relative.

    Raises:
        OSError: Propagated from ``os.listdir``, ``os.rmdir``,
            ``os.path.getsize`` or ``os.remove``.
    """
    for entry in os.listdir(filepath):
        entry_path = os.path.join(filepath, entry)
        if os.path.isdir(entry_path):
            if not os.listdir(entry_path):
                os.rmdir(entry_path)
            else:
                traverse(entry_path)
        # Use the joined path as-is: joining it onto ``filepath`` a second
        # time yields ``dir/dir/name`` whenever ``filepath`` is relative.
        elif os.path.getsize(entry_path) == 0:
            os.remove(entry_path)
def get_file(path):
    """Collect the ``log`` and ``txt`` files located directly inside *path*.

    Entries are visited in sorted name order and sub-directories are skipped.
    A file is selected by the last three characters of its name (``log`` or
    ``txt``), not by a dotted extension.

    Args:
        path: Directory to scan (not recursive).

    Returns:
        A tuple ``(loglist, txtlist)`` of full paths, each list sorted by
        file name.
    """
    loglist = []
    txtlist = []
    for name in sorted(os.listdir(path)):
        # Build the real path once; concatenating ``path + name`` without a
        # separator made the directory check test a non-existent path.
        filename = os.path.join(path, name)
        if os.path.isdir(filename):
            continue
        if filename.endswith("log"):
            loglist.append(filename)
        if filename.endswith("txt"):
            txtlist.append(filename)
    return (loglist, txtlist)
# Number pattern applied to every parsed line; only the first match is used.
_NUMBER_RE = re.compile(r'-?\d+\.?\d*e?-?\d*?')
# Number of consecutive lines captured from a log, starting at "gauss 1".
_TERMS_PER_LOG = 5
# Leading characters of the log file name used as the compound name.
_NAME_PREFIX_LEN = 8


def _read_log_terms(log_path):
    """Return the first number on each line of the "gauss 1" block of a log.

    Capturing starts on a line whose first 11 columns contain ``gauss 1`` and
    covers that line plus the following four. The number is searched from
    column 12 onwards so digits inside the label are not picked up.

    Raises:
        ValueError: If a captured line contains no number.
    """
    values = []
    remaining = 0  # lines still to capture; local, so it cannot leak between files
    with open(log_path, "r", encoding='UTF-8') as handle:
        for line in handle:
            # Compare the stripped columns so the marker matches regardless of
            # how many spaces pad it; an exact comparison of the 11-character
            # slice with a shorter literal can never succeed on a normal line.
            if line[0:11].strip() == "gauss 1":
                remaining = _TERMS_PER_LOG
            if not remaining:
                continue
            remaining -= 1
            numbers = _NUMBER_RE.findall(line[12:])
            if not numbers:
                raise ValueError(
                    "no numeric value in %r: %r" % (log_path, line))
            values.append(numbers[0])
    return values


def _read_txt_values(txt_path):
    """Return the first number found on each line of a txt file.

    Lines without any number (for example blank lines) are skipped.
    """
    values = []
    with open(txt_path, "r", encoding='ISO-8859-1') as handle:
        for line in handle:
            numbers = _NUMBER_RE.findall(line)
            if numbers:
                values.append(numbers[0])
    return values


if __name__ == '__main__':
    path = r'E:\zzztest\zzzzDEMO\file2'
    outpath = r'E:\zzztest\zzzzDEMO\file2'

    # Drop empty files/folders first so they are not parsed below.
    traverse(path)
    loglist, txtlist = get_file(path)

    # (log file name, value) pairs, five per well-formed log, in file order.
    log_rows = []
    for log_path in loglist:
        log_name = os.path.basename(log_path)
        log_rows.extend(
            (log_name, value) for value in _read_log_terms(log_path))

    # One value per numeric line of every txt file, in file order.
    txt_values = []
    for txt_path in txtlist:
        txt_values.extend(_read_txt_values(txt_path))

    # Rows are paired purely by position: the i-th txt value goes with the
    # i-th group of five log values, so fail early if logs are missing.
    if len(log_rows) < _TERMS_PER_LOG * len(txt_values):
        raise ValueError(
            "expected %d log values for %d txt values, found %d"
            % (_TERMS_PER_LOG * len(txt_values), len(txt_values),
               len(log_rows)))

    final = []
    for index, output in enumerate(txt_values):
        start = index * _TERMS_PER_LOG
        group = log_rows[start:start + _TERMS_PER_LOG]
        # Keep names and values separate instead of slicing a combined
        # "name-value" string at fixed offsets, which only works for one
        # exact file-name length.
        compound = group[0][0][:_NAME_PREFIX_LEN]
        final.append([compound] + [value for _, value in group] + [output])
    print(final)

    df = pd.DataFrame(final, columns=pd.MultiIndex.from_tuples(
        [('cmpname', 'cmpname'), ('input', 'input1'), ('input', 'input2'),
         ('input', 'input3'), ('input', 'input4'), ('input', 'input5'),
         ('output', 'output')]
    ))
    print(df)
    df.to_excel(os.path.join(outpath, 'result.xlsx'))