# -*- coding:utf-8 -*-
import os
import time
import requests
import re
import pandas as pd
import numpy as np
# retval = os.getcwd()
# os.chdir(retval+"/temp")
# Default PDB entry ID and the RCSB download URL built from it.
filename = '1R4L'
url = "http://files.rcsb.org/download/" + filename + ".pdb"

# Browser-like User-Agent header passed to requests.get() by save_file().
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/87.0.4280.67 Safari/537.36 Edg/87.0.664.47'
    ),
}

# Presumably the HTTP request timeout in seconds -- TODO confirm intended use.
timeout = 5
def get_correctfile(path):
    """Return the base names of the ``.pdb`` files located directly in *path*.

    Args:
        path: Directory to scan (not searched recursively).

    Returns:
        A list of file names with the ``.pdb`` extension removed, in sorted
        order of the directory entries. Sub-directories are skipped, even
        when their name ends in ``.pdb``.

    Raises:
        OSError: If *path* cannot be listed (propagated from ``os.listdir``).
    """
    names = []
    for entry in sorted(os.listdir(path)):
        # Join with the platform separator; plain string concatenation
        # (path + entry) yields a non-existent path, so a directory check
        # made on it can never succeed.
        full_path = os.path.join(path, entry)
        if os.path.isdir(full_path):
            continue
        stem, ext = os.path.splitext(entry)
        if ext == ".pdb":
            names.append(stem)
    return names
def _table_to_ids(df):
    """Flatten a DataFrame row by row into a list of non-empty ID strings."""
    ids = []
    for row in df.itertuples(index=False, name=None):
        for cell in row:
            # Empty spreadsheet/CSV cells come back as NaN; skip them.
            if pd.isna(cell):
                continue
            text = str(cell).strip()
            if text:
                ids.append(text)
    return ids


def get_file(path):
    """Read the list of IDs from a ``txt``, ``xlsx`` or ``csv`` file.

    The file type is chosen from the end of *path* (case-insensitive):

    * ``txt``  -- every run of four ASCII letters/digits on each line is
      taken as one ID.
    * ``xlsx`` / ``csv`` -- the table is read without a header row and all
      non-empty cells are returned, row by row, as stripped strings.

    Args:
        path: Path of the input file, including its extension.

    Returns:
        A list of ID strings; empty when the extension is not one of the
        three handled types.
    """
    ids = []
    lowered = path.lower()

    if lowered.endswith("txt"):
        id_pattern = re.compile('[0-9A-Za-z]{4}')
        # Context manager so the handle is closed as soon as reading ends.
        with open(path, "r", encoding='UTF-8') as handle:
            for line in handle:
                ids.extend(id_pattern.findall(line))

    # dtype=str keeps cells verbatim: without it an ID such as "1E12" is
    # parsed as the float 1e12 and no longer names a file.
    if lowered.endswith("xlsx"):
        ids.extend(_table_to_ids(pd.read_excel(path, header=None, dtype=str)))

    if lowered.endswith("csv"):
        ids.extend(_table_to_ids(pd.read_csv(path, header=None, dtype=str)))

    return ids
def save_file(fileurl,filename):
    """Download *fileurl* and store it as ``<filename>.pdb`` in the current directory.

    On success the module-level counter ``total`` is incremented and a
    progress line is printed. On failure nothing is written: the HTTP status
    code is printed for a non-200 response, and a short message is printed
    for a network error, so one bad entry does not abort a batch run.

    Args:
        fileurl: Full URL of the file to fetch.
        filename: Output file name without the ``.pdb`` extension.
    """
    global total
    try:
        # Without a timeout a stalled connection blocks the run forever.
        response = requests.get(fileurl, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"request failed for {fileurl}: {exc}")
        return

    # Only a 200 carries the file; other statuses (404, 403, 5xx, ...) return
    # an error body that must not be saved as if it were a PDB file.
    if response.status_code != 200:
        print(response.status_code)
        return

    # Write the raw bytes as received instead of decoding and re-encoding.
    with open(filename + '.pdb', "wb") as f:
        f.write(response.content)

    # Tolerate the counter not having been initialised by the caller.
    total = globals().get("total", 0) + 1
    print(f"保存第{total}张图片")
if __name__ == '__main__':
    # Script flow: read the wanted IDs, download each one into
    # <work dir>/temp, retry the missing ones up to five times, then write
    # the IDs that still failed to AAAfalselist.txt.

    # Counter of saved files; save_file() increments it through `global`.
    total = 0

    path = input("输入文件选择的文件目录带文件类型:")
    workpath = input("输入工作目录:")

    # Resolve the ID-list path before changing directory, otherwise a
    # relative path would be looked up inside the work directory.
    path = os.path.abspath(path)
    workpath = os.path.join(workpath, "temp")
    # exist_ok lets the script be re-run against the same work directory.
    os.makedirs(workpath, exist_ok=True)
    os.chdir(workpath)

    # Normalise to strings and drop duplicates (order kept) so each entry is
    # fetched once and the totals below count distinct IDs.
    wanted = [str(name) for name in dict.fromkeys(get_file(path))]

    for name in wanted:
        save_file(f"http://files.rcsb.org/download/{name}.pdb", name)

    retries_left = 5
    while True:
        # Re-scan after every download round so the report and the failure
        # list reflect the latest state, including the final retry.
        present = set(get_correctfile(workpath))
        missing = [name for name in wanted if name not in present]
        print(f"共计{len(wanted)}个文件,已成功下载{len(wanted) - len(missing)}个")
        if not missing or retries_left == 0:
            break
        retries_left -= 1
        for name in missing:
            save_file(f"http://files.rcsb.org/download/{name}.pdb", name)

    print(f"请求结束共计{len(wanted)}个文件,已成功下载{len(wanted) - len(missing)}个,失败文件已输出")
    with open('AAAfalselist.txt', 'w', encoding='utf-8') as report:
        for name in missing:
            report.write(name + '\n')
# NOTE: the three values below are stray extraction residue, not part of the script.
# 08-10
# 2051
# 06-03