import csv
def count_low_complexity_residues(sequence, fraction=0.04):
    """Return ``int(len(sequence) * fraction)``.

    Despite its name, this function does not inspect the residues of
    *sequence*; the result depends only on the sequence length.

    Args:
        sequence: Any object supporting ``len()``.
        fraction: Proportion of the length to return. Defaults to 0.04,
            the value that was previously hard-coded.

    Returns:
        The scaled length, truncated toward zero by ``int()``.
    """
    return int(len(sequence) * fraction)
def find_low_complexity_regions(sequence, max_residues):
    """Find runs of consecutive lowercase characters in *sequence*.

    A run is a maximal stretch of characters for which ``str.islower()``
    is true; any other character (uppercase, digit, symbol) ends the run.

    Args:
        sequence: String to scan.
        max_residues: Runs longer than this many characters are dropped
            from the result.

    Returns:
        A list of dicts, in order of appearance, each with the keys
        ``'start'`` and ``'end'`` (0-based inclusive indices of the first
        and last lowercase character of the run) and ``'count'`` (the
        run length).
    """
    regions = []
    current = None  # the run being extended, or None while outside a run

    for index, residue in enumerate(sequence):
        if residue.islower():
            if current is None:
                # Anchor the start on the first lowercase residue itself,
                # not on the preceding uppercase one, so that
                # end - start + 1 == count always holds.
                current = {'start': index, 'end': index, 'count': 0}
            current['end'] = index
            current['count'] += 1
        elif current is not None:
            regions.append(current)
            current = None

    # A run that extends to the end of the sequence is still open here.
    if current is not None:
        regions.append(current)

    return [region for region in regions if region['count'] <= max_residues]
def process_file(file_path, max_low_complexity=3):
    """Parse a '>'-headed sequence file and collect lowercase regions.

    Lines starting with ``'>'`` open a new record whose name is the rest
    of the line; every following line is appended (stripped) to that
    record's sequence. Lines that appear before the first header,
    including blank ones, are ignored instead of raising ``TypeError``.

    Each new record is printed to stdout as it is encountered.

    Args:
        file_path: Path of the text file to read.
        max_low_complexity: A record is kept only when the value returned
            by ``count_low_complexity_residues`` for its sequence is at
            most this number. Defaults to 3, the value that was
            previously hard-coded.

    Returns:
        A list of dicts with the keys ``'name'``, ``'regions'`` (the
        result of ``find_low_complexity_regions``) and ``'count'`` (the
        value returned by ``count_low_complexity_residues``).
    """
    proteins = []
    current_protein = None
    sequence_parts = []

    def _finish_record():
        # Join the collected chunks once rather than growing a string
        # with += for every line.
        current_protein['sequence'] = ''.join(sequence_parts)
        proteins.append(current_protein)

    with open(file_path, 'r') as file:
        for raw_line in file:
            line = raw_line.strip()
            if line.startswith('>'):
                if current_protein is not None:
                    _finish_record()
                current_protein = {'name': line[1:], 'sequence': ''}
                sequence_parts = []
                print('current_protein:', current_protein)
            elif current_protein is not None:
                sequence_parts.append(line)
            # Otherwise: data before the first header has no record to
            # belong to, so it is skipped.

    if current_protein is not None:
        _finish_record()

    valid_proteins = []
    for protein in proteins:
        low_complexity_residues = count_low_complexity_residues(protein['sequence'])
        if low_complexity_residues <= max_low_complexity:
            valid_regions = find_low_complexity_regions(
                protein['sequence'], low_complexity_residues
            )
            valid_proteins.append({
                'name': protein['name'],
                'regions': valid_regions,
                'count': low_complexity_residues,
            })
    return valid_proteins
def write_to_csv(result, output_file):
    """Write one CSV row per region of every protein in *result*.

    Args:
        result: Iterable of dicts with a ``'name'`` and a ``'regions'``
            list; each region is a dict with ``'start'``, ``'end'`` and
            ``'count'``.
        output_file: Path of the CSV file to create or overwrite.

    The file starts with a header row. ``'start'`` and ``'end'`` are
    written as value + 1 (1-based positions when the stored indices are
    0-based); ``'count'`` is written unchanged.
    """
    columns = ['Protein Name', 'Region Start', 'Region End', 'Region Count']

    with open(output_file, 'w', newline='') as handle:
        table = csv.DictWriter(handle, fieldnames=columns)
        table.writeheader()
        # Rows are generated lazily, protein by protein, region by region.
        table.writerows(
            {
                'Protein Name': entry['name'],
                'Region Start': span['start'] + 1,
                'Region End': span['end'] + 1,
                'Region Count': span['count'],
            }
            for entry in result
            for span in entry['regions']
        )
if __name__ == "__main__":
    # Fixed input and output locations used when run as a script.
    file_path, output_file = './5.0-lcr.interval', "output02.csv"

    # Parse and filter the input, then dump the surviving regions as CSV.
    result = process_file(file_path)
    write_to_csv(result, output_file)

    # Confirmation message (Chinese): "results successfully written to
    # <output_file>".
    print(f"结果已成功写入到 {output_file}")